diff options
Diffstat (limited to 'src/gallium')
237 files changed, 11153 insertions, 3265 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 3b202b5bc77..103d1d65880 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -33,6 +33,7 @@ C_SOURCES = \ draw/draw_pt_fetch_shade_emit.c \ draw/draw_pt_fetch_shade_pipeline.c \ draw/draw_pt_post_vs.c \ + draw/draw_pt_so_emit.c \ draw/draw_pt_util.c \ draw/draw_pt_varray.c \ draw/draw_pt_vcache.c \ @@ -166,6 +167,7 @@ GALLIVM_SOURCES = \ gallivm/lp_bld_tgsi_soa.c \ gallivm/lp_bld_type.c \ draw/draw_llvm.c \ + draw/draw_vs_llvm.c \ draw/draw_pt_fetch_shade_pipeline_llvm.c \ draw/draw_llvm_translate.c diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index af4d5edcf86..1f400575a76 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -78,6 +78,7 @@ source = [ 'draw/draw_pt_fetch_shade_emit.c', 'draw/draw_pt_fetch_shade_pipeline.c', 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_so_emit.c', 'draw/draw_pt_util.c', 'draw/draw_pt_varray.c', 'draw/draw_pt_vcache.c', @@ -217,7 +218,8 @@ if env['llvm']: 'gallivm/lp_bld_type.c', 'draw/draw_llvm.c', 'draw/draw_pt_fetch_shade_pipeline_llvm.c', - 'draw/draw_llvm_translate.c' + 'draw/draw_llvm_translate.c', + 'draw/draw_vs_llvm.c' ] gallium = env.ConvenienceLibrary( diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 61980c3e4f5..dab95e50515 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -435,13 +435,18 @@ draw_num_shader_outputs(const struct draw_context *draw) */ void draw_texture_samplers(struct draw_context *draw, + uint shader, uint num_samplers, struct tgsi_sampler **samplers) { - draw->vs.num_samplers = num_samplers; - draw->vs.samplers = samplers; - draw->gs.num_samplers = num_samplers; - draw->gs.samplers = samplers; + if (shader == PIPE_SHADER_VERTEX) { + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; + } else { + debug_assert(shader == PIPE_SHADER_GEOMETRY); + draw->gs.num_samplers = num_samplers; + draw->gs.samplers = samplers; + } } @@ -574,3 +579,25 @@ draw_get_rasterizer_no_cull( struct draw_context *draw, } return draw->rasterizer_no_cull[scissor][flatshade]; } + +void +draw_set_mapped_so_buffers(struct draw_context *draw, + void *buffers[PIPE_MAX_SO_BUFFERS], + unsigned num_buffers) +{ + int i; + + for (i = 0; i < num_buffers; ++i) { + draw->so.buffers[i] = buffers[i]; + } + draw->so.num_buffers = num_buffers; +} + +void +draw_set_so_state(struct draw_context *draw, + struct pipe_stream_output_state *state) +{ + memcpy(&draw->so.state, + state, + sizeof(struct pipe_stream_output_state)); +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index b905c2f2da6..c0122f2aca5 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -97,6 +97,7 @@ draw_num_shader_outputs(const struct draw_context *draw); void draw_texture_samplers(struct draw_context *draw, + uint shader_type, uint num_samplers, struct tgsi_sampler **samplers); @@ -162,6 +163,14 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer, unsigned size); +void +draw_set_mapped_so_buffers(struct draw_context *draw, + void *buffers[PIPE_MAX_SO_BUFFERS], + unsigned num_buffers); +void +draw_set_so_state(struct draw_context *draw, + struct pipe_stream_output_state *state); + /*********************************************************************** * draw_prim.c diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index 131deed43e4..79a57a67f3e 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -37,8 +37,8 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" -#define MAX_PRIM_VERTICES 6 /* fixme: move it from here */ #define MAX_PRIMITIVES 64 @@ -75,6 +75,7 @@ draw_gs_set_constants(struct draw_context *draw, const void *constants, unsigned size) { + /* noop */ } @@ -90,6 +91,7 @@ draw_create_geometry_shader(struct draw_context *draw, if (!gs) return NULL; + gs->draw = draw; gs->state = *state; gs->state.tokens = tgsi_dup_tokens(state->tokens); if (!gs->state.tokens) { @@ -112,7 +114,7 @@ draw_create_geometry_shader(struct draw_context *draw, TGSI_PROPERTY_GS_OUTPUT_PRIM) gs->output_primitive = gs->info.properties[i].data[0]; else if (gs->info.properties[i].name == - TGSI_PROPERTY_GS_MAX_VERTICES) + TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) gs->max_output_vertices = gs->info.properties[i].data[0]; } @@ -154,128 +156,37 @@ void draw_delete_geometry_shader(struct draw_context *draw, FREE(dgs); } -static INLINE int num_vertices_for_prim(int prim) -{ - switch(prim) { - case PIPE_PRIM_POINTS: - return 1; - case PIPE_PRIM_LINES: - return 2; - case PIPE_PRIM_LINE_LOOP: - return 2; - case PIPE_PRIM_LINE_STRIP: - return 2; - case PIPE_PRIM_TRIANGLES: - return 3; - case PIPE_PRIM_TRIANGLE_STRIP: - return 3; - case PIPE_PRIM_TRIANGLE_FAN: - return 3; - case PIPE_PRIM_LINES_ADJACENCY: - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return 4; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return 6; - default: - assert(!"Bad geometry shader input"); - return 0; - } -} - -static void draw_fetch_geometry_input(struct draw_geometry_shader *shader, - int start_primitive, - int num_primitives, - const float (*input_ptr)[4], - unsigned input_vertex_stride, - unsigned inputs_from_vs) -{ - struct tgsi_exec_machine *machine = shader->machine; - unsigned slot, vs_slot, k, j; - unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); - int idx = 0; - - for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) { - /*debug_printf("Slot = %d (semantic = %d)\n", slot, - shader->info.input_semantic_name[slot]);*/ - if (shader->info.input_semantic_name[slot] == - TGSI_SEMANTIC_PRIMID) { - for (j = 0; j < num_primitives; ++j) { - machine->Inputs[idx].xyzw[0].f[j] = (float)start_primitive + j; - machine->Inputs[idx].xyzw[1].f[j] = (float)start_primitive + j; - machine->Inputs[idx].xyzw[2].f[j] = (float)start_primitive + j; - machine->Inputs[idx].xyzw[3].f[j] = (float)start_primitive + j; - } - ++idx; - } else { - for (j = 0; j < num_primitives; ++j) { - int vidx = idx; - const float (*prim_ptr)[4]; - /*debug_printf(" %d) Prim (num_verts = %d)\n", start_primitive + j, - num_vertices);*/ - prim_ptr = (const float (*)[4])( - (const char *)input_ptr + - (j * num_vertices * input_vertex_stride)); - - for (k = 0; k < num_vertices; ++k, ++vidx) { - const float (*input)[4]; - input = (const float (*)[4])( - (const char *)prim_ptr + (k * input_vertex_stride)); - vidx = k * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; - /*debug_printf("\t%d)(%d) Input vert:\n", vidx, k);*/ -#if 1 - assert(!util_is_inf_or_nan(input[vs_slot][0])); - assert(!util_is_inf_or_nan(input[vs_slot][1])); - assert(!util_is_inf_or_nan(input[vs_slot][2])); - assert(!util_is_inf_or_nan(input[vs_slot][3])); -#endif - machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0]; - machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1]; - machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2]; - machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3]; -#if 0 - debug_printf("\t\t%d %f %f %f %f\n", slot, - machine->Inputs[vidx].xyzw[0].f[j], - machine->Inputs[vidx].xyzw[1].f[j], - machine->Inputs[vidx].xyzw[2].f[j], - machine->Inputs[vidx].xyzw[3].f[j]); -#endif - } - } - ++vs_slot; - idx += num_vertices; - } - } -} - +/*#define DEBUG_OUTPUTS 1*/ static INLINE void draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, int num_primitives, - float (*output)[4], - unsigned vertex_size) + float (**p_output)[4]) { struct tgsi_exec_machine *machine = shader->machine; unsigned prim_idx, j, slot; + float (*output)[4]; + + output = *p_output; /* Unswizzle all output results. */ - /* FIXME: handle all the primitives produced by the gs, not just - * the first one - unsigned prim_count = - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/ + for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { - unsigned num_verts_per_prim = machine->Primitives[0]; + unsigned num_verts_per_prim = machine->Primitives[prim_idx]; + shader->primitive_lengths[prim_idx + shader->emitted_primitives] = + machine->Primitives[prim_idx]; + shader->emitted_vertices += num_verts_per_prim; for (j = 0; j < num_verts_per_prim; j++) { int idx = (prim_idx * num_verts_per_prim + j) * shader->info.num_outputs; #ifdef DEBUG_OUTPUTS - debug_printf("%d) Output vert:\n", idx); + debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs); #endif for (slot = 0; slot < shader->info.num_outputs; slot++) { - output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx]; - output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx]; - output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx]; - output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx]; + output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; + output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0]; + output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0]; + output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0]; #ifdef DEBUG_OUTPUTS debug_printf("\t%d: %f %f %f %f\n", slot, output[slot][0], @@ -285,52 +196,286 @@ draw_geometry_fetch_outputs(struct draw_geometry_shader *shader, #endif debug_assert(!util_is_inf_or_nan(output[slot][0])); } - output = (float (*)[4])((char *)output + vertex_size); + output = (float (*)[4])((char *)output + shader->vertex_size); } } + *p_output = output; + shader->emitted_primitives += num_primitives; } -void draw_geometry_shader_run(struct draw_geometry_shader *shader, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned vertex_size) +/*#define DEBUG_INPUTS 1*/ +static void draw_fetch_gs_input(struct draw_geometry_shader *shader, + unsigned *indices, + unsigned num_vertices, + unsigned prim_idx) { struct tgsi_exec_machine *machine = shader->machine; - unsigned int i; - unsigned num_vertices = num_vertices_for_prim(shader->input_primitive); - unsigned num_primitives = count/num_vertices; - unsigned inputs_from_vs = 0; + unsigned slot, vs_slot, i; + unsigned input_vertex_stride = shader->input_vertex_stride; + const float (*input_ptr)[4]; - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - machine->Consts[i] = constants[i]; - } + input_ptr = shader->input; - for (i = 0; i < shader->info.num_inputs; ++i) { - if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID) - ++inputs_from_vs; + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; +#if DEBUG_INPUTS + debug_printf("%d) vertex index = %d (prim idx = %d)\n", + i, indices[i], prim_idx); +#endif + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { + unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; + if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { + machine->Inputs[idx].xyzw[0].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[1].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[2].f[prim_idx] = + (float)shader->in_prim_idx; + machine->Inputs[idx].xyzw[3].f[prim_idx] = + (float)shader->in_prim_idx; + } else { +#if DEBUG_INPUTS + debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx); +#endif +#if 1 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0]; + machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; + machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; + machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; +#if DEBUG_INPUTS + debug_printf("\t\t%f %f %f %f\n", + machine->Inputs[idx].xyzw[0].f[prim_idx], + machine->Inputs[idx].xyzw[1].f[prim_idx], + machine->Inputs[idx].xyzw[2].f[prim_idx], + machine->Inputs[idx].xyzw[3].f[prim_idx]); +#endif + ++vs_slot; + } + } } +} + +static void gs_flush(struct draw_geometry_shader *shader, + unsigned input_primitives) +{ + unsigned out_prim_count; + struct tgsi_exec_machine *machine = shader->machine; + + debug_assert(input_primitives > 0 && + input_primitives < 4); + + tgsi_set_exec_mask(machine, + 1, + input_primitives > 1, + input_primitives > 2, + input_primitives > 3); + + /* run interpreter */ + tgsi_exec_machine_run(machine); + + out_prim_count = + machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; + +#if 0 + debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", + shader->emitted_primitives, shader->emitted_vertices, + out_prim_count); +#endif + draw_geometry_fetch_outputs(shader, out_prim_count, + &shader->tmp_output); +} + +static void gs_point(struct draw_geometry_shader *shader, + int idx) +{ + unsigned indices[1]; - for (i = 0; i < num_primitives; ++i) { - unsigned int max_primitives = 1; + indices[0] = idx; - draw_fetch_geometry_input(shader, i, max_primitives, input, - input_stride, inputs_from_vs); + draw_fetch_gs_input(shader, indices, 1, 0); + ++shader->in_prim_idx; - tgsi_set_exec_mask(machine, - 1, - max_primitives > 1, - max_primitives > 2, - max_primitives > 3); + gs_flush(shader, 1); +} + +static void gs_line(struct draw_geometry_shader *shader, + int i0, int i1) +{ + unsigned indices[2]; + + indices[0] = i0; + indices[1] = i1; + + draw_fetch_gs_input(shader, indices, 2, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + +static void gs_line_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, int i3) +{ + unsigned indices[4]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + + draw_fetch_gs_input(shader, indices, 4, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + +static void gs_tri(struct draw_geometry_shader *shader, + int i0, int i1, int i2) +{ + unsigned indices[3]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + + draw_fetch_gs_input(shader, indices, 3, 0); + ++shader->in_prim_idx; + + gs_flush(shader, 1); +} + +static void gs_tri_adj(struct draw_geometry_shader *shader, + int i0, int i1, int i2, + int i3, int i4, int i5) +{ + unsigned indices[6]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + indices[3] = i3; + indices[4] = i4; + indices[5] = i5; - /* run interpreter */ - tgsi_exec_machine_run(machine); + draw_fetch_gs_input(shader, indices, 6, 0); + ++shader->in_prim_idx; - draw_geometry_fetch_outputs(shader, max_primitives, - output, vertex_size); + gs_flush(shader, 1); +} + +#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,i0,i1,i2) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) gs_tri_adj(gs,i0,i1,i2,i3,i4,i5) +#define LINE(gs,i0,i1) gs_line(gs,i0,i1) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,i0,i1,i2,i3) +#define POINT(gs,i0) gs_point(gs,i0) +#define FUNC gs_run +#define LOCAL_VARS +#include "draw_gs_tmp.h" + + +#define TRIANGLE(gs,i0,i1,i2) gs_tri(gs,elts[i0],elts[i1],elts[i2]) +#define TRI_ADJ(gs,i0,i1,i2,i3,i4,i5) \ + gs_tri_adj(gs,elts[i0],elts[i1],elts[i2],elts[i3], \ + elts[i4],elts[i5]) +#define LINE(gs,i0,i1) gs_line(gs,elts[i0],elts[i1]) +#define LINE_ADJ(gs,i0,i1,i2,i3) gs_line_adj(gs,elts[i0], \ + elts[i1], \ + elts[i2],elts[i3]) +#define POINT(gs,i0) gs_point(gs,elts[i0]) +#define FUNC gs_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; +#include "draw_gs_tmp.h" + +int draw_geometry_shader_run(struct draw_geometry_shader *shader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ) +{ + const float (*input)[4] = (const float (*)[4])input_verts->verts->data; + unsigned input_stride = input_verts->vertex_size; + unsigned vertex_size = input_verts->vertex_size; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + unsigned num_input_verts = input_prim->linear ? + input_verts->count : + input_prim->count; + unsigned num_in_primitives = + MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), + u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)); + unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive, + shader->max_output_vertices) + * num_in_primitives; + + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->verts = + (struct vertex_header *)MALLOC(input_verts->vertex_size * + num_in_primitives * + shader->max_output_vertices); + + +#if 0 + debug_printf("%s count = %d (in prims # = %d)\n", + __FUNCTION__, num_input_verts, num_in_primitives); + debug_printf("\tlinear = %d, prim_info->count = %d\n", + input_prim->linear, input_prim->count); + debug_printf("\tprimt pipe = %d, shader in = %d, shader out = %d, max out = %d\n", + input_prim->prim, shader->input_primitive, + shader->output_primitive, + shader->max_output_vertices); +#endif + + shader->emitted_vertices = 0; + shader->emitted_primitives = 0; + shader->vertex_size = vertex_size; + shader->tmp_output = (float (*)[4])output_verts->verts->data; + shader->in_prim_idx = 0; + shader->input_vertex_stride = input_stride; + shader->input = input; + if (shader->primitive_lengths) { + FREE(shader->primitive_lengths); } + shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); + + for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + machine->Consts[i] = constants[i]; + } + + if (input_prim->linear) + gs_run(shader, input_prim, input_verts, + output_prims, output_verts); + else + gs_run_elts(shader, input_prim, input_verts, + output_prims, output_verts); + + /* Update prim_info: + */ + output_prims->linear = TRUE; + output_prims->elts = NULL; + output_prims->start = 0; + output_prims->count = shader->emitted_vertices; + output_prims->prim = shader->output_primitive; + output_prims->primitive_lengths = shader->primitive_lengths; + output_prims->primitive_count = shader->emitted_primitives; + output_verts->count = shader->emitted_vertices; + +#if 0 + debug_printf("GS finished, prims = %d, verts = %d\n", + output_prims->primitive_count, + output_verts->count); +#endif + + return shader->emitted_vertices; } void draw_geometry_shader_delete(struct draw_geometry_shader *shader) diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index d8eb2103433..2cb634818c2 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -54,23 +54,36 @@ struct draw_geometry_shader { unsigned input_primitive; unsigned output_primitive; - /* Extracted from shader: - */ - const float (*immediates)[4]; + unsigned *primitive_lengths; + unsigned emitted_vertices; + unsigned emitted_primitives; + + float (*tmp_output)[4]; + unsigned vertex_size; + + unsigned in_prim_idx; + unsigned input_vertex_stride; + const float (*input)[4]; }; -void draw_geometry_shader_run(struct draw_geometry_shader *shader, - const float (*input)[4], - float (*output)[4], - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - unsigned count, - unsigned input_stride, - unsigned output_stride); +/* + * Returns the number of vertices emitted. + * The vertex shader can emit any number of vertices as long as it's + * smaller than the GS_MAX_OUTPUT_VERTICES shader property. + */ +int draw_geometry_shader_run(struct draw_geometry_shader *shader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prim, + struct draw_vertex_info *output_verts, + struct draw_prim_info *output_prims ); void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, struct draw_context *draw); void draw_geometry_shader_delete(struct draw_geometry_shader *shader); +int draw_gs_max_output_vertices(struct draw_geometry_shader *shader, + unsigned pipe_prim); #endif diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h new file mode 100644 index 00000000000..7a8683cf7c2 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h @@ -0,0 +1,152 @@ + +static void FUNC( struct draw_geometry_shader *shader, + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + struct draw_prim_info *output_prims, + struct draw_vertex_info *output_verts) +{ + struct draw_context *draw = shader->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, j; + unsigned count = input_prims->count; + LOCAL_VARS + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + debug_assert(input_prims->primitive_count == 1); + + switch (input_prims->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + POINT( shader, i + 0 ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( shader , i + 0 , i + 1 ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + + for (i = 1; i < count; i++) { + LINE( shader, i - 1, i ); + } + + LINE( shader, i - 1, 0 ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + LINE( shader, i - 1, i ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( shader, i + 0, i + 1, i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( shader, + i + 0, + i + 1 + (i&1), + i + 2 - (i&1) ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( shader, + i + 0 + (i&1), + i + 1 - (i&1), + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( shader, + i + 1, + i + 2, + 0 ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( shader, + 0, + i + 1, + i + 2 ); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + { + for (i = 0; i+2 < count; i++) { + + if (flatfirst) { + TRIANGLE( shader, 0, i + 1, i + 2 ); + } + else { + TRIANGLE( shader, i + 1, i + 2, 0 ); + } + } + } + break; + + case PIPE_PRIM_LINES_ADJACENCY: + for (i = 0; i+3 < count; i += 4) { + LINE_ADJ( shader , i + 0 , i + 1, i + 2, i + 3 ); + } + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + for (i = 1; i + 2 < count; i++) { + LINE_ADJ( shader, i - 1, i, i + 1, i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLES_ADJACENCY: + for (i = 0; i+5 < count; i += 5) { + TRI_ADJ( shader, i + 0, i + 1, i + 2, + i + 3, i + 4, i + 5); + } + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + for (i = 0, j = 0; i+5 < count; i += 2, ++j) { + TRI_ADJ( shader, + i + 0, + i + 1 + 2*(j&1), + i + 2 + 2*(j&1), + i + 3 - 2*(j&1), + i + 4 - 2*(j&1), + i + 5); + } + break; + + default: + debug_assert(!"Unsupported primitive in geometry shader"); + break; + } +} + + +#undef TRIANGLE +#undef TRI_ADJ +#undef POINT +#undef LINE +#undef LINE_ADJ +#undef FUNC +#undef LOCAL_VARS diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index bd5d8853cf8..f521669fcde 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "draw_llvm.h" #include "draw_context.h" @@ -173,6 +200,8 @@ draw_llvm_create(struct draw_context *draw) #endif llvm = CALLOC_STRUCT( draw_llvm ); + if (!llvm) + return NULL; llvm->draw = draw; llvm->engine = draw->engine; @@ -217,6 +246,9 @@ draw_llvm_create(struct draw_context *draw) LLVMDumpModule(llvm->module); } + llvm->nr_variants = 0; + make_empty_list(&llvm->vs_variants_list); + return llvm; } @@ -229,9 +261,13 @@ draw_llvm_destroy(struct draw_llvm *llvm) } struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs) { struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant)); + struct llvm_vertex_shader *shader = + llvm_vertex_shader(llvm->draw->vs.vertex_shader); + + variant->llvm = llvm; draw_llvm_make_variant_key(llvm, &variant->key); @@ -240,6 +276,12 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) draw_llvm_generate(llvm, variant); draw_llvm_generate_elts(llvm, variant); + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + return variant; } @@ -895,3 +937,30 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, &llvm->draw->vs.vertex_shader->state, sizeof(struct pipe_shader_state)); } + +void +draw_llvm_destroy_variant(struct draw_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + struct draw_context *draw = llvm->draw; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_variants--; + FREE(variant); +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 58fee7f9d60..c98d7fb393b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -1,15 +1,48 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef HAVE_LLVM_H #define HAVE_LLVM_H #include "draw/draw_private.h" +#include "draw/draw_vs.h" + #include "pipe/p_context.h" +#include "util/u_simple_list.h" #include <llvm-c/Core.h> #include <llvm-c/Analysis.h> #include <llvm-c/Target.h> #include <llvm-c/ExecutionEngine.h> +struct draw_llvm; +struct llvm_vertex_shader; + struct draw_jit_texture { uint32_t width; @@ -104,11 +137,50 @@ typedef void unsigned stride, struct pipe_vertex_buffer *vertex_buffers); +struct draw_llvm_variant_key +{ + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct pipe_shader_state vs; +}; + +struct draw_llvm_variant_list_item +{ + struct draw_llvm_variant *base; + struct draw_llvm_variant_list_item *next, *prev; +}; + +struct draw_llvm_variant +{ + struct draw_llvm_variant_key key; + LLVMValueRef function; + LLVMValueRef function_elts; + draw_jit_vert_func jit_func; + draw_jit_vert_func_elts jit_func_elts; + + struct llvm_vertex_shader *shader; + + struct draw_llvm *llvm; + struct draw_llvm_variant_list_item list_item_global; + struct draw_llvm_variant_list_item list_item_local; +}; + +struct llvm_vertex_shader { + struct draw_vertex_shader base; + + struct draw_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; + struct draw_llvm { struct draw_context *draw; struct draw_jit_context jit_context; + struct draw_llvm_variant_list_item vs_variants_list; + int nr_variants; + LLVMModuleRef module; LLVMExecutionEngineRef engine; LLVMModuleProviderRef provider; @@ -121,23 +193,12 @@ struct draw_llvm { LLVMTypeRef vb_ptr_type; }; -struct draw_llvm_variant_key +static struct llvm_vertex_shader * +llvm_vertex_shader(struct draw_vertex_shader *vs) { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_elements; - struct pipe_shader_state vs; -}; + return (struct llvm_vertex_shader *)vs; +} -struct draw_llvm_variant -{ - struct draw_llvm_variant_key key; - LLVMValueRef function; - LLVMValueRef function_elts; - draw_jit_vert_func jit_func; - draw_jit_vert_func_elts jit_func_elts; - - struct draw_llvm_variant *next; -}; struct draw_llvm * draw_llvm_create(struct draw_context *draw); @@ -146,7 +207,10 @@ void draw_llvm_destroy(struct draw_llvm *llvm); struct draw_llvm_variant * -draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs); +draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs); + +void +draw_llvm_destroy_variant(struct draw_llvm_variant *variant); void draw_llvm_make_variant_key(struct draw_llvm *llvm, @@ -156,4 +220,5 @@ LLVMValueRef draw_llvm_translate_from(LLVMBuilderRef builder, LLVMValueRef vbuffer, enum pipe_format from_format); + #endif diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 7ea04e38193..8cd75ecf9a3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i2]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw, ( DRAW_PIPE_RESET_STIPPLE | \ DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ - verts + stride * elts[i0], \ - verts + stride * elts[i1], \ - verts + stride * elts[i3]); \ + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ - verts + stride * elts[i1], \ - verts + stride * elts[i2], \ - verts + stride * elts[i3]) + verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ @@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw, #define POINT(i0) \ do_point( draw, \ - verts + stride * elts[i0] ) + verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run #define ARGS \ @@ -256,27 +256,34 @@ static void do_triangle( struct draw_context *draw, * draw_vbuf.c code uses when it has to perform a flush. */ void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; + unsigned i, start; + + draw->pipeline.verts = (char *)vert_info->verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = vert_info->count; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + + pipe_run(draw, + prim_info->prim, + vert_info->verts, + vert_info->stride, + prim_info->elts + start, + count); + } - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = vertex_count; - - pipe_run(draw, prim, vertices, stride, elts, count); - draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; } - /* * Set up macros for draw_pt_decompose.h template code. * This code is for non-indexed (aka linear) rendering (no elts). @@ -289,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_1 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) /* emit last quad vertex as last vertex in triangles */ #define QUAD_LAST_PV(i0,i1,i2,i3) \ @@ -305,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw, DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_2 ), \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i3)); \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \ do_triangle( draw, \ ( DRAW_PIPE_EDGE_FLAG_0 | \ DRAW_PIPE_EDGE_FLAG_1 ), \ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i2), \ - verts + stride * (i3)) + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)) #define TRIANGLE(flags,i0,i1,i2) \ do_triangle( draw, \ flags, /* flags */ \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1), \ - verts + stride * (i2)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)) #define LINE(flags,i0,i1) \ do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i1)) + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK)) #define POINT(i0) \ do_point( draw, \ - verts + stride * i0 ) + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) ) #define FUNC pipe_run_linear #define ARGS \ @@ -354,17 +361,29 @@ void draw_pipeline_run( struct draw_context *draw, * For drawing non-indexed primitives. */ void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { - char *verts = (char *)vertices; - draw->pipeline.verts = verts; - draw->pipeline.vertex_stride = stride; - draw->pipeline.vertex_count = count; - - pipe_run_linear(draw, prim, vertices, stride, count); + unsigned i, start; + + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + unsigned count = prim_info->primitive_lengths[i]; + char *verts = ((char*)vert_info->verts) + + (start * vert_info->stride); + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = vert_info->stride; + draw->pipeline.vertex_count = count; + + pipe_run_linear(draw, + prim_info->prim, + (struct vertex_header*)verts, + vert_info->stride, + count); + } draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 4faf0a779ca..debd17fd74b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -788,9 +788,6 @@ draw_aaline_stage(struct draw_context *draw) if (aaline == NULL) return NULL; - if (!draw_alloc_temp_verts( &aaline->stage, 8 )) - goto fail; - aaline->stage.draw = draw; aaline->stage.name = "aaline"; aaline->stage.next = NULL; @@ -801,11 +798,14 @@ draw_aaline_stage(struct draw_context *draw) aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; + return aaline; fail: if (aaline) - aaline_destroy(&aaline->stage); + aaline->stage.destroy(&aaline->stage); return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index bba6f50c020..d406a86ccb5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -780,9 +780,6 @@ draw_aapoint_stage(struct draw_context *draw) if (aapoint == NULL) goto fail; - if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) - goto fail; - aapoint->stage.draw = draw; aapoint->stage.name = "aapoint"; aapoint->stage.next = NULL; @@ -793,11 +790,14 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; + return aapoint; fail: if (aapoint) - aapoint_destroy(&aapoint->stage); + aapoint->stage.destroy(&aapoint->stage); return NULL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index df8d82e367d..122b1c79689 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -522,9 +522,6 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) if (clipper == NULL) goto fail; - if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) - goto fail; - clipper->stage.draw = draw; clipper->stage.name = "clipper"; clipper->stage.point = clip_point; @@ -536,6 +533,9 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; + return &clipper->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index bf84ce30ed1..2f4d01d23ab 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -129,9 +129,6 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) if (cull == NULL) goto fail; - if (!draw_alloc_temp_verts( &cull->stage, 0 )) - goto fail; - cull->stage.draw = draw; cull->stage.name = "cull"; cull->stage.next = NULL; @@ -142,6 +139,9 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) cull->stage.reset_stipple_counter = cull_reset_stipple_counter; cull->stage.destroy = cull_destroy; + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; + return &cull->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 34afb1a0b60..693f2895aad 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -257,9 +257,6 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) if (flatshade == NULL) goto fail; - if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) - goto fail; - flatshade->stage.draw = draw; flatshade->stage.name = "flatshade"; flatshade->stage.next = NULL; @@ -270,6 +267,9 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; flatshade->stage.destroy = flatshade_destroy; + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; + return &flatshade->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 8e321946ced..8afbbfa1569 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -161,9 +161,7 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); if (offset == NULL) - return NULL; - - draw_alloc_temp_verts( &offset->stage, 3 ); + goto fail; offset->stage.draw = draw; offset->stage.name = "offset"; @@ -175,5 +173,14 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.reset_stipple_counter = offset_reset_stipple_counter; offset->stage.destroy = offset_destroy; + if (!draw_alloc_temp_verts( &offset->stage, 3 )) + goto fail; + return &offset->stage; + +fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index ef30db094fe..fff960c7eb5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -607,8 +607,8 @@ static struct pstip_stage * draw_pstip_stage(struct draw_context *draw) { struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); - - draw_alloc_temp_verts( &pstip->stage, 8 ); + if (pstip == NULL) + goto fail; pstip->stage.draw = draw; pstip->stage.name = "pstip"; @@ -620,7 +620,16 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; pstip->stage.destroy = pstip_destroy; + if (!draw_alloc_temp_verts( &pstip->stage, 8 )) + goto fail; + return pstip; + +fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 70fbab9ea76..4b3f4e7ae11 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -235,8 +235,8 @@ stipple_destroy( struct draw_stage *stage ) struct draw_stage *draw_stipple_stage( struct draw_context *draw ) { struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); - - draw_alloc_temp_verts( &stipple->stage, 2 ); + if (stipple == NULL) + goto fail; stipple->stage.draw = draw; stipple->stage.name = "stipple"; @@ -248,5 +248,14 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; + if (!draw_alloc_temp_verts( &stipple->stage, 2 )) + goto fail; + return &stipple->stage; + +fail: + if (stipple) + stipple->stage.destroy( &stipple->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 808b2fb0b58..9a3f3fee625 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -177,9 +177,6 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) if (twoside == NULL) goto fail; - if (!draw_alloc_temp_verts( &twoside->stage, 3 )) - goto fail; - twoside->stage.draw = draw; twoside->stage.name = "twoside"; twoside->stage.next = NULL; @@ -190,6 +187,9 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; twoside->stage.destroy = twoside_destroy; + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; + return &twoside->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index e333d26a932..d87741b91e7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -202,9 +202,6 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) if (unfilled == NULL) goto fail; - if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) - goto fail; - unfilled->stage.draw = draw; unfilled->stage.name = "unfilled"; unfilled->stage.next = NULL; @@ -216,6 +213,9 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; unfilled->stage.destroy = unfilled_destroy; + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; + return &unfilled->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index d7ac95b7405..98da9cfb999 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -202,8 +202,8 @@ static void wideline_destroy( struct draw_stage *stage ) struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) { struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); + if (wide == NULL) + goto fail; wide->stage.draw = draw; wide->stage.name = "wide-line"; @@ -215,5 +215,14 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; + +fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index a86fe19586c..3e6e5389950 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -324,9 +324,6 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) if (wide == NULL) goto fail; - if (!draw_alloc_temp_verts( &wide->stage, 4 )) - goto fail; - wide->stage.draw = draw; wide->stage.name = "wide-point"; wide->stage.next = NULL; @@ -337,6 +334,9 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + return &wide->stage; fail: diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index a2bfb693c09..54944a7c67a 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -81,6 +81,9 @@ struct vertex_header { #define UNDEFINED_VERTEX_ID 0xffff +/* maximum number of shader variants we can cache */ +#define DRAW_MAX_SHADER_VARIANTS 1024 + /** * Private context for the drawing module. */ @@ -235,6 +238,12 @@ struct draw_context struct tgsi_sampler **samplers; } gs; + struct { + struct pipe_stream_output_state state; + void *buffers[PIPE_MAX_SO_BUFFERS]; + uint num_buffers; + } so; + /* Clip derived state: */ float plane[12][4]; @@ -259,6 +268,34 @@ struct draw_context void *driver_private; }; + +struct draw_fetch_info { + boolean linear; + unsigned start; + const unsigned *elts; + unsigned count; +}; + +struct draw_vertex_info { + struct vertex_header *verts; + unsigned vertex_size; + unsigned stride; + unsigned count; +}; + +struct draw_prim_info { + boolean linear; + unsigned start; + + const ushort *elts; + unsigned count; + + unsigned prim; + unsigned *primitive_lengths; + unsigned primitive_count; +}; + + /******************************************************************************* * Draw common initialization code */ @@ -336,18 +373,13 @@ void draw_pipeline_destroy( struct draw_context *draw ); #define DRAW_PIPE_FLAG_MASK (0xf<<12) void draw_pipeline_run( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); void draw_pipeline_run_linear( struct draw_context *draw, - unsigned prim, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + const struct draw_vertex_info *vert, + const struct draw_prim_info *prim); + diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index b853f3a89f8..6234272d6ce 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -31,6 +31,7 @@ */ #include "draw/draw_context.h" +#include "draw/draw_gs.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" #include "tgsi/tgsi_dump.h" @@ -74,30 +75,34 @@ draw_pt_arrays(struct draw_context *draw, { unsigned first, incr; draw_pt_split_prim(prim, &first, &incr); - count = trim(count, first, incr); + count = trim(count, first, incr); if (count < first) return TRUE; } if (!draw->force_passthrough) { + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + if (!draw->render) { opt |= PT_PIPELINE; } - + if (draw_need_pipeline(draw, draw->rasterizer, - prim)) { + gs_out_prim)) { opt |= PT_PIPELINE; } if (!draw->bypass_clipping && !draw->pt.test_fse) { opt |= PT_CLIPTEST; } - + opt |= PT_SHADE; } - - if (draw->pt.middle.llvm) { + + if (draw->pt.middle.llvm && !draw->gs.geometry_shader) { middle = draw->pt.middle.llvm; } else { if (opt == 0) @@ -119,7 +124,7 @@ draw_pt_arrays(struct draw_context *draw, frontend->prepare( frontend, prim, middle, opt ); - frontend->run(frontend, + frontend->run(frontend, draw_pt_elt_func(draw), draw_pt_elt_ptr(draw, start), draw->pt.user.eltBias, diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 3e3ea320cc0..44356fba4c5 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -39,6 +39,8 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; +struct draw_prim_info; +struct draw_vertex_info; #define PT_SHADE 0x1 @@ -162,21 +164,31 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned *max_vertices ); void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_linear( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count ); + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info); void draw_pt_emit_destroy( struct pt_emit *emit ); struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); +/******************************************************************************* + * HW stream output emit: + */ +struct pt_so_emit; + +void draw_pt_so_emit_prepare( struct pt_so_emit *emit ); + +void draw_pt_so_emit( struct pt_so_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info ); + +void draw_pt_so_emit_destroy( struct pt_so_emit *emit ); + +struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ); /******************************************************************************* * API vertex fetch: @@ -208,9 +220,7 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); struct pt_post_vs; boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned stride, - unsigned count ); + struct draw_vertex_info *info ); void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, boolean bypass_clipping, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index f623c0743da..0229bcc7fe1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -127,15 +127,17 @@ void draw_pt_emit_prepare( struct pt_emit *emit, void draw_pt_emit( struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned vertex_count = vert_info->count; + unsigned stride = vert_info->stride; + const ushort *elts = prim_info->elts; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; + unsigned start, i; void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? @@ -190,23 +192,31 @@ void draw_pt_emit( struct pt_emit *emit, 0, vertex_count - 1 ); - render->draw_elements(render, - elts, - count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_elements(render, + elts + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); } void draw_pt_emit_linear(struct pt_emit *emit, - const float (*vertex_data)[4], - unsigned stride, - unsigned count) + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) { + const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; + unsigned stride = vert_info->stride; + unsigned count = vert_info->count; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; void *hw_verts; + unsigned start, i; #if 0 debug_printf("Linear emit\n"); @@ -258,7 +268,14 @@ void draw_pt_emit_linear(struct pt_emit *emit, render->unmap_vertices( render, 0, count - 1 ); - render->draw_arrays(render, 0, count); + for (start = i = 0; + i < prim_info->primitive_count; + start += prim_info->primitive_lengths[i], i++) + { + render->draw_arrays(render, + start, + prim_info->primitive_lengths[i]); + } render->release_vertices(render); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a1347221b5d..ae12ee24bdc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -29,7 +29,6 @@ #include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -69,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->vertex_size = vertex_size; - /* Always emit/leave space for a vertex header. - * - * It's worth considering whether the vertex headers should contain - * a pointer to the 'data', rather than having it inline. - * Something to look at after we've fully switched over to the pt - * paths. + /* Leave the clipmask/edgeflags/pad/vertex_id untouched */ - { - /* Need to set header->vertex_id = 0xffff somehow. - */ - key.element[nr].type = TRANSLATE_ELEMENT_NORMAL; - key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; - key.element[nr].input_offset = 0; - key.element[nr].instance_divisor = 0; - key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].output_offset = dst_offset; - dst_offset += 1 * sizeof(float); - nr++; - - - /* Just leave the clip[] array untouched. - */ - dst_offset += 4 * sizeof(float); - } + dst_offset += 1 * sizeof(float); + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); if (instance_id_index != ~0) { num_extra_inputs++; @@ -132,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.nr_elements = nr; key.output_stride = vertex_size; - if (!fetch->translate || translate_key_compare(&fetch->translate->key, &key) != 0) { translate_key_sanitize(&key); fetch->translate = translate_cache_find(fetch->cache, &key); - - { - static struct vertex_header vh = { 0, - 1, - 0, - UNDEFINED_VERTEX_ID, - { .0f, .0f, .0f, .0f } }; - - fetch->translate->set_buffer(fetch->translate, - draw->pt.nr_vertex_buffers, - &vh, - 0, - ~0); - } } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5158c6341c0..5c8af17c8e3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_gs.h" #include "translate/translate.h" #include "translate/translate_cache.h" @@ -100,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, boolean ok; struct translate_key key; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + + ok = draw->render->set_primitive( draw->render, - prim ); + gs_out_prim ); if (!ok) { assert(0); return; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index f0fc591bf55..b8270280b64 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -67,9 +67,8 @@ struct fetch_shade_emit { - static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned prim, + unsigned prim, unsigned opt, unsigned *max_vertices ) { @@ -79,9 +78,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle, const struct vertex_info *vinfo; unsigned i; unsigned nr_vbs = 0; - - if (!draw->render->set_primitive( draw->render, + /* Can't support geometry shader on this path. + */ + assert(!draw->gs.geometry_shader); + + if (!draw->render->set_primitive( draw->render, prim )) { assert(0); return; @@ -90,7 +92,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, /* Must do this after set_primitive() above: */ fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render); - fse->key.output_stride = vinfo->size * 4; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index da5106463a7..121dfc414a4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -27,6 +27,7 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" @@ -40,16 +41,16 @@ struct fetch_pipeline_middle_end { struct draw_context *draw; struct pt_emit *emit; + struct pt_so_emit *so_emit; struct pt_fetch *fetch; struct pt_post_vs *post_vs; unsigned vertex_data_offset; unsigned vertex_size; - unsigned prim; + unsigned input_prim; unsigned opt; }; - static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim, unsigned opt, @@ -61,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned i; unsigned instance_id_index = ~0; + unsigned gs_out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ @@ -76,7 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } } - fpme->prim = prim; + fpme->input_prim = prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -98,11 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)draw->identity_viewport, (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); + + draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { - draw_pt_emit_prepare( fpme->emit, - prim, + draw_pt_emit_prepare( fpme->emit, + gs_out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -121,147 +128,146 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch( struct pt_fetch *fetch, + const struct draw_fetch_info *fetch_info, + char *output) { - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vshader = draw->vs.vertex_shader; - struct draw_geometry_shader *gshader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - unsigned alloc_count = align( fetch_count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ - assert(0); - return; + if (fetch_info->linear) { + draw_pt_fetch_run_linear( fetch, + fetch_info->start, + fetch_info->count, + output ); } - - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run( fpme->fetch, - fetch_elts, - fetch_count, - (char *)pipeline_verts ); - - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. - */ - if (opt & PT_SHADE) - { - vshader->run_linear(vshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); - if (gshader) - draw_geometry_shader_run(gshader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - fetch_count, - fpme->vertex_size, - fpme->vertex_size); + else { + draw_pt_fetch_run( fetch, + fetch_info->elts, + fetch_info->count, + output ); } +} - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { +static void pipeline(struct fetch_pipeline_middle_end *fpme, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( fpme->draw, + vert_info, + prim_info); + else draw_pipeline_run( fpme->draw, - fpme->prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + vert_info, + prim_info ); +} + +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + draw_pt_emit(emit, vert_info, prim_info); } +} - FREE(pipeline_verts); +static void draw_vertex_shader_run(struct draw_vertex_shader *vshader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const struct draw_vertex_info *input_verts, + struct draw_vertex_info *output_verts ) +{ + output_verts->vertex_size = input_verts->vertex_size; + output_verts->stride = input_verts->vertex_size; + output_verts->count = input_verts->count; + output_verts->verts = + (struct vertex_header *)MALLOC(output_verts->vertex_size * + align(output_verts->count, 4)); + + vshader->run_linear(vshader, + (const float (*)[4])input_verts->verts->data, + ( float (*)[4])output_verts->verts->data, + constants, + input_verts->count, + input_verts->vertex_size, + input_verts->vertex_size); } - -static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count) +static void fetch_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info fetched_vert_info; + struct draw_vertex_info vs_vert_info; + struct draw_vertex_info gs_vert_info; + struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ + fetched_vert_info.count = fetch_info->count; + fetched_vert_info.vertex_size = fpme->vertex_size; + fetched_vert_info.stride = fpme->vertex_size; + fetched_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!fetched_vert_info.verts) { assert(0); return; } - /* Fetch into our vertex buffer + /* Fetch into our vertex buffer. */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); + fetch( fpme->fetch, fetch_info, (char *)fetched_vert_info.verts ); + + /* Finished with fetch: + */ + fetch_info = NULL; + vert_info = &fetched_vert_info; /* Run the shader, note that this overwrites the data[] parts of * the pipeline verts. */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) - draw_geometry_shader_run(geometry_shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); + if (fpme->opt & PT_SHADE) { + draw_vertex_shader_run(vshader, + draw->pt.user.vs_constants, + vert_info, + &vs_vert_info); + + FREE(vert_info->verts); + vert_info = &vs_vert_info; } + if ((fpme->opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; + } + + + /* Stream output needs to be done before clipping. + * + * XXX: Stream output surely needs to respect the prim_info->elt + * lists. + */ + draw_pt_so_emit( fpme->so_emit, + vert_info, + prim_info ); + if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) + vert_info )) { opt |= PT_PIPELINE; } @@ -269,102 +275,96 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->prim, - pipeline_verts, - count, - fpme->vertex_size); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); + emit( fpme->emit, + vert_info, + prim_info ); } - - FREE(pipeline_verts); + FREE(vert_info->verts); } +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} -static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count, - const ushort *draw_elts, - unsigned draw_count ) +static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vs.vertex_shader; - struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); +} - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - if (!pipeline_verts) - return FALSE; - /* Fetch into our vertex buffer - */ - draw_pt_fetch_run_linear( fpme->fetch, - start, - count, - (char *)pipeline_verts ); +static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - /* Run the shader, note that this overwrites the data[] parts of - * the pipeline verts. - */ - if (opt & PT_SHADE) - { - shader->run_linear(shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.vs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - - if (geometry_shader) - draw_geometry_shader_run(geometry_shader, - (const float (*)[4])pipeline_verts->data, - ( float (*)[4])pipeline_verts->data, - draw->pt.user.gs_constants, - count, - fpme->vertex_size, - fpme->vertex_size); - } + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + fetch_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } @@ -385,6 +385,9 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) if (fpme->emit) draw_pt_emit_destroy( fpme->emit ); + if (fpme->so_emit) + draw_pt_so_emit_destroy( fpme->so_emit ); + if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); @@ -416,7 +419,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * goto fail; fpme->emit = draw_pt_emit_create( draw ); - if (!fpme->emit) + if (!fpme->emit) + goto fail; + + fpme->so_emit = draw_pt_so_emit_create( draw ); + if (!fpme->so_emit) goto fail; return &fpme->base; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index d56889b5f65..d33969ac709 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -28,6 +28,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "draw/draw_context.h" +#include "draw/draw_gs.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" @@ -40,52 +41,58 @@ struct llvm_middle_end { struct draw_context *draw; struct pt_emit *emit; + struct pt_so_emit *so_emit; struct pt_fetch *fetch; struct pt_post_vs *post_vs; unsigned vertex_data_offset; unsigned vertex_size; - unsigned prim; + unsigned input_prim; unsigned opt; struct draw_llvm *llvm; - struct draw_llvm_variant *variants; struct draw_llvm_variant *current_variant; - int nr_variants; }; static void llvm_middle_end_prepare( struct draw_pt_middle_end *middle, - unsigned prim, + unsigned in_prim, unsigned opt, unsigned *max_vertices ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct llvm_vertex_shader *shader = + llvm_vertex_shader(draw->vs.vertex_shader); struct draw_llvm_variant_key key; struct draw_llvm_variant *variant = NULL; + struct draw_llvm_variant_list_item *li; unsigned i; unsigned instance_id_index = ~0; + + unsigned out_prim = (draw->gs.geometry_shader ? + draw->gs.geometry_shader->output_primitive : + in_prim); + /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. */ - unsigned nr = MAX2( vs->info.num_inputs, - vs->info.num_outputs + 1 ); + unsigned nr = MAX2( shader->base.info.num_inputs, + shader->base.info.num_outputs + 1 ); /* Scan for instanceID system value. */ - for (i = 0; i < vs->info.num_inputs; i++) { - if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { + for (i = 0; i < shader->base.info.num_inputs; i++) { + if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { instance_id_index = i; break; } } - fpme->prim = prim; + fpme->input_prim = in_prim; fpme->opt = opt; /* Always leave room for the vertex header whether we need it or @@ -102,11 +109,13 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->bypass_clipping, (boolean)(draw->identity_viewport), (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? true : false) ); + (draw->vs.edgeflag_output ? TRUE : FALSE) ); + + draw_pt_so_emit_prepare( fpme->so_emit ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - prim, + out_prim, max_vertices ); *max_vertices = MAX2( *max_vertices, @@ -121,20 +130,41 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, draw_llvm_make_variant_key(fpme->llvm, &key); - variant = fpme->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, sizeof key) == 0) { + variant = li->base; break; + } + li = next_elem(li); + } - variant = variant->next; + if (variant) { + move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); } + else { + unsigned i; + if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_llvm_variant_list_item *item = + last_elem(&fpme->llvm->vs_variants_list); + draw_llvm_destroy_variant(item->base); + } + } + + variant = draw_llvm_create_variant(fpme->llvm, nr); - if (!variant) { - variant = draw_llvm_prepare(fpme->llvm, nr); - variant->next = fpme->variants; - fpme->variants = variant; - ++fpme->nr_variants; + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); + fpme->llvm->nr_variants++; + shader->variants_cached++; + } } + fpme->current_variant = variant; /*XXX we only support one constant buffer */ @@ -145,66 +175,142 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, } +static void pipeline(struct llvm_middle_end *llvm, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) + draw_pipeline_run_linear( llvm->draw, + vert_info, + prim_info); + else + draw_pipeline_run( llvm->draw, + vert_info, + prim_info ); +} -static void llvm_middle_end_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) +static void emit(struct pt_emit *emit, + const struct draw_vertex_info *vert_info, + const struct draw_prim_info *prim_info) +{ + if (prim_info->linear) { + draw_pt_emit_linear(emit, vert_info, prim_info); + } + else { + draw_pt_emit(emit, vert_info, prim_info); + } +} + +static void +llvm_pipeline_generic( struct draw_pt_middle_end *middle, + const struct draw_fetch_info *fetch_info, + const struct draw_prim_info *prim_info ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; + struct draw_geometry_shader *gshader = draw->gs.geometry_shader; + struct draw_prim_info gs_prim_info; + struct draw_vertex_info llvm_vert_info; + struct draw_vertex_info gs_vert_info; + struct draw_vertex_info *vert_info; unsigned opt = fpme->opt; - unsigned alloc_count = align( fetch_count, 4 ); - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ + llvm_vert_info.count = fetch_info->count; + llvm_vert_info.vertex_size = fpme->vertex_size; + llvm_vert_info.stride = fpme->vertex_size; + llvm_vert_info.verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * + align(fetch_info->count, 4)); + if (!llvm_vert_info.verts) { assert(0); return; } - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - fetch_elts, - fetch_count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - fetch_count, - fpme->vertex_size )) - { + if (fetch_info->linear) + fpme->current_variant->jit_func( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->start, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer ); + else + fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + llvm_vert_info.verts, + (const char **)draw->pt.user.vbuffer, + fetch_info->elts, + fetch_info->count, + fpme->vertex_size, + draw->pt.vertex_buffer); + + /* Finished with fetch and vs: + */ + fetch_info = NULL; + vert_info = &llvm_vert_info; + + + if ((opt & PT_SHADE) && gshader) { + draw_geometry_shader_run(gshader, + draw->pt.user.gs_constants, + vert_info, + prim_info, + &gs_vert_info, + &gs_prim_info); + + FREE(vert_info->verts); + vert_info = &gs_vert_info; + prim_info = &gs_prim_info; + } + + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + vert_info, + prim_info ); + + if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { opt |= PT_PIPELINE; } /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + pipeline( fpme, + vert_info, + prim_info ); } else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + emit( fpme->emit, + vert_info, + prim_info ); } + FREE(vert_info->verts); +} - FREE(pipeline_verts); +static void llvm_middle_end_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = FALSE; + fetch_info.start = 0; + fetch_info.elts = fetch_elts; + fetch_info.count = fetch_count; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -213,57 +319,23 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, unsigned count) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ - assert(0); - return; - } - -#if 0 - debug_printf("#### Pipeline = %p (data = %p)\n", - pipeline_verts, pipeline_verts->data); -#endif - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } - - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run_linear( fpme->draw, - fpme->prim, - pipeline_verts, - count, - fpme->vertex_size); - } - else { - draw_pt_emit_linear( fpme->emit, - (const float (*)[4])pipeline_verts->data, - fpme->vertex_size, - count ); - } - - FREE(pipeline_verts); + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; + + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = TRUE; + prim_info.start = 0; + prim_info.count = count; + prim_info.elts = NULL; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); } @@ -276,53 +348,24 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, unsigned draw_count ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned opt = fpme->opt; - unsigned alloc_count = align( count, 4 ); - - struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - - if (!pipeline_verts) - return FALSE; - - fpme->current_variant->jit_func( &fpme->llvm->jit_context, - pipeline_verts, - (const char **)draw->pt.user.vbuffer, - start, - count, - fpme->vertex_size, - draw->pt.vertex_buffer ); - - if (draw_pt_post_vs_run( fpme->post_vs, - pipeline_verts, - count, - fpme->vertex_size )) - { - opt |= PT_PIPELINE; - } + struct draw_fetch_info fetch_info; + struct draw_prim_info prim_info; - /* Do we need to run the pipeline? - */ - if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->prim, - pipeline_verts, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } - else { - draw_pt_emit( fpme->emit, - (const float (*)[4])pipeline_verts->data, - count, - fpme->vertex_size, - draw_elts, - draw_count ); - } + fetch_info.linear = TRUE; + fetch_info.start = start; + fetch_info.count = count; + fetch_info.elts = NULL; + + prim_info.linear = FALSE; + prim_info.start = 0; + prim_info.count = draw_count; + prim_info.elts = draw_elts; + prim_info.prim = fpme->input_prim; + prim_info.primitive_count = 1; + prim_info.primitive_lengths = &draw_count; + + llvm_pipeline_generic( middle, &fetch_info, &prim_info ); - FREE(pipeline_verts); return TRUE; } @@ -336,37 +379,16 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; - struct draw_context *draw = fpme->draw; - struct draw_llvm_variant *variant = NULL; - variant = fpme->variants; - while(variant) { - struct draw_llvm_variant *next = variant->next; - - if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function_elts); - LLVMDeleteFunction(variant->function_elts); - } - - if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function); - LLVMDeleteFunction(variant->function); - } - - FREE(variant); - - variant = next; - } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); if (fpme->emit) draw_pt_emit_destroy( fpme->emit ); + if (fpme->so_emit) + draw_pt_so_emit_destroy( fpme->so_emit ); + if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); @@ -377,7 +399,8 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) } -struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw ) +struct draw_pt_middle_end * +draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) { struct llvm_middle_end *fpme = 0; @@ -409,13 +432,15 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont if (!fpme->emit) goto fail; + fpme->so_emit = draw_pt_so_emit_create( draw ); + if (!fpme->so_emit) + goto fail; + fpme->llvm = draw_llvm_create(draw); if (!fpme->llvm) goto fail; - fpme->variants = NULL; fpme->current_variant = NULL; - fpme->nr_variants = 0; return &fpme->base; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 5525dfc748d..e850cc2edd6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -29,19 +29,23 @@ #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -#include "draw/draw_vbuf.h" #include "draw/draw_pt.h" struct pt_post_vs { struct draw_context *draw; boolean (*run)( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ); + struct draw_vertex_info *info ); }; - +static INLINE void +initialize_vertex_header(struct vertex_header *header) +{ + header->clipmask = 0; + header->edgeflag = 1; + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; +} static INLINE float dot4(const float *a, const float *b) @@ -52,8 +56,6 @@ dot4(const float *a, const float *b) a[3]*b[3]); } - - static INLINE unsigned compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) { @@ -92,22 +94,21 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) * instructions */ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned clipped = 0; unsigned j; - if (0) debug_printf("%s\n", __FUNCTION__); + if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); #if 0 debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n", j, out, position, position[0], position[1], position[2], position[3]); @@ -143,7 +144,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, #endif } - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } return clipped != 0; @@ -153,29 +154,27 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, /* As above plus edgeflags */ -static boolean +static boolean post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info) { unsigned j; boolean needpipe; - needpipe = post_vs_cliptest_viewport_gl( pvs, vertices, count, stride); + needpipe = post_vs_cliptest_viewport_gl(pvs, info); /* If present, copy edgeflag VS output into vertex header. * Otherwise, leave header as is. */ if (pvs->draw->vs.edgeflag_output) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; int ef = pvs->draw->vs.edgeflag_output; - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { const float *edgeflag = out->data[ef]; out->edgeflag = !(edgeflag[0] != 1.0f); needpipe |= !out->edgeflag; - out = (struct vertex_header *)( (char *)out + stride ); + out = (struct vertex_header *)( (char *)out + info->stride ); } } return needpipe; @@ -187,29 +186,28 @@ post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs, /* If bypass_clipping is set, skip cliptest and rhw divide. */ static boolean post_vs_viewport( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - struct vertex_header *out = vertices; + struct vertex_header *out = info->verts; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; const unsigned pos = draw_current_shader_position_output(pvs->draw); unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); - for (j = 0; j < count; j++) { + for (j = 0; j < info->count; j++) { float *position = out->data[pos]; + initialize_vertex_header(out); /* Viewport mapping only, no cliptest/rhw divide */ position[0] = position[0] * scale[0] + trans[0]; position[1] = position[1] * scale[1] + trans[1]; position[2] = position[2] * scale[2] + trans[2]; - out = (struct vertex_header *)((char *)out + stride); + out = (struct vertex_header *)((char *)out + info->stride); } - + return FALSE; } @@ -218,20 +216,25 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, * to do. */ static boolean post_vs_none( struct pt_post_vs *pvs, - struct vertex_header *vertices, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { + struct vertex_header *out = info->verts; + unsigned j; + if (0) debug_printf("%s\n", __FUNCTION__); + /* just initialize the vertex_id in all headers */ + for (j = 0; j < info->count; j++) { + initialize_vertex_header(out); + + out = (struct vertex_header *)((char *)out + info->stride); + } return FALSE; } boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, - struct vertex_header *pipeline_verts, - unsigned count, - unsigned stride ) + struct draw_vertex_info *info ) { - return pvs->run( pvs, pipeline_verts, count, stride ); + return pvs->run( pvs, info ); } @@ -272,7 +275,7 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) return NULL; pvs->draw = draw; - + return pvs; } diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c new file mode 100644 index 00000000000..5d82934889b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -0,0 +1,303 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +struct pt_so_emit { + struct draw_context *draw; + + void *buffers[PIPE_MAX_SO_BUFFERS]; + + unsigned input_vertex_stride; + const float (*inputs)[4]; + + boolean has_so; + + boolean single_buffer; + + unsigned emitted_primitives; + unsigned emitted_vertices; +}; + + +void draw_pt_so_emit_prepare(struct pt_so_emit *emit) +{ + struct draw_context *draw = emit->draw; + + emit->has_so = (draw->so.state.num_outputs > 0); + + /* if we have a state with outputs make sure we have + * buffers to output to */ + if (emit->has_so) { + boolean has_valid_buffer = FALSE; + unsigned i; + for (i = 0; i < draw->so.num_buffers; ++i) { + if (draw->so.buffers[i]) { + has_valid_buffer = TRUE; + break; + } + } + emit->has_so = has_valid_buffer; + } + + if (!emit->has_so) + return; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +} + +static boolean +is_component_writable(unsigned mask, + unsigned compo) +{ + switch (mask) { + case TGSI_WRITEMASK_NONE: + return FALSE; + case TGSI_WRITEMASK_X: + return compo == 0; + case TGSI_WRITEMASK_Y: + return compo == 1; + case TGSI_WRITEMASK_XY: + return compo == 0 || compo == 1; + case TGSI_WRITEMASK_Z: + return compo == 2; + case TGSI_WRITEMASK_XZ: + return compo == 0 || compo == 2; + case TGSI_WRITEMASK_YZ: + return compo == 1 || compo == 2; + case TGSI_WRITEMASK_XYZ: + return compo == 0 || compo == 1 || compo == 2; + case TGSI_WRITEMASK_W: + return compo == 3; + case TGSI_WRITEMASK_XW: + return compo == 0 || compo == 3; + case TGSI_WRITEMASK_YW: + return compo == 1 || compo == 3; + case TGSI_WRITEMASK_XYW: + return compo == 0 || compo == 1 || compo == 3; + case TGSI_WRITEMASK_ZW: + return compo == 2 || compo == 3; + case TGSI_WRITEMASK_XZW: + return compo == 0 || compo == 1 || compo == 3; + case TGSI_WRITEMASK_YZW: + return compo == 1 || compo == 2 || compo == 4; + case TGSI_WRITEMASK_XYZW: + return compo < 4; + default: + debug_assert(!"Unknown writemask in stream out"); + return compo < 4; + } +} + +static void so_emit_prim(struct pt_so_emit *so, + unsigned *indices, + unsigned num_vertices) +{ + unsigned slot, i; + unsigned input_vertex_stride = so->input_vertex_stride; + struct draw_context *draw = so->draw; + const float (*input_ptr)[4]; + const struct pipe_stream_output_state *state = + &draw->so.state; + float **buffer = 0; + + input_ptr = so->inputs; + + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; + unsigned total_written_compos = 0; + /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/ + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0; slot < state->num_outputs; ++slot) { + unsigned idx = state->register_index[slot]; + unsigned writemask = state->register_mask[slot]; + unsigned written_compos = 0; + unsigned compo; + + buffer = (float**)&so->buffers[state->output_buffer[slot]]; + + /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx);*/ +#if 1 + assert(!util_is_inf_or_nan(input[idx][0])); + assert(!util_is_inf_or_nan(input[idx][1])); + assert(!util_is_inf_or_nan(input[idx][2])); + assert(!util_is_inf_or_nan(input[idx][3])); +#endif + for (compo = 0; compo < 4; ++compo) { + if (is_component_writable(writemask, compo)) { + float *buf = *buffer; + buf[written_compos++] = input[idx][compo]; + } + } +#if 0 + debug_printf("\t\t(writemask = %d)%f %f %f %f\n", + writemask, + input[idx][0], + input[idx][1], + input[idx][2], + input[idx][3]); +#endif + *buffer += written_compos; + total_written_compos += written_compos; + } + if (so->single_buffer) { + int stride = (int)state->stride - + sizeof(float) * total_written_compos; + + debug_assert(stride >= 0); + *buffer = (float*) (((char*)*buffer) + stride); + } + } + so->emitted_vertices += num_vertices; + ++so->emitted_primitives; +} + +static void so_point(struct pt_so_emit *so, int idx) +{ + unsigned indices[1]; + + indices[0] = idx; + + so_emit_prim(so, indices, 1); +} + +static void so_line(struct pt_so_emit *so, int i0, int i1) +{ + unsigned indices[2]; + + indices[0] = i0; + indices[1] = i1; + + so_emit_prim(so, indices, 2); +} + +static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2) +{ + unsigned indices[3]; + + indices[0] = i0; + indices[1] = i1; + indices[2] = i2; + + so_emit_prim(so, indices, 3); +} + + +#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2) +#define LINE(gs,i0,i1) so_line(so,i0,i1) +#define POINT(gs,i0) so_point(so,i0) +#define FUNC so_run_linear +#define LOCAL_VARS +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + +#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2]) +#define LINE(gs,i0,i1) so_line(gs,elts[i0],elts[i1]) +#define POINT(gs,i0) so_point(gs,elts[i0]) +#define FUNC so_run_elts +#define LOCAL_VARS \ + const ushort *elts = input_prims->elts; +#include "draw_so_emit_tmp.h" +#undef LOCAL_VARS +#undef FUNC + + +void draw_pt_so_emit( struct pt_so_emit *emit, + const struct draw_vertex_info *input_verts, + const struct draw_prim_info *input_prims ) +{ + struct draw_context *draw = emit->draw; + struct vbuf_render *render = draw->render; + unsigned start, i; + + if (!emit->has_so) + return; + + emit->emitted_vertices = 0; + emit->emitted_primitives = 0; + emit->input_vertex_stride = input_verts->stride; + emit->inputs = (const float (*)[4])input_verts->verts->data; + for (i = 0; i < draw->so.num_buffers; ++i) { + emit->buffers[i] = draw->so.buffers[i]; + } + emit->single_buffer = TRUE; + for (i = 0; i < draw->so.state.num_outputs; ++i) { + if (draw->so.state.output_buffer[i] != 0) + emit->single_buffer = FALSE; + } + + /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + for (start = i = 0; i < input_prims->primitive_count; + start += input_prims->primitive_lengths[i], i++) + { + unsigned count = input_prims->primitive_lengths[i]; + + if (input_prims->linear) { + so_run_linear(emit, input_prims, input_verts, + start, count); + } else { + so_run_elts(emit, input_prims, input_verts, + start, count); + } + } + + render->set_stream_output_info(render, + emit->emitted_primitives, + emit->emitted_vertices); +} + + +struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ) +{ + struct pt_so_emit *emit = CALLOC_STRUCT(pt_so_emit); + if (!emit) + return NULL; + + emit->draw = draw; + + return emit; +} + +void draw_pt_so_emit_destroy( struct pt_so_emit *emit ) +{ + FREE(emit); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index d0e16c9bc3c..d89d5cd20f4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -136,7 +136,7 @@ static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { static void varray_prepare(struct draw_pt_front_end *frontend, - unsigned prim, + unsigned in_prim, struct draw_pt_middle_end *middle, unsigned opt) { @@ -144,11 +144,13 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->base.run = varray_run; - varray->input_prim = prim; - varray->output_prim = decompose_prim[prim]; + varray->input_prim = in_prim; + varray->output_prim = decompose_prim[in_prim]; varray->middle = middle; - middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max ); + middle->prepare(middle, + varray->output_prim, + opt, &varray->driver_fetch_max ); /* check that the max is even */ assert((varray->driver_fetch_max & 1) == 0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 6a48e61e624..b7e0da7d44e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -54,7 +54,7 @@ struct vcache_frontend { unsigned draw_count; unsigned fetch_count; unsigned fetch_max; - + struct draw_pt_middle_end *middle; unsigned input_prim; @@ -64,14 +64,14 @@ struct vcache_frontend { unsigned opt; }; -static INLINE void +static INLINE void vcache_flush( struct vcache_frontend *vcache ) { if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; - vcache->middle->prepare( vcache->middle, - vcache->middle_prim, - vcache->opt, + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, &vcache->fetch_max ); } @@ -356,19 +356,19 @@ vcache_check_run( struct draw_pt_front_end *frontend, if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, vcache->fetch_max, draw_count); - + if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES || fetch_count >= UNDEFINED_VERTEX_ID || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; } - + if (vcache->middle_prim != vcache->input_prim) { vcache->middle_prim = vcache->input_prim; - vcache->middle->prepare( vcache->middle, - vcache->middle_prim, - vcache->opt, + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, &vcache->fetch_max ); } @@ -467,9 +467,9 @@ vcache_check_run( struct draw_pt_front_end *frontend, -static void +static void vcache_prepare( struct draw_pt_front_end *frontend, - unsigned prim, + unsigned in_prim, struct draw_pt_middle_end *middle, unsigned opt ) { @@ -479,13 +479,19 @@ vcache_prepare( struct draw_pt_front_end *frontend, { vcache->base.run = vcache_run_extras; } - else + else { vcache->base.run = vcache_check_run; } - vcache->input_prim = prim; - vcache->output_prim = u_reduced_prim(prim); + /* VCache will always emit the reduced version of its input + * primitive, ie STRIP/FANS become TRIS, etc. + * + * This is not to be confused with what the GS might be up to, + * which is a separate issue. + */ + vcache->input_prim = in_prim; + vcache->output_prim = u_reduced_prim(in_prim); vcache->middle = middle; vcache->opt = opt; @@ -494,7 +500,9 @@ vcache_prepare( struct draw_pt_front_end *frontend, * doing so: */ vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; - middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max ); + middle->prepare( middle, + vcache->middle_prim, + opt, &vcache->fetch_max ); } diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h new file mode 100644 index 00000000000..01212a8e536 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h @@ -0,0 +1,123 @@ + +static void FUNC( struct pt_so_emit *so, + const struct draw_prim_info *input_prims, + const struct draw_vertex_info *input_verts, + unsigned start, + unsigned count) +{ + struct draw_context *draw = so->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i; + LOCAL_VARS + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + debug_assert(input_prims->primitive_count == 1); + + switch (input_prims->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) { + POINT( so, start + i + 0 ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( so , start + i + 0 , start + i + 1 ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + + LINE( so, start + i - 1, start ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + LINE( so, start + i - 1, start + i ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( so, start + i + 0, start + i + 1, start + i + 2 ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0, + start + i + 1 + (i&1), + start + i + 2 - (i&1) ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 0 + (i&1), + start + i + 1 - (i&1), + start + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start + i + 1, + start + i + 2, + start ); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( so, + start, + start + i + 1, + start + i + 2 ); + } + } + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + + for (i = 0; i+2 < count; i++) { + + if (flatfirst) { + TRIANGLE( so, start + 0, start + i + 1, start + i + 2 ); + } + else { + TRIANGLE( so, start + i + 1, start + i + 2, start + 0 ); + } + } + } + break; + + default: + debug_assert(!"Unsupported primitive in stream output"); + break; + } +} + + +#undef TRIANGLE +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 83ae26a9c2b..e32803c0720 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -117,6 +117,14 @@ struct vbuf_render { void (*release_vertices)( struct vbuf_render * ); void (*destroy)( struct vbuf_render * ); + + + /** + * Called after writing data to the stream out buffers + */ + void (*set_stream_output_info)( struct vbuf_render *vbufr, + unsigned primitive_count, + unsigned vertices_count ); }; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index b9db886a24d..57ea63fc060 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -98,6 +98,11 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_ppc( draw, shader ); #endif } +#if HAVE_LLVM + else { + vs = draw_create_vs_llvm(draw, shader); + } +#endif if (!vs) { vs = draw_create_vs_exec( draw, shader ); diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 6c7e94db433..a7319945234 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -165,7 +165,6 @@ draw_create_vs_ppc(struct draw_context *draw, const struct pipe_shader_state *templ); - struct draw_vs_varient_key; struct draw_vertex_shader; @@ -173,6 +172,11 @@ struct draw_vs_varient * draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ); +#if HAVE_LLVM +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state); +#endif /******************************************************************************** diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c new file mode 100644 index 00000000000..6c13df79132 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -0,0 +1,120 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" +#include "draw_llvm.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + +static void +vs_llvm_prepare(struct draw_vertex_shader *shader, + struct draw_context *draw) +{ + /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/ +} + +static void +vs_llvm_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + /* we should never get here since the entire pipeline is + * generated in draw_pt_fetch_shade_pipeline_llvm.c */ + debug_assert(0); +} + + +static void +vs_llvm_delete( struct draw_vertex_shader *dvs ) +{ + struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs); + struct pipe_fence_handle *fence = NULL; + struct draw_llvm_variant_list_item *li; + struct pipe_context *pipe = dvs->draw->pipe; + + /* + * XXX: This might be not neccessary at all. + */ + pipe->flush(pipe, 0, &fence); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + + + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct draw_llvm_variant_list_item *next = next_elem(li); + draw_llvm_destroy_variant(li->base); + li = next; + } + + assert(shader->variants_cached == 0); + FREE((void*) dvs->state.tokens); + FREE( dvs ); +} + + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader ); + + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &vs->base.info); + + vs->base.draw = draw; + vs->base.prepare = vs_llvm_prepare; + vs->base.run_linear = vs_llvm_run_linear; + vs->base.delete = vs_llvm_delete; + vs->base.create_varient = draw_vs_create_varient_generic; + + make_empty_list(&vs->variants); + + return &vs->base; +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index eb492042387..87e3e72a6e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -310,21 +310,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, } -typedef void (*fetch_func)(float *, const uint8_t *, unsigned, unsigned); - -/** cast wrapper */ -static void * -fetch_func_ptr_to_voidptr(fetch_func f) -{ - union { - void *v; - fetch_func f; - } u; - u.f = f; - return u.v; -} - - /** * Fetch a pixel into a 4 float AoS. * @@ -406,7 +391,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, assert(LLVMIsDeclaration(function)); LLVMAddGlobalMapping(lp_build_engine, function, - fetch_func_ptr_to_voidptr(format_desc->fetch_rgba_float)); + func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 0a690ea7476..44cfdc4d3fb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -78,6 +78,9 @@ enum LLVM_CodeGenOpt_Level { extern void lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE); +extern void +lp_set_target_options(void); + void lp_build_init(void) @@ -86,6 +89,8 @@ lp_build_init(void) gallivm_debug = debug_get_flags_option("GALLIVM_DEBUG", lp_bld_debug_flags, 0 ); #endif + lp_set_target_options(); + LLVMInitializeNativeTarget(); LLVMLinkInJIT(); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index f004c0ae451..5a9488b5f79 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -36,6 +36,7 @@ #include <llvm-c/Core.h> #include <llvm-c/ExecutionEngine.h> +#include <llvm/Target/TargetOptions.h> #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/JITEventListener.h> @@ -119,3 +120,25 @@ lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE) { llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener()); } + + +extern "C" void +lp_set_target_options(void) +{ +#if defined(DEBUG) +#if HAVE_LLVM >= 0x0207 + llvm::JITEmitDebugInfo = true; +#endif +#endif + +#if defined(DEBUG) || defined(PROFILE) + llvm::NoFramePointerElim = true; +#endif + + llvm::NoExcessFPPrecision = false; + + /* XXX: Investigate this */ +#if 0 + llvm::UnsafeFPMath = true; +#endif +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 0890078cd05..6dbedf15ca8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -164,6 +164,7 @@ tgsi_default_full_declaration( void ) full_declaration.Declaration = tgsi_default_declaration(); full_declaration.Range = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); + full_declaration.ImmediateData.u = NULL; return full_declaration; } @@ -180,7 +181,7 @@ tgsi_build_full_declaration( struct tgsi_declaration_range *dr; if( maxsize <= size ) - return 0; + return 0; declaration = (struct tgsi_declaration *) &tokens[size]; size++; @@ -235,6 +236,24 @@ tgsi_build_full_declaration( header ); } + if (full_decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + union tgsi_immediate_data *data; + + for (i = 0; i <= dr->Last; ++i) { + for (j = 0; j < 4; ++j) { + unsigned idx = i*4 + j; + if (maxsize <= size) + return 0; + data = (union tgsi_immediate_data *) &tokens[size]; + ++size; + + *data = full_decl->ImmediateData.u[idx]; + declaration_grow( declaration, header ); + } + } + } + return size; } @@ -613,6 +632,7 @@ tgsi_build_full_instruction( reg->Register.File, reg->Register.WriteMask, reg->Register.Indirect, + reg->Register.Dimension, reg->Register.Index, instruction, header ); @@ -640,6 +660,46 @@ tgsi_build_full_instruction( instruction, header ); } + + if( reg->Register.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->Dimension.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->Dimension.Indirect, + reg->Dimension.Index, + instruction, + header ); + + if( reg->Dimension.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->DimIndirect.File, + reg->DimIndirect.SwizzleX, + reg->DimIndirect.SwizzleY, + reg->DimIndirect.SwizzleZ, + reg->DimIndirect.SwizzleW, + reg->DimIndirect.Negate, + reg->DimIndirect.Absolute, + reg->DimIndirect.Indirect, + reg->DimIndirect.Dimension, + reg->DimIndirect.Index, + instruction, + header ); + } + } } for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { @@ -959,6 +1019,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -974,6 +1035,7 @@ tgsi_build_dst_register( dst_register.WriteMask = mask; dst_register.Index = index; dst_register.Indirect = indirect; + dst_register.Dimension = dimension; instruction_grow( instruction, header ); @@ -987,6 +1049,8 @@ tgsi_default_full_dst_register( void ) full_dst_register.Register = tgsi_default_dst_register(); full_dst_register.Indirect = tgsi_default_src_register(); + full_dst_register.Dimension = tgsi_default_dimension(); + full_dst_register.DimIndirect = tgsi_default_src_register(); return full_dst_register; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 13d7f5272d6..112107a0881 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -263,6 +263,7 @@ tgsi_build_dst_register( unsigned file, unsigned mask, unsigned indirect, + unsigned dimension, int index, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 35480076edf..9fcc28f4c96 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -101,7 +101,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static const char *interpolate_names[] = @@ -191,29 +193,30 @@ static const char *fs_coord_pixel_center_names[] = static void -_dump_register_dst( - struct dump_ctx *ctx, - uint file, - int index) -{ - ENM( file, file_names ); - - CHR( '[' ); - SID( index ); - CHR( ']' ); -} - - -static void _dump_register_src( struct dump_ctx *ctx, const struct tgsi_full_src_register *src ) { ENM(src->Register.File, file_names); if (src->Register.Dimension) { - CHR('['); - SID(src->Dimension.Index); - CHR(']'); + if (src->Dimension.Indirect) { + CHR( '[' ); + ENM( src->DimIndirect.File, file_names ); + CHR( '[' ); + SID( src->DimIndirect.Index ); + TXT( "]." ); + ENM( src->DimIndirect.SwizzleX, swizzle_names ); + if (src->Dimension.Index != 0) { + if (src->Dimension.Index > 0) + CHR( '+' ); + SID( src->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(src->Dimension.Index); + CHR(']'); + } } if (src->Register.Indirect) { CHR( '[' ); @@ -235,30 +238,52 @@ _dump_register_src( } } + static void -_dump_register_ind( +_dump_register_dst( struct dump_ctx *ctx, - uint file, - int index, - uint ind_file, - int ind_index, - uint ind_swizzle ) + const struct tgsi_full_dst_register *dst ) { - ENM( file, file_names ); - CHR( '[' ); - ENM( ind_file, file_names ); - CHR( '[' ); - SID( ind_index ); - TXT( "]." ); - ENM( ind_swizzle, swizzle_names ); - if (index != 0) { - if (index > 0) - CHR( '+' ); - SID( index ); + ENM(dst->Register.File, file_names); + if (dst->Register.Dimension) { + if (dst->Dimension.Indirect) { + CHR( '[' ); + ENM( dst->DimIndirect.File, file_names ); + CHR( '[' ); + SID( dst->DimIndirect.Index ); + TXT( "]." ); + ENM( dst->DimIndirect.SwizzleX, swizzle_names ); + if (dst->Dimension.Index != 0) { + if (dst->Dimension.Index > 0) + CHR( '+' ); + SID( dst->Dimension.Index ); + } + CHR( ']' ); + } else { + CHR('['); + SID(dst->Dimension.Index); + CHR(']'); + } + } + if (dst->Register.Indirect) { + CHR( '[' ); + ENM( dst->Indirect.File, file_names ); + CHR( '[' ); + SID( dst->Indirect.Index ); + TXT( "]." ); + ENM( dst->Indirect.SwizzleX, swizzle_names ); + if (dst->Register.Index != 0) { + if (dst->Register.Index > 0) + CHR( '+' ); + SID( dst->Register.Index ); + } + CHR( ']' ); + } else { + CHR( '[' ); + SID( dst->Register.Index ); + CHR( ']' ); } - CHR( ']' ); } - static void _dump_writemask( struct dump_ctx *ctx, @@ -277,6 +302,39 @@ _dump_writemask( } } +static void +dump_imm_data(struct tgsi_iterate_context *iter, + union tgsi_immediate_data *data, + unsigned num_tokens, + unsigned data_type) +{ + struct dump_ctx *ctx = (struct dump_ctx *)iter; + unsigned i ; + + TXT( " {" ); + + assert( num_tokens <= 4 ); + for (i = 0; i < num_tokens; i++) { + switch (data_type) { + case TGSI_IMM_FLOAT32: + FLT( data[i].Float ); + break; + case TGSI_IMM_UINT32: + UID(data[i].Uint); + break; + case TGSI_IMM_INT32: + SID(data[i].Int); + break; + default: + assert( 0 ); + } + + if (i < num_tokens - 1) + TXT( ", " ); + } + TXT( "}" ); +} + static boolean iter_declaration( struct tgsi_iterate_context *iter, @@ -357,6 +415,43 @@ iter_declaration( } } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i; + char range_indent[4]; + + TXT(" {"); + + if (decl->Range.Last < 10) + range_indent[0] = '\0'; + else if (decl->Range.Last < 100) { + range_indent[0] = ' '; + range_indent[1] = '\0'; + } else if (decl->Range.Last < 1000) { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = '\0'; + } else { + range_indent[0] = ' '; + range_indent[1] = ' '; + range_indent[2] = ' '; + range_indent[3] = '\0'; + } + + dump_imm_data(iter, decl->ImmediateData.u, + 4, TGSI_IMM_FLOAT32); + for(i = 1; i <= decl->Range.Last; ++i) { + /* indent by strlen of: + * "DCL IMMX[0..1] {" */ + CHR('\n'); + TXT( " " ); + TXT( range_indent ); + dump_imm_data(iter, decl->ImmediateData.u + i, + 4, TGSI_IMM_FLOAT32); + } + + TXT(" }"); + } + EOL(); return TRUE; @@ -430,33 +525,11 @@ iter_immediate( { struct dump_ctx *ctx = (struct dump_ctx *) iter; - uint i; - TXT( "IMM " ); ENM( imm->Immediate.DataType, immediate_type_names ); - TXT( " { " ); - - assert( imm->Immediate.NrTokens <= 4 + 1 ); - for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - FLT( imm->u[i].Float ); - break; - case TGSI_IMM_UINT32: - UID(imm->u[i].Uint); - break; - case TGSI_IMM_INT32: - SID(imm->u[i].Int); - break; - default: - assert( 0 ); - } - - if (i < imm->Immediate.NrTokens - 2) - TXT( ", " ); - } - TXT( " }" ); + dump_imm_data(iter, imm->u, imm->Immediate.NrTokens - 1, + imm->Immediate.DataType); EOL(); @@ -487,12 +560,12 @@ iter_instruction( INSTID( instno ); TXT( ": " ); - + ctx->indent -= info->pre_dedent; for(i = 0; (int)i < ctx->indent; ++i) TXT( " " ); ctx->indent += info->post_indent; - + if (inst->Instruction.Predicate) { CHR( '(' ); @@ -539,21 +612,7 @@ iter_instruction( CHR( ',' ); CHR( ' ' ); - if (dst->Register.Indirect) { - _dump_register_ind( - ctx, - dst->Register.File, - dst->Register.Index, - dst->Indirect.File, - dst->Indirect.Index, - dst->Indirect.SwizzleX ); - } - else { - _dump_register_dst( - ctx, - dst->Register.File, - dst->Register.Index ); - } + _dump_register_dst( ctx, dst ); _dump_writemask( ctx, dst->Register.WriteMask ); first_reg = FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c15d970b573..5275faa5e22 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -699,6 +699,19 @@ tgsi_exec_machine_bind_shader( ++mach->NumOutputs; } } + if (parse.FullToken.FullDeclaration.Declaration.File == + TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned reg; + struct tgsi_full_declaration *decl = + &parse.FullToken.FullDeclaration; + debug_assert(decl->Range.Last < TGSI_EXEC_NUM_IMMEDIATES); + for (reg = decl->Range.First; reg <= decl->Range.Last; ++reg) { + for( i = 0; i < 4; i++ ) { + int idx = reg * 4 + i; + mach->ImmArray[reg][i] = decl->ImmediateData.u[idx].Float; + } + } + } memcpy(declarations + numDeclarations, &parse.FullToken.FullDeclaration, sizeof(declarations[0])); @@ -1046,8 +1059,15 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, case TGSI_FILE_INPUT: case TGSI_FILE_SYSTEM_VALUE: for (i = 0; i < QUAD_SIZE; i++) { - /* XXX: 2D indexing */ - chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; + /* + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", + index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], + index2D->i[i], index->i[i]); + }*/ + chan->u[i] = mach->Inputs[index2D->i[i] * + TGSI_EXEC_MAX_INPUT_ATTRIBS + + index->i[i]].xyzw[swizzle].u[i]; } break; @@ -1060,6 +1080,16 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_TEMPORARY_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); + assert(index2D->i[i] < TGSI_EXEC_NUM_TEMP_ARRAYS); + + chan->u[i] = + mach->TempArray[index2D->i[i]][index->i[i]].xyzw[swizzle].u[i]; + } + break; + case TGSI_FILE_IMMEDIATE: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); @@ -1069,6 +1099,14 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, } break; + case TGSI_FILE_IMMEDIATE_ARRAY: + for (i = 0; i < QUAD_SIZE; i++) { + assert(index2D->i[i] == 0); + + chan->f[i] = mach->ImmArray[index->i[i]][swizzle]; + } + break; + case TGSI_FILE_ADDRESS: for (i = 0; i < QUAD_SIZE; i++) { assert(index->i[i] >= 0); @@ -1280,6 +1318,7 @@ store_dest(struct tgsi_exec_machine *mach, uint i; union tgsi_exec_channel null; union tgsi_exec_channel *dst; + union tgsi_exec_channel index2D; uint execmask = mach->ExecMask; int offset = 0; /* indirection offset */ int index; @@ -1325,6 +1364,77 @@ store_dest(struct tgsi_exec_machine *mach, offset = indir_index.i[0]; } + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[3][1], + * where: + * [3] = Dimension.Index + */ + if (reg->Register.Dimension) { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = reg->Dimension.Index; + + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[ind[4].y+3][1], + * where: + * ind = DimIndirect.File + * [4] = DimIndirect.Index + * .y = DimIndirect.SwizzleX + */ + if (reg->Dimension.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + unsigned swizzle; + uint i; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->DimIndirect.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); + fetch_src_file_channel(mach, + reg->DimIndirect.File, + swizzle, + &index2, + &ZeroVec, + &indir_index); + + index2D.i[0] += indir_index.i[0]; + index2D.i[1] += indir_index.i[1]; + index2D.i[2] += indir_index.i[2]; + index2D.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) { + index2D.i[i] = 0; + } + } + } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether Dimension is followed + * by a dimension register and continue the saga. + */ + } else { + index2D.i[0] = + index2D.i[1] = + index2D.i[2] = + index2D.i[3] = 0; + } + switch (reg->Register.File) { case TGSI_FILE_NULL: dst = &null; @@ -1351,6 +1461,16 @@ store_dest(struct tgsi_exec_machine *mach, dst = &mach->Temps[offset + index].xyzw[chan_index]; break; + case TGSI_FILE_TEMPORARY_ARRAY: + index = reg->Register.Index; + assert( index < TGSI_EXEC_NUM_TEMPS ); + assert( index2D.i[0] < TGSI_EXEC_NUM_TEMP_ARRAYS ); + /* XXX we use index2D.i[0] here but somehow we might + * end up with someone trying to store indirectly in + * different buffers */ + dst = &mach->TempArray[index2D.i[0]][offset + index].xyzw[chan_index]; + break; + case TGSI_FILE_ADDRESS: index = reg->Register.Index; dst = &mach->Addrs[index].xyzw[chan_index]; @@ -1536,6 +1656,19 @@ emit_primitive(struct tgsi_exec_machine *mach) } } +static void +conditional_emit_primitive(struct tgsi_exec_machine *mach) +{ + if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { + int emitted_verts = + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; + if (emitted_verts) { + emit_primitive(mach); + } + } +} + + /* * Fetch four texture samples using STR texture coordinates. */ @@ -3185,6 +3318,9 @@ exec_instruction( break; case TGSI_OPCODE_END: + /* make sure we end primitives which haven't + * been explicitly emitted */ + conditional_emit_primitive(mach); /* halt execution */ *pc = -1; break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 3caf820af67..ccf80ca6fd9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -93,6 +93,7 @@ struct tgsi_sampler #define TGSI_EXEC_NUM_TEMPS 128 #define TGSI_EXEC_NUM_IMMEDIATES 256 +#define TGSI_EXEC_NUM_TEMP_ARRAYS 8 /* * Locations of various utility registers (_I = Index, _C = Channel) @@ -237,9 +238,12 @@ struct tgsi_exec_machine */ struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; + struct tgsi_exec_vector TempArray[TGSI_EXEC_NUM_TEMP_ARRAYS][TGSI_EXEC_NUM_TEMPS]; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + float ImmArray[TGSI_EXEC_NUM_IMMEDIATES][4]; + struct tgsi_exec_vector Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS]; struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES]; diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 7e19e1fe36f..db9a3422203 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -117,6 +117,17 @@ tgsi_parse_token( next_token( ctx, &decl->Semantic ); } + if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) { + unsigned i, j; + decl->ImmediateData.u = (union tgsi_immediate_data*) + &ctx->Tokens[ctx->Position]; + for (i = 0; i <= decl->Range.Last; ++i) { + for (j = 0; j < 4; ++j) { + ctx->Position++; + } + } + } + break; } @@ -181,11 +192,6 @@ tgsi_parse_token( next_token( ctx, &inst->Dst[i].Register ); - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->Dst[i].Register.Dimension ); - if( inst->Dst[i].Register.Indirect ) { next_token( ctx, &inst->Dst[i].Indirect ); @@ -195,6 +201,24 @@ tgsi_parse_token( assert( !inst->Dst[i].Indirect.Dimension ); assert( !inst->Dst[i].Indirect.Indirect ); } + if( inst->Dst[i].Register.Dimension ) { + next_token( ctx, &inst->Dst[i].Dimension ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->Dst[i].Dimension.Dimension ); + + if( inst->Dst[i].Dimension.Indirect ) { + next_token( ctx, &inst->Dst[i].DimIndirect ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->Dst[i].Indirect.Indirect ); + assert( !inst->Dst[i].Indirect.Dimension ); + } + } } assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h index b45ccee2f63..36de8807b44 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -44,6 +44,8 @@ struct tgsi_full_dst_register { struct tgsi_dst_register Register; struct tgsi_src_register Indirect; + struct tgsi_dimension Dimension; + struct tgsi_src_register DimIndirect; }; struct tgsi_full_src_register @@ -54,12 +56,18 @@ struct tgsi_full_src_register struct tgsi_src_register DimIndirect; }; +struct tgsi_immediate_array_data +{ + union tgsi_immediate_data *u; +}; + struct tgsi_full_declaration { struct tgsi_declaration Declaration; struct tgsi_declaration_range Range; struct tgsi_declaration_dimension Dim; struct tgsi_declaration_semantic Semantic; + struct tgsi_immediate_array_data ImmediateData; }; struct tgsi_full_immediate diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index ce0a92f7fb3..97148dbe233 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -90,9 +90,18 @@ static void scan_register_dst(scan_register *reg, struct tgsi_full_dst_register *dst) { - fill_scan_register1d(reg, - dst->Register.File, - dst->Register.Index); + if (dst->Register.Dimension) { + /*FIXME: right now we don't support indirect + * multidimensional addressing */ + fill_scan_register2d(reg, + dst->Register.File, + dst->Register.Index, + dst->Dimension.Index); + } else { + fill_scan_register1d(reg, + dst->Register.File, + dst->Register.Index); + } } static void @@ -102,7 +111,6 @@ scan_register_src(scan_register *reg, if (src->Register.Dimension) { /*FIXME: right now we don't support indirect * multidimensional addressing */ - debug_assert(!src->Dimension.Indirect); fill_scan_register2d(reg, src->Register.File, src->Register.Index, @@ -236,7 +244,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 00e6392f63a..55fccba4d8c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -138,6 +138,7 @@ static boolean parse_identifier( const char **pcur, char *ret ) ret[i++] = *cur++; while (is_alpha_underscore( cur )) ret[i++] = *cur++; + ret[i++] = '\0'; *pcur = cur; return TRUE; } @@ -279,7 +280,9 @@ static const char *file_names[TGSI_FILE_COUNT] = "ADDR", "IMM", "PRED", - "SV" + "SV", + "IMMX", + "TEMPX" }; static boolean @@ -344,12 +347,68 @@ parse_opt_writemask( return TRUE; } + +/* <register_file_bracket> ::= <file> `[' + */ static boolean -parse_register_dst( struct translate_ctx *ctx, - uint *file, - int *index ); +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; -struct parsed_src_bracket { + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse simple 1d register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_1d(struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +struct parsed_bracket { int index; uint ind_file; @@ -359,21 +418,21 @@ struct parsed_src_bracket { static boolean -parse_register_src_bracket( +parse_register_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { const char *cur; uint uindex; - memset(brackets, 0, sizeof(struct parsed_src_bracket)); + memset(brackets, 0, sizeof(struct parsed_bracket)); eat_opt_white( &ctx->cur ); cur = ctx->cur; if (parse_file( &cur, &brackets->ind_file )) { - if (!parse_register_dst( ctx, &brackets->ind_file, - &brackets->ind_index )) + if (!parse_register_1d( ctx, &brackets->ind_file, + &brackets->ind_index )) return FALSE; eat_opt_white( &ctx->cur ); @@ -442,7 +501,7 @@ parse_register_src_bracket( static boolean parse_opt_register_src_bracket( struct translate_ctx *ctx, - struct parsed_src_bracket *brackets, + struct parsed_bracket *brackets, int *parsed_brackets) { const char *cur = ctx->cur; @@ -454,7 +513,7 @@ parse_opt_register_src_bracket( ++cur; ctx->cur = cur; - if (!parse_register_src_bracket(ctx, brackets)) + if (!parse_register_bracket(ctx, brackets)) return FALSE; *parsed_brackets = 1; @@ -463,46 +522,6 @@ parse_opt_register_src_bracket( return TRUE; } -/* <register_file_bracket> ::= <file> `[' - */ -static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* <register_file_bracket_index> ::= <register_file_bracket> <uint> - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} /* Parse source register operand. * <register_src> ::= <register_file_bracket_index> `]' | @@ -514,13 +533,12 @@ static boolean parse_register_src( struct translate_ctx *ctx, uint *file, - struct parsed_src_bracket *brackets) + struct parsed_bracket *brackets) { - brackets->ind_comp = TGSI_SWIZZLE_X; if (!parse_register_file_bracket( ctx, file )) return FALSE; - if (!parse_register_src_bracket( ctx, brackets )) + if (!parse_register_bracket( ctx, brackets )) return FALSE; return TRUE; @@ -628,23 +646,19 @@ parse_register_dcl( } -/* Parse destination register operand. - * <register_dst> ::= <register_file_bracket_index> `]' - */ +/* Parse destination register operand.*/ static boolean parse_register_dst( struct translate_ctx *ctx, uint *file, - int *index ) + struct parsed_bracket *brackets) { - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); + brackets->ind_comp = TGSI_SWIZZLE_X; + if (!parse_register_file_bracket( ctx, file )) return FALSE; - } - ctx->cur++; + if (!parse_register_bracket( ctx, brackets )) + return FALSE; + return TRUE; } @@ -654,11 +668,14 @@ parse_dst_operand( struct tgsi_full_dst_register *dst ) { uint file; - int index; uint writemask; const char *cur; + struct parsed_bracket bracket[2]; + int parsed_opt_brackets; - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_dst( ctx, &file, &bracket[0] )) + return FALSE; + if (!parse_opt_register_src_bracket(ctx, &bracket[1], &parsed_opt_brackets)) return FALSE; cur = ctx->cur; @@ -668,8 +685,24 @@ parse_dst_operand( return FALSE; dst->Register.File = file; - dst->Register.Index = index; + if (parsed_opt_brackets) { + dst->Register.Dimension = 1; + dst->Dimension.Indirect = 0; + dst->Dimension.Dimension = 0; + dst->Dimension.Index = bracket[0].index; + bracket[0] = bracket[1]; + } + dst->Register.Index = bracket[0].index; dst->Register.WriteMask = writemask; + if (bracket[0].ind_file != TGSI_FILE_NULL) { + dst->Register.Indirect = 1; + dst->Indirect.File = bracket[0].ind_file; + dst->Indirect.Index = bracket[0].ind_index; + dst->Indirect.SwizzleX = bracket[0].ind_comp; + dst->Indirect.SwizzleY = bracket[0].ind_comp; + dst->Indirect.SwizzleZ = bracket[0].ind_comp; + dst->Indirect.SwizzleW = bracket[0].ind_comp; + } return TRUE; } @@ -718,7 +751,7 @@ parse_src_operand( uint file; uint swizzle[4]; boolean parsed_swizzle; - struct parsed_src_bracket bracket[2]; + struct parsed_bracket bracket[2]; int parsed_opt_brackets; if (*ctx->cur == '-') { @@ -834,7 +867,7 @@ parse_instruction( inst.Predicate.Negate = 1; } - if (!parse_register_dst( ctx, &file, &index )) + if (!parse_register_1d( ctx, &file, &index )) return FALSE; if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { @@ -984,6 +1017,45 @@ static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = "PERSPECTIVE" }; + +/* parses a 4-touple of the form {x, y, z, w} + * where x, y, z, w are numbers */ +static boolean parse_immediate_data(struct translate_ctx *ctx, + float *values) +{ + unsigned i; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + return TRUE; +} + static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; @@ -994,6 +1066,7 @@ static boolean parse_declaration( struct translate_ctx *ctx ) const char *cur; uint advance; boolean is_vs_input; + boolean is_imm_array; assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT); assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT); @@ -1022,8 +1095,9 @@ static boolean parse_declaration( struct translate_ctx *ctx ) decl.Dim.Index2D = brackets[0].first; } - is_vs_input = (file == TGSI_FILE_INPUT && + is_vs_input = (file == TGSI_FILE_INPUT && ctx->processor == TGSI_PROCESSOR_VERTEX); + is_imm_array = (file == TGSI_FILE_IMMEDIATE_ARRAY); cur = ctx->cur; eat_opt_white( &cur ); @@ -1066,6 +1140,44 @@ static boolean parse_declaration( struct translate_ctx *ctx ) break; } } + } else if (is_imm_array) { + unsigned i; + float *vals_itr; + /* we have our immediate data */ + if (*cur != '{') { + report_error( ctx, "Immediate array without data" ); + return FALSE; + } + ++cur; + ctx->cur = cur; + + decl.ImmediateData.u = + MALLOC(sizeof(union tgsi_immediate_data) * 4 * + (decl.Range.Last + 1)); + vals_itr = (float*)decl.ImmediateData.u; + for (i = 0; i <= decl.Range.Last; ++i) { + if (!parse_immediate_data(ctx, vals_itr)) { + FREE(decl.ImmediateData.u); + return FALSE; + } + vals_itr += 4; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + if (i != decl.Range.Last) { + report_error( ctx, "Not enough data in immediate array!" ); + FREE(decl.ImmediateData.u); + return FALSE; + } + } else + ++ctx->cur; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + FREE(decl.ImmediateData.u); + report_error( ctx, "Immediate array data missing closing '}'" ); + return FALSE; + } + ++ctx->cur; } cur = ctx->cur; @@ -1096,6 +1208,10 @@ static boolean parse_declaration( struct translate_ctx *ctx ) ctx->tokens_cur, ctx->header, (uint) (ctx->tokens_end - ctx->tokens_cur) ); + + if (is_imm_array) + FREE(decl.ImmediateData.u); + if (advance == 0) return FALSE; ctx->tokens_cur += advance; @@ -1106,7 +1222,6 @@ static boolean parse_declaration( struct translate_ctx *ctx ) static boolean parse_immediate( struct translate_ctx *ctx ) { struct tgsi_full_immediate imm; - uint i; float values[4]; uint advance; @@ -1114,37 +1229,13 @@ static boolean parse_immediate( struct translate_ctx *ctx ) report_error( ctx, "Syntax error" ); return FALSE; } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + if (!str_match_no_case( &ctx->cur, "FLT32" ) || + is_digit_alpha_underscore( ctx->cur )) { report_error( ctx, "Expected `FLT32'" ); return FALSE; } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '{') { - report_error( ctx, "Expected `{'" ); - return FALSE; - } - ctx->cur++; - for (i = 0; i < 4; i++) { - eat_opt_white( &ctx->cur ); - if (i > 0) { - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '}') { - report_error( ctx, "Expected `}'" ); - return FALSE; - } - ctx->cur++; + + parse_immediate_data(ctx, values); imm = tgsi_default_full_immediate(); imm.Immediate.NrTokens += 4; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 5fda808dbef..3cf6893a9b9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -732,11 +732,12 @@ ureg_DECL_immediate_int( struct ureg_program *ureg, } -void +void ureg_emit_src( struct ureg_program *ureg, struct ureg_src src ) { - unsigned size = 1 + (src.Indirect ? 1 : 0) + (src.Dimension ? 1 : 0); + unsigned size = 1 + (src.Indirect ? 1 : 0) + + (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0); union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size ); unsigned n = 0; @@ -769,11 +770,27 @@ ureg_emit_src( struct ureg_program *ureg, } if (src.Dimension) { - out[0].src.Dimension = 1; - out[n].dim.Indirect = 0; - out[n].dim.Dimension = 0; - out[n].dim.Padding = 0; - out[n].dim.Index = src.DimensionIndex; + if (src.DimIndirect) { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 1; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + n++; + out[n].value = 0; + out[n].src.File = src.DimIndFile; + out[n].src.SwizzleX = src.DimIndSwizzle; + out[n].src.SwizzleY = src.DimIndSwizzle; + out[n].src.SwizzleZ = src.DimIndSwizzle; + out[n].src.SwizzleW = src.DimIndSwizzle; + out[n].src.Index = src.DimIndIndex; + } else { + out[0].src.Dimension = 1; + out[n].dim.Indirect = 0; + out[n].dim.Dimension = 0; + out[n].dim.Padding = 0; + out[n].dim.Index = src.DimensionIndex; + } n++; } @@ -1236,7 +1253,7 @@ static void emit_decls( struct ureg_program *ureg ) assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY); emit_property(ureg, - TGSI_PROPERTY_GS_MAX_VERTICES, + TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, ureg->property_gs_max_vertices); } diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 055545f3d2a..07fb01ab7b3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -49,14 +49,18 @@ struct ureg_src unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ unsigned Indirect : 1; /* BOOL */ + unsigned DimIndirect : 1; /* BOOL */ unsigned Dimension : 1; /* BOOL */ unsigned Absolute : 1; /* BOOL */ unsigned Negate : 1; /* BOOL */ int Index : 16; /* SINT */ - unsigned IndirectFile : 4; /* TGSI_FILE_ */ - int IndirectIndex : 16; /* SINT */ - unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ - int DimensionIndex : 16; /* SINT */ + unsigned IndirectFile : 4; /* TGSI_FILE_ */ + int IndirectIndex : 16; /* SINT */ + unsigned IndirectSwizzle : 2; /* TGSI_SWIZZLE_ */ + int DimensionIndex : 16; /* SINT */ + unsigned DimIndFile : 4; /* TGSI_FILE_ */ + int DimIndIndex : 16; /* SINT */ + unsigned DimIndSwizzle : 2; /* TGSI_SWIZZLE_ */ }; /* Very similar to a tgsi_dst_register, removing unsupported fields @@ -821,12 +825,28 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr ) return reg; } -static INLINE struct ureg_src +static INLINE struct ureg_src ureg_src_dimension( struct ureg_src reg, int index ) { assert(reg.File != TGSI_FILE_NULL); reg.Dimension = 1; + reg.DimIndirect = 0; + reg.DimensionIndex = index; + return reg; +} + + +static INLINE struct ureg_src +ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr, + int index ) +{ + assert(reg.File != TGSI_FILE_NULL); + reg.Dimension = 1; + reg.DimIndirect = 1; reg.DimensionIndex = index; + reg.DimIndFile = addr.File; + reg.DimIndIndex = addr.Index; + reg.DimIndSwizzle = addr.SwizzleX; return reg; } @@ -874,6 +894,10 @@ ureg_src_register(unsigned file, src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -897,6 +921,10 @@ ureg_src( struct ureg_dst dst ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; return src; } @@ -944,7 +972,11 @@ ureg_src_undef( void ) src.Negate = 0; src.Dimension = 0; src.DimensionIndex = 0; - + src.DimIndirect = 0; + src.DimIndFile = TGSI_FILE_NULL; + src.DimIndIndex = 0; + src.DimIndSwizzle = 0; + return src; } diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index f8dbd2b36a1..0e43a512ee8 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -378,25 +378,28 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, char *dst = (vert + tg->attrib[attr].output_offset); - if (tg->attrib[attr].instance_divisor) { - index = instance_id / tg->attrib[attr].instance_divisor; - } else { - index = elt; - } + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { + if (tg->attrib[attr].instance_divisor) { + index = instance_id / tg->attrib[attr].instance_divisor; + } else { + index = elt; + } - index = MIN2(index, tg->attrib[attr].max_index); + index = MIN2(index, tg->attrib[attr].max_index); - src = tg->attrib[attr].input_ptr + - tg->attrib[attr].input_stride * index; + src = tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * index; - tg->attrib[attr].fetch( data, src, 0, 0 ); + tg->attrib[attr].fetch( data, src, 0, 0 ); + } else { + data[0] = (float)instance_id; + } if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } - vert += tg->translate.key.output_stride; } } diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index ae4c80c6450..98d5a25a3f8 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -97,8 +97,6 @@ struct blitter_context_priv /* Rasterizer state. */ void *rs_state; - struct pipe_sampler_view *sampler_view; - /* Viewport state. */ struct pipe_viewport_state viewport; @@ -260,10 +258,6 @@ void util_blitter_destroy(struct blitter_context *blitter) if (ctx->sampler_state[i]) pipe->delete_sampler_state(pipe, ctx->sampler_state[i]); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - pipe_resource_reference(&ctx->vbuf, NULL); FREE(ctx); } @@ -282,6 +276,7 @@ static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx) static void blitter_restore_CSOs(struct blitter_context_priv *ctx) { struct pipe_context *pipe = ctx->pipe; + unsigned i; /* restore the state objects which are always required to be saved */ pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state); @@ -328,6 +323,13 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx) pipe->set_vertex_buffers(pipe, ctx->blitter.saved_num_vertex_buffers, ctx->blitter.saved_vertex_buffers); + + for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) { + if (ctx->blitter.saved_vertex_buffers[i].buffer) { + pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer, + NULL); + } + } ctx->blitter.saved_num_vertex_buffers = ~0; } } @@ -728,11 +730,6 @@ void util_blitter_copy_region(struct blitter_context *blitter, u_sampler_view_default_template(&viewTempl, src, src->format); view = pipe->create_sampler_view(pipe, src, &viewTempl); - if (ctx->sampler_view) { - pipe_sampler_view_reference(&ctx->sampler_view, NULL); - } - ctx->sampler_view = view; - /* Set rasterizer state, shaders, and textures. */ pipe->bind_rasterizer_state(pipe, ctx->rs_state); pipe->bind_vs_state(pipe, ctx->vs_tex); @@ -768,6 +765,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, blitter_restore_CSOs(ctx); pipe_surface_reference(&dstsurf, NULL); + pipe_sampler_view_reference(&view, NULL); } /* Clear a region of a color surface to a constant value. */ diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h index 10143a5e0f2..22849280ab9 100644 --- a/src/gallium/auxiliary/util/u_blitter.h +++ b/src/gallium/auxiliary/util/u_blitter.h @@ -27,6 +27,7 @@ #ifndef U_BLITTER_H #define U_BLITTER_H +#include "util/u_inlines.h" #include "util/u_memory.h" #include "pipe/p_state.h" @@ -259,9 +260,18 @@ util_blitter_save_vertex_buffers(struct blitter_context *blitter, int num_vertex_buffers, struct pipe_vertex_buffer *vertex_buffers) { + unsigned i; assert(num_vertex_buffers <= Elements(blitter->saved_vertex_buffers)); blitter->saved_num_vertex_buffers = num_vertex_buffers; + + for (i = 0; i < num_vertex_buffers; i++) { + if (vertex_buffers[i].buffer) { + pipe_resource_reference(&blitter->saved_vertex_buffers[i].buffer, + vertex_buffers[i].buffer); + } + } + memcpy(blitter->saved_vertex_buffers, vertex_buffers, num_vertex_buffers * sizeof(struct pipe_vertex_buffer)); diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 954f5706ef3..5e373ff24c4 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -195,7 +195,7 @@ debug_get_flags_option(const char *name, namealign = MAX2(namealign, strlen(flags->name)); for (flags = orig; flags->name; ++flags) debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name, - sizeof(unsigned long)*CHAR_BIT/4, flags->value, + (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value, flags->desc ? " " : "", flags->desc ? flags->desc : ""); } else { diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 6370e779865..fe19466436a 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -567,12 +567,26 @@ util_bswap16(uint16_t n) #define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +/** + * Align a value, only works pot alignemnts. + */ static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } +/** + * Works like align but on npot alignments. + */ +static INLINE size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + static INLINE unsigned u_minify(unsigned value, unsigned levels) { diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h index ae6f43bff87..cce0c7430e7 100644 --- a/src/gallium/auxiliary/util/u_pointer.h +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -111,6 +111,17 @@ pointer_to_func( void *p ) return pf.f; } +static INLINE void * +func_to_pointer( func_pointer f ) +{ + union { + void *p; + func_pointer f; + } pf; + pf.f = f; + return pf.p; +} + #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 64390e13851..606b9b5c6b9 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -169,6 +169,52 @@ u_vertices_per_prim(int primitive) } } +/** + * Returns the number of decomposed primitives for the given + * vertex count. + * Geometry shader is invoked once for each triangle in + * triangle strip, triangle fans and triangles and once + * for each line in line strip, line loop, lines. + */ +static INLINE unsigned +u_gs_prims_for_vertices(int primitive, int vertices) +{ + switch(primitive) { + case PIPE_PRIM_POINTS: + return vertices; + case PIPE_PRIM_LINES: + return vertices / 2; + case PIPE_PRIM_LINE_LOOP: + return vertices; + case PIPE_PRIM_LINE_STRIP: + return vertices - 1; + case PIPE_PRIM_TRIANGLES: + return vertices / 3; + case PIPE_PRIM_TRIANGLE_STRIP: + return vertices - 2; + case PIPE_PRIM_TRIANGLE_FAN: + return vertices - 2; + case PIPE_PRIM_LINES_ADJACENCY: + return vertices / 2; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return vertices - 1; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return vertices / 3; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return vertices - 2; + + /* following primitives should never be used + * with geometry shaders abd their size is + * undefined */ + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + default: + debug_printf("Unrecognized geometry shader primitive"); + return 3; + } +} + const char *u_prim_name( unsigned pipe_prim ); #endif diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 9dd1002f5f6..cab7691c705 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -199,7 +199,6 @@ util_resource_copy_region(struct pipe_context *pipe, * Fallback for pipe->clear_render_target() function. * XXX this looks too hackish to be really useful. * cpp > 4 looks like a gross hack at best... - * and we're missing the equivalent clear_depth_stencil fallback. * Plus can't use these transfer fallbacks when clearing * multisampled surfaces for instance. */ @@ -278,3 +277,110 @@ util_clear_render_target(struct pipe_context *pipe, pipe->transfer_unmap(pipe, dst_trans); pipe->transfer_destroy(pipe, dst_trans); } + +/** + * Fallback for pipe->clear_stencil() function. + * sw fallback doesn't look terribly useful here. + * Plus can't use these transfer fallbacks when clearing + * multisampled surfaces for instance. + */ +void +util_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct pipe_transfer *dst_trans; + ubyte *dst_map; + boolean need_rmw = FALSE; + + if ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + util_format_is_depth_and_stencil(dst->format)) + need_rmw = TRUE; + + assert(dst->texture); + if (!dst->texture) + return; + dst_trans = pipe_get_transfer(pipe, + dst->texture, + dst->face, + dst->level, + dst->zslice, + (need_rmw ? PIPE_TRANSFER_READ_WRITE : + PIPE_TRANSFER_WRITE), + dstx, dsty, width, height); + + dst_map = pipe->transfer_map(pipe, dst_trans); + + assert(dst_map); + + if (dst_map) { + unsigned dst_stride = dst_trans->stride; + unsigned zstencil = util_pack_z_stencil(dst->texture->format, depth, stencil); + unsigned i, j; + assert(dst_trans->stride > 0); + + switch (util_format_get_blocksize(dst->format)) { + case 1: + assert(dst->format == PIPE_FORMAT_S8_USCALED); + if(dst_stride == width) + memset(dst_map, (ubyte) zstencil, height * width); + else { + for (i = 0; i < height; i++) { + memset(dst_map, (ubyte) zstencil, width); + dst_map += dst_stride; + } + } + break; + case 2: + assert(dst->format == PIPE_FORMAT_Z16_UNORM); + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = (uint16_t) zstencil; + dst_map += dst_stride; + } + break; + case 4: + if (!need_rmw) { + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst_map; + for (j = 0; j < width; j++) + *row++ = zstencil; + dst_map += dst_stride; + } + } + else { + uint32_t dst_mask; + if (dst->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + dst_mask = 0xffffff00; + else { + assert(dst->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM); + dst_mask = 0xffffff; + } + if (clear_flags & PIPE_CLEAR_DEPTH) + dst_mask = ~dst_mask; + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst_map; + for (j = 0; j < width; j++) { + uint32_t tmp = *row & dst_mask; + *row++ = tmp & (zstencil & ~dst_mask); + } + dst_map += dst_stride; + } + } + break; + case 8: + default: + assert(0); + break; + } + } + + pipe->transfer_unmap(pipe, dst_trans); + pipe->transfer_destroy(pipe, dst_trans); +} diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index d8fb9f1d6f9..6cd12af3a8b 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -63,6 +63,14 @@ util_clear_render_target(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height); +extern void +util_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height); #endif /* U_SURFACE_H */ diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c index 75d44432d9e..af229e61a00 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.c +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -59,6 +59,8 @@ struct u_upload_mgr *u_upload_create( struct pipe_context *pipe, unsigned usage ) { struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + if (!upload) + return NULL; upload->pipe = pipe; upload->default_size = default_size; diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt index e3368fdbd69..0954c2926df 100644 --- a/src/gallium/docs/d3d11ddi.txt +++ b/src/gallium/docs/d3d11ddi.txt @@ -177,7 +177,7 @@ CreateElementLayout -> create_vertex_elements_state ! D3D11 has an extra flag (InputSlotClass) that is the same as instance_divisor == 0 CreateGeometryShader -> create_gs_state -CreateGeometryShaderWithStreamOutput -> create_gs_state +CreateGeometryShaderWithStreamOutput -> create_gs_state + create_stream_output_state CreatePixelShader -> create_fs_state CreateVertexShader -> create_vs_state > bytecode is different (see D3d10tokenizedprogramformat.hpp) @@ -220,7 +220,6 @@ CreateResource -> texture_create or buffer_create # Note that hardware often has the implicit rule, so the D3D11 interface seems to make little sense # Also, the D3D11 API does not allow the user to specify mipmap sizes, so this really seems a dubious decision on Microsoft's part - D3D11 supports specifying initial data to write in the resource - - Gallium lacks support for stream output buffer usage - Gallium does not support unordered access buffers ! D3D11 specifies mapping flags (i.e. read/write/discard);:it's unclear what they are used for here - D3D11 supports odd things in the D3D10_DDI_RESOURCE_MISC_FLAG enum (D3D10_DDI_RESOURCE_MISC_DISCARD_ON_PRESENT, D3D11_DDI_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS, D3D11_DDI_RESOURCE_MISC_BUFFER_STRUCTURED) @@ -275,8 +274,7 @@ DispatchIndirect (D3D11 only) Draw -> draw_arrays ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better -DrawAuto - - Gallium lacks stream out and DrawAuto +DrawAuto -> draw_auto DrawIndexed -> draw_elements ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better @@ -468,8 +466,7 @@ SetViewports ShaderResourceViewReadAfterWriteHazard -> flush(PIPE_FLUSH_RENDER_CACHE) - Gallium does not support specifying this per-render-target/view -SoSetTargets - - Gallium does not support stream out +SoSetTargets -> set_stream_output_buffers VsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_VERTEX, i, phBuffers[i]) * may want to split into fragment/vertex-specific versions diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst index 27d352b7a39..4e35a4c4082 100644 --- a/src/gallium/docs/source/context.rst +++ b/src/gallium/docs/source/context.rst @@ -1,3 +1,5 @@ +.. _context: + Context ======= @@ -120,7 +122,7 @@ will be cleared. color value. While it is only possible to clear one surface at a time (which can include several layers), this surface need not be bound to the framebuffer. -``clear_depth_stencil``clears a single depth, stencil or depth/stencil surface +``clear_depth_stencil`` clears a single depth, stencil or depth/stencil surface with the specified depth and stencil values (for combined depth/stencil buffers, is is also possible to only clear one or the other part). While it is only possible to clear one surface at a time (which can include several layers), @@ -295,6 +297,23 @@ for a driver to batch multiple blits with the same source and destination. +Stream Output +^^^^^^^^^^^^^ + +Stream output, also known as transform feedback allows writing the results of the +vertex pipeline (after the geometry shader or vertex shader if no geometry shader +is present) to be written to a buffer created with a ``PIPE_BIND_STREAM_OUTPUT`` +flag. + +First a stream output state needs to be created with the +``create_stream_output_state`` call. It specific the details of what's being written, +to which buffer and with what kind of a writemask. + +Then target buffers needs to be set with the call to ``set_stream_output_buffers`` +which sets the buffers and the offsets from the start of those buffer to where +the data will be written to. + + Transfers ^^^^^^^^^ diff --git a/src/gallium/docs/source/cso/blend.rst b/src/gallium/docs/source/cso/blend.rst index c74396284cc..7bde10c1245 100644 --- a/src/gallium/docs/source/cso/blend.rst +++ b/src/gallium/docs/source/cso/blend.rst @@ -14,21 +14,74 @@ in other modern and legacy drawing APIs. XXX blurb about dual-source blends +Logical Operations +------------------ + +Logical operations, also known as logicops, lops, or rops, are supported. +Only two-operand logicops are available. When logicops are enabled, all other +blend state is ignored, including per-render-target state, so logicops are +performed on all render targets. + +.. warning:: + The blend_enable flag is ignored for all render targets when logical + operations are enabled. + +For a source component `s` and destination component `d`, the logical +operations are defined as taking the bits of each channel of each component, +and performing one of the following operations per-channel: + +* ``CLEAR``: 0 +* ``NOR``: :math:`\lnot(s \lor d)` +* ``AND_INVERTED``: :math:`\lnot s \land d` +* ``COPY_INVERTED``: :math:`\lnot s` +* ``AND_REVERSE``: :math:`s \land \lnot d` +* ``INVERT``: :math:`\lnot d` +* ``XOR``: :math:`s \oplus d` +* ``NAND``: :math:`\lnot(s \land d)` +* ``AND``: :math:`s \land d` +* ``EQUIV``: :math:`\lnot(s \oplus d)` +* ``NOOP``: :math:`d` +* ``OR_INVERTED``: :math:`\lnot s \lor d` +* ``COPY``: :math:`s` +* ``OR_REVERSE``: :math:`s \lor \lnot d` +* ``OR``: :math:`s \lor d` +* ``SET``: 1 + +.. note:: + The logical operation names and definitions match those of the OpenGL API, + and are similar to the ROP2 and ROP3 definitions of GDI. This is + intentional, to ease transitions to Gallium. + Members ------- +These members affect all render targets. + +dither +%%%%%% + +Whether dithering is enabled. + +.. note:: + Dithering is completely implementation-dependent. It may be ignored by + drivers for any reason, and some render targets may always or never be + dithered depending on their format or usage flags. + +logicop_enable +%%%%%%%%%%%%%% + +Whether the blender should perform a logicop instead of blending. + +logicop_func +%%%%%%%%%%%% + +The logicop to use. One of ``PIPE_LOGICOP``. + independent_blend_enable If enabled, blend state is different for each render target, and for each render target set in the respective member of the rt array. If disabled, blend state is the same for all render targets, and only the first member of the rt array contains valid data. -logicop_enable - Enables logic ops. Cannot be enabled at the same time as blending, and - is always the same for all render targets. -logicop_func - The logic operation to use if logic ops are enabled. One of PIPE_LOGICOP. -dither - Whether dithering is enabled. Note: Dithering is implementation-dependent. rt Contains the per-rendertarget blend state. diff --git a/src/gallium/docs/source/cso/velems.rst b/src/gallium/docs/source/cso/velems.rst index 92cde014fb8..978ad4a2434 100644 --- a/src/gallium/docs/source/cso/velems.rst +++ b/src/gallium/docs/source/cso/velems.rst @@ -3,9 +3,44 @@ Vertex Elements =============== -This state controls format etc. of the input attributes contained -in the pipe_vertex_buffer(s). There's one pipe_vertex_element array member -for each input attribute. +This state controls the format of the input attributes contained in +pipe_vertex_buffers. There is one pipe_vertex_element array member for each +input attribute. + +Input Formats +------------- + +Gallium supports a diverse range of formats for vertex data. Drivers are +guaranteed to support 32-bit floating-point vectors of one to four components. +Additionally, they may support the following formats: + +* Integers, signed or unsigned, normalized or non-normalized, 8, 16, or 32 + bits wide +* Floating-point, 16, 32, or 64 bits wide + +At this time, support for varied vertex data formats is limited by driver +deficiencies. It is planned to support a single uniform set of formats for all +Gallium drivers at some point. + +Rather than attempt to specify every small nuance of behavior, Gallium uses a +very simple set of rules for padding out unspecified components. If an input +uses less than four components, it will be padded out with the constant vector +``(0, 0, 0, 1)``. + +Fog, point size, the facing bit, and edgeflags, all are in the standard format +of ``(x, 0, 0, 1)``, and so only the first component of those inputs is used. + +Position +%%%%%%%% + +Vertex position may be specified with two to four components. Using less than +two components is not allowed. + +Colors +%%%%%% + +Colors, both front- and back-facing, may omit the alpha component, only using +three components. Using less than three components is not allowed. Members ------- diff --git a/src/gallium/docs/source/distro.rst b/src/gallium/docs/source/distro.rst index 100afe33972..6ba5a056f45 100644 --- a/src/gallium/docs/source/distro.rst +++ b/src/gallium/docs/source/distro.rst @@ -10,69 +10,100 @@ Drivers Cell ^^^^ +Simple driver for the IBM Cell architecture. Runs faster than :ref:`softpipe` +on Cell-based machines. + Failover ^^^^^^^^ -Deprecated. +Broken and deprecated. Intel i915 ^^^^^^^^^^ +Driver for Intel i915 and i945 chipsets. + Intel i965 ^^^^^^^^^^ -Highly experimental. +Highly experimental driver for Intel i965 chipsets. Identity ^^^^^^^^ -Wrapper driver. +Wrapper driver. The identity driver is a simple skeleton that passes through +all of its :ref:`Context` and :ref:`Screen` methods to an underlying Context +and Screen, and as such, it is an excellent starting point for new drivers. LLVM Softpipe ^^^^^^^^^^^^^ -nVidia nv30 -^^^^^^^^^^^ +A version of :ref:`softpipe` that uses the Low-Level Virtual Machine to +dynamically generate optimized rasterizing pipelines. -nVidia nv40 +nVidia nvfx ^^^^^^^^^^^ +Driver for the nVidia nv30 and nv40 families of GPUs. + nVidia nv50 ^^^^^^^^^^^ +Driver for the nVidia nv50 family of GPUs. + VMWare SVGA ^^^^^^^^^^^ +Driver for VMWare virtualized guest operating system graphics processing. + ATI r300 ^^^^^^^^ -Testing-quality. +Driver for the ATI/AMD r300, r400, and r500 families of GPUs. + +.. _softpipe: Softpipe ^^^^^^^^ -Reference software rasterizer. +Reference software rasterizer. Slow but accurate. Trace ^^^^^ -Wrapper driver. +Wrapper driver. Trace dumps an XML record of the calls made to the +:ref:`Context` and :ref:`Screen` objects that it wraps. State Trackers -------------- +.. _dri: + Direct Rendering Infrastructure ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Tracker that implements the client-side DRI protocol, for providing direct +acceleration services to X11 servers with the DRI extension. Supports DRI1 +and DRI2. Only GL is supported. + +.. _egl: + EGL ^^^ +Tracker for the Khronos EGL standard, used to set up GL and GLES contexts +without extra knowledge of the underlying windowing system. + GLX ^^^ MesaGL ^^^^^^ +Tracker implementing a GL state machine. Not usable as a standalone tracker; +Mesa should be built with another state tracker, such as :ref:`DRI` or +:ref:`EGL`. + Python ^^^^^^ @@ -82,9 +113,12 @@ OpenVG WGL ^^^ -Xorg XFree86 DDX +Xorg/XFree86 DDX ^^^^^^^^^^^^^^^^ +Tracker for XFree86 and Xorg X11 servers. Provides device-dependent +modesetting and acceleration as a DDX driver. + Auxiliary --------- diff --git a/src/gallium/docs/source/glossary.rst b/src/gallium/docs/source/glossary.rst index 0696cb5d277..acde56eafc4 100644 --- a/src/gallium/docs/source/glossary.rst +++ b/src/gallium/docs/source/glossary.rst @@ -21,3 +21,7 @@ Glossary LOD Level of Detail. Also spelled "LoD." The value that determines when the switches between mipmaps occur during texture sampling. + + GLSL + GL Shading Language. The official, common high-level shader language used + in GL 2.0 and above. diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 48d9d570b6f..e3ef49c862c 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -36,7 +36,9 @@ The integer capabilities: bound. * ``OCCLUSION_QUERY``: Whether occlusion queries are available. * ``TIMER_QUERY``: Whether timer queries are available. -* ``TEXTURE_SHADOW_MAP``: XXX +* ``TEXTURE_SHADOW_MAP``: indicates whether the fragment shader hardware + can do the depth texture / Z comparison operation in TEX instructions + for shadow testing. * ``MAX_TEXTURE_2D_LEVELS``: The maximum number of mipmap levels available for a 2D texture. * ``MAX_TEXTURE_3D_LEVELS``: The maximum number of mipmap levels available @@ -55,7 +57,13 @@ The integer capabilities: from color blend equations, in :ref:`Blend` state. * ``SM3``: Whether the vertex shader and fragment shader support equivalent opcodes to the Shader Model 3 specification. XXX oh god this is horrible -* ``MAX_PREDICATE_REGISTERS``: XXX +* ``MAX_PREDICATE_REGISTERS``: indicates the number of predicate registers + available. Predicate register may be set as a side-effect of ALU + instructions to indicate less than, greater than or equal to zero. + Later instructions can use a predicate register to control writing to + each channel of destination registers. NOTE: predicate registers have + not been fully implemented in Gallium at this time. See the + GL_NV_fragment_program extension for more info (look for "condition codes"). * ``MAX_COMBINED_SAMPLERS``: The total number of samplers accessible from the vertex and fragment shader, inclusive. * ``MAX_CONST_BUFFERS``: Maximum number of constant buffers that can be bound diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 411dce856a3..e588c5b7bd8 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -26,9 +26,11 @@ each of the components of *dst*. When this happens, the result is said to be Instruction Set --------------- -From GL_NV_vertex_program +Core ISA ^^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are guaranteed to be available regardless of the driver being +used. .. opcode:: ARL - Address Register Load @@ -637,10 +639,6 @@ This instruction replicates its result. Considered for removal. -From GL_NV_vertex_program2 -^^^^^^^^^^^^^^^^^^^^^^^^^^ - - .. opcode:: ARA - Address Register Add TBD @@ -827,11 +825,14 @@ This instruction replicates its result. Considered for removal. -From GL_NV_gpu_program4 +Compute ISA ^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are primarily provided for special-use computational shaders. Support for these opcodes indicated by a special pipe capability bit (TBD). +XXX so let's discuss it, yeah? + .. opcode:: CEIL - Ceiling .. math:: @@ -989,10 +990,17 @@ Support for these opcodes indicated by a special pipe capability bit (TBD). TBD +.. note:: + + Support for CONT is determined by a special capability bit, + ``TGSI_CONT_SUPPORTED``. See :ref:`Screen` for more information. + -From GL_NV_geometry_program4 +Geometry ISA ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +These opcodes are only supported in geometry shaders; they have no meaning +in any other type of shader. .. opcode:: EMIT - Emit @@ -1004,9 +1012,11 @@ From GL_NV_geometry_program4 TBD -From GLSL +GLSL ISA ^^^^^^^^^^ +These opcodes are part of :term:`GLSL`'s opcode set. Support for these +opcodes is determined by a special capability bit, ``GLSL``. .. opcode:: BGNLOOP - Begin a Loop @@ -1045,6 +1055,7 @@ This instruction replicates its result. ps_2_x ^^^^^^^^^^^^ +XXX wait what .. opcode:: CALLNZ - Subroutine Call If Not Zero @@ -1062,10 +1073,15 @@ ps_2_x .. _doubleopcodes: -Double Opcodes +Double ISA ^^^^^^^^^^^^^^^ -.. opcode:: DADD - Add Double +The double-precision opcodes reinterpret four-component vectors into +two-component vectors with doubled precision in each component. + +Support for these opcodes is XXX undecided. :T + +.. opcode:: DADD - Add .. math:: @@ -1074,7 +1090,7 @@ Double Opcodes dst.zw = src0.zw + src1.zw -.. opcode:: DDIV - Divide Double +.. opcode:: DDIV - Divide .. math:: @@ -1082,7 +1098,7 @@ Double Opcodes dst.zw = src0.zw / src1.zw -.. opcode:: DSEQ - Set Double on Equal +.. opcode:: DSEQ - Set on Equal .. math:: @@ -1090,7 +1106,7 @@ Double Opcodes dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F -.. opcode:: DSLT - Set Double on Less than +.. opcode:: DSLT - Set on Less than .. math:: @@ -1098,7 +1114,7 @@ Double Opcodes dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F -.. opcode:: DFRAC - Double Fraction +.. opcode:: DFRAC - Fraction .. math:: @@ -1107,23 +1123,33 @@ Double Opcodes dst.zw = src.zw - \lfloor src.zw\rfloor -.. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components +.. opcode:: DFRACEXP - Convert Number to Fractional and Integral Components + +Like the ``frexp()`` routine in many math libraries, this opcode stores the +exponent of its source to ``dst0``, and the significand to ``dst1``, such that +:math:`dst1 \times 2^{dst0} = src` . .. math:: - dst0.xy = frexp(src.xy, dst1.xy) + dst0.xy = exp(src.xy) + + dst1.xy = frac(src.xy) + + dst0.zw = exp(src.zw) - dst0.zw = frexp(src.zw, dst1.zw) + dst1.zw = frac(src.zw) -.. opcode:: DLDEXP - Multiple Double Number by Integral Power of 2 +.. opcode:: DLDEXP - Multiply Number by Integral Power of 2 + +This opcode is the inverse of :opcode:`DFRACEXP`. .. math:: - dst.xy = ldexp(src0.xy, src1.xy) + dst.xy = src0.xy \times 2^{src1.xy} - dst.zw = ldexp(src0.zw, src1.zw) + dst.zw = src0.zw \times 2^{src1.zw} -.. opcode:: DMIN - Minimum Double +.. opcode:: DMIN - Minimum .. math:: @@ -1131,7 +1157,7 @@ Double Opcodes dst.zw = min(src0.zw, src1.zw) -.. opcode:: DMAX - Maximum Double +.. opcode:: DMAX - Maximum .. math:: @@ -1139,7 +1165,7 @@ Double Opcodes dst.zw = max(src0.zw, src1.zw) -.. opcode:: DMUL - Multiply Double +.. opcode:: DMUL - Multiply .. math:: @@ -1148,7 +1174,7 @@ Double Opcodes dst.zw = src0.zw \times src1.zw -.. opcode:: DMAD - Multiply And Add Doubles +.. opcode:: DMAD - Multiply And Add .. math:: @@ -1157,7 +1183,7 @@ Double Opcodes dst.zw = src0.zw \times src1.zw + src2.zw -.. opcode:: DRCP - Reciprocal Double +.. opcode:: DRCP - Reciprocal .. math:: @@ -1165,7 +1191,7 @@ Double Opcodes dst.zw = \frac{1}{src.zw} -.. opcode:: DSQRT - Square root double +.. opcode:: DSQRT - Square Root .. math:: @@ -1260,6 +1286,8 @@ wrapping rules. Declaration Semantic ^^^^^^^^^^^^^^^^^^^^^^^^ + Vertex and fragment shader input and output registers may be labeled + with semantic information consisting of a name and index. Follows Declaration token if Semantic bit is set. @@ -1280,90 +1308,115 @@ Declaration Semantic TGSI_SEMANTIC_POSITION """""""""""""""""""""" -Position, sometimes known as HPOS or WPOS for historical reasons, is the -location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z`` -are the Cartesian coordinates, and ``w`` is the homogenous coordinate and used -for the perspective divide, if enabled. +For vertex shaders, TGSI_SEMANTIC_POSITION indicates the vertex shader +output register which contains the homogeneous vertex position in the clip +space coordinate system. After clipping, the X, Y and Z components of the +vertex will be divided by the W value to get normalized device coordinates. -As a vertex shader output, position should be scaled to the viewport. When -used in fragment shaders, position will be in window coordinates. The convention -used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties. +For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that +fragment shader input contains the fragment's window position. The X +component starts at zero and always increases from left to right. +The Y component starts at zero and always increases but Y=0 may either +indicate the top of the window or the bottom depending on the fragment +coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN). +The Z coordinate ranges from 0 to 1 to represent depth from the front +to the back of the Z buffer. The W component contains the reciprocol +of the interpolated vertex position W component. -XXX additionally, is there a way to configure the perspective divide? it's -accelerated on most chipsets AFAIK... +Fragment shaders may also declare an output register with +TGSI_SEMANTIC_POSITION. Only the Z component is writable. This allows +the fragment shader to change the fragment's Z position. -Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can -be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``. -XXX usually? can we solidify that? TGSI_SEMANTIC_COLOR """"""""""""""""""" -Colors are used to, well, color the primitives. Colors are always in -``(r, g, b, a)`` format. +For vertex shader outputs or fragment shader inputs/outputs, this +label indicates that the resister contains an R,G,B,A color. + +Several shader inputs/outputs may contain colors so the semantic index +is used to distinguish them. For example, color[0] may be the diffuse +color while color[1] may be the specular color. + +This label is needed so that the flat/smooth shading can be applied +to the right interpolants during rasterization. + -If alpha is not specified, it defaults to 1. TGSI_SEMANTIC_BCOLOR """""""""""""""""""" Back-facing colors are only used for back-facing polygons, and are only valid in vertex shader outputs. After rasterization, all polygons are front-facing -and COLOR and BCOLOR end up occupying the same slots in the fragment, so -all BCOLORs effectively become regular COLORs in the fragment shader. +and COLOR and BCOLOR end up occupying the same slots in the fragment shader, +so all BCOLORs effectively become regular COLORs in the fragment shader. + TGSI_SEMANTIC_FOG """"""""""""""""" -The fog coordinate historically has been used to replace the depth coordinate -for generation of fog in dedicated fog blocks. Gallium, however, does not use -dedicated fog acceleration, placing it entirely in the fragment shader -instead. +Vertex shader inputs and outputs and fragment shader inputs may be +labeled with TGSI_SEMANTIC_FOG to indicate that the register contains +a fog coordinate in the form (F, 0, 0, 1). Typically, the fragment +shader will use the fog coordinate to compute a fog blend factor which +is used to blend the normal fragment color with a constant fog color. + +Only the first component matters when writing from the vertex shader; +the driver will ensure that the coordinate is in this format when used +as a fragment shader input. -The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first -component matters when writing from the vertex shader; the driver will ensure -that the coordinate is in this format when used as a fragment shader input. TGSI_SEMANTIC_PSIZE """"""""""""""""""" -PSIZE, or point size, is used to specify point sizes per-vertex. It should -be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size. -Only the first component matters when writing from the vertex shader. +Vertex shader input and output registers may be labeled with +TGIS_SEMANTIC_PSIZE to indicate that the register contains a point size +in the form (S, 0, 0, 1). The point size controls the width or diameter +of points for rasterization. This label cannot be used in fragment +shaders. When using this semantic, be sure to set the appropriate state in the :ref:`rasterizer` first. + TGSI_SEMANTIC_GENERIC """"""""""""""""""""" -Generic semantics are nearly always used for texture coordinate attributes, -in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds -of lookups, and ``q`` is the level-of-detail bias for biased sampling. +All vertex/fragment shader inputs/outputs not labeled with any other +semantic label can be considered to be generic attributes. Typical +uses of generic inputs/outputs are texcoords and user-defined values. -These attributes are called "generic" because they may be used for anything -else, including parameters, texture generation information, or anything that -can be stored inside a four-component vector. TGSI_SEMANTIC_NORMAL """""""""""""""""""" -Vertex normal; could be used to implement per-pixel lighting for legacy APIs -that allow mixing fixed-function and programmable stages. +Indicates that a vertex shader input is a normal vector. This is +typically only used for legacy graphics APIs. + TGSI_SEMANTIC_FACE """""""""""""""""" -FACE is the facing bit, to store the facing information for the fragment -shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive -when the fragment is front-facing, and negative when the component is -back-facing. +This label applies to fragment shader inputs only and indicates that +the register contains front/back-face information of the form (F, 0, +0, 1). The first component will be positive when the fragment belongs +to a front-facing polygon, and negative when the fragment belongs to a +back-facing polygon. + TGSI_SEMANTIC_EDGEFLAG """""""""""""""""""""" -XXX no clue +For vertex shaders, this sematic label indicates that an input or +output is a boolean edge flag. The register layout is [F, x, x, x] +where F is 0.0 or 1.0 and x = don't care. Normally, the vertex shader +simply copies the edge flag input to the edgeflag output. + +Edge flags are used to control which lines or points are actually +drawn when the polygon mode converts triangles/quads/polygons into +points or lines. + Properties @@ -1420,9 +1473,9 @@ well. +--------------------+--------------+--------------------+--------------+ | Texture Components | Gallium | OpenGL | Direct3D 9 | +====================+==============+====================+==============+ -| R | XXX TBD | (r, 0, 0, 1) | (r, 1, 1, 1) | +| R | (r, 0, 0, 1) | (r, 0, 0, 1) | (r, 1, 1, 1) | +--------------------+--------------+--------------------+--------------+ -| RG | XXX TBD | (r, g, 0, 1) | (r, g, 1, 1) | +| RG | (r, g, 0, 1) | (r, g, 0, 1) | (r, g, 1, 1) | +--------------------+--------------+--------------------+--------------+ | RGB | (r, g, b, 1) | (r, g, b, 1) | (r, g, b, 1) | +--------------------+--------------+--------------------+--------------+ diff --git a/src/gallium/drivers/galahad/Makefile b/src/gallium/drivers/galahad/Makefile new file mode 100644 index 00000000000..e9c4f7e28c8 --- /dev/null +++ b/src/gallium/drivers/galahad/Makefile @@ -0,0 +1,11 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = galahad + +C_SOURCES = \ + glhd_objects.c \ + glhd_context.c \ + glhd_screen.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/galahad/SConscript b/src/gallium/drivers/galahad/SConscript new file mode 100644 index 00000000000..ca6213cfff1 --- /dev/null +++ b/src/gallium/drivers/galahad/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +identity = env.ConvenienceLibrary( + target = 'identity', + source = [ + 'glhd_context.c', + 'glhd_objects.c', + 'glhd_screen.c', + ]) + +Export('identity') diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c new file mode 100644 index 00000000000..ab6f17b3ab8 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -0,0 +1,997 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_context.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "glhd_context.h" +#include "glhd_objects.h" + + +static void +galahad_destroy(struct pipe_context *_pipe) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->destroy(pipe); + + FREE(glhd_pipe); +} + +static void +galahad_draw_arrays(struct pipe_context *_pipe, + unsigned prim, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->draw_arrays(pipe, + prim, + start, + count); +} + +static void +galahad_draw_elements(struct pipe_context *_pipe, + struct pipe_resource *_indexResource, + unsigned indexSize, + int indexBias, + unsigned prim, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_indexResource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *indexResource = glhd_resource->resource; + + pipe->draw_elements(pipe, + indexResource, + indexSize, + indexBias, + prim, + start, + count); +} + +static void +galahad_draw_range_elements(struct pipe_context *_pipe, + struct pipe_resource *_indexResource, + unsigned indexSize, + int indexBias, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_indexResource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *indexResource = glhd_resource->resource; + + pipe->draw_range_elements(pipe, + indexResource, + indexSize, + indexBias, + minIndex, + maxIndex, + mode, + start, + count); +} + +static struct pipe_query * +galahad_create_query(struct pipe_context *_pipe, + unsigned query_type) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if (query_type == PIPE_QUERY_OCCLUSION_COUNTER && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_OCCLUSION_QUERY)) { + glhd_error("Occlusion query requested but not supported"); + } + + if (query_type == PIPE_QUERY_TIME_ELAPSED && + !pipe->screen->get_param(pipe->screen, PIPE_CAP_TIMER_QUERY)) { + glhd_error("Timer query requested but not supported"); + } + + return pipe->create_query(pipe, + query_type); +} + +static void +galahad_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->destroy_query(pipe, + query); +} + +static void +galahad_begin_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->begin_query(pipe, + query); +} + +static void +galahad_end_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->end_query(pipe, + query); +} + +static boolean +galahad_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, + boolean wait, + void *result) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->get_query_result(pipe, + query, + wait, + result); +} + +static void * +galahad_create_blend_state(struct pipe_context *_pipe, + const struct pipe_blend_state *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if (blend->logicop_enable) { + if (blend->rt[0].blend_enable) { + glhd_warn("Blending enabled for render target 0, but logicops " + "are enabled"); + } + } + + return pipe->create_blend_state(pipe, + blend); +} + +static void +galahad_bind_blend_state(struct pipe_context *_pipe, + void *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_blend_state(pipe, + blend); +} + +static void +galahad_delete_blend_state(struct pipe_context *_pipe, + void *blend) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_blend_state(pipe, + blend); +} + +static void * +galahad_create_sampler_state(struct pipe_context *_pipe, + const struct pipe_sampler_state *sampler) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_sampler_state(pipe, + sampler); +} + +static void +galahad_bind_fragment_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_fragment_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +galahad_bind_vertex_sampler_states(struct pipe_context *_pipe, + unsigned num_samplers, + void **samplers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vertex_sampler_states(pipe, + num_samplers, + samplers); +} + +static void +galahad_delete_sampler_state(struct pipe_context *_pipe, + void *sampler) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_sampler_state(pipe, + sampler); +} + +static void * +galahad_create_rasterizer_state(struct pipe_context *_pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + if (rasterizer->point_quad_rasterization) { + if (rasterizer->point_smooth) { + glhd_warn("Point smoothing requested but ignored"); + } + } else { + if (rasterizer->sprite_coord_enable) { + glhd_warn("Point sprites requested but ignored"); + } + } + + return pipe->create_rasterizer_state(pipe, + rasterizer); +} + +static void +galahad_bind_rasterizer_state(struct pipe_context *_pipe, + void *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_rasterizer_state(pipe, + rasterizer); +} + +static void +galahad_delete_rasterizer_state(struct pipe_context *_pipe, + void *rasterizer) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_rasterizer_state(pipe, + rasterizer); +} + +static void * +galahad_create_depth_stencil_alpha_state(struct pipe_context *_pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void +galahad_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void +galahad_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *depth_stencil_alpha) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_depth_stencil_alpha_state(pipe, + depth_stencil_alpha); +} + +static void * +galahad_create_fs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_fs_state(pipe, + fs); +} + +static void +galahad_bind_fs_state(struct pipe_context *_pipe, + void *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_fs_state(pipe, + fs); +} + +static void +galahad_delete_fs_state(struct pipe_context *_pipe, + void *fs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_fs_state(pipe, + fs); +} + +static void * +galahad_create_vs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_vs_state(pipe, + vs); +} + +static void +galahad_bind_vs_state(struct pipe_context *_pipe, + void *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vs_state(pipe, + vs); +} + +static void +galahad_delete_vs_state(struct pipe_context *_pipe, + void *vs) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_vs_state(pipe, + vs); +} + + +static void * +galahad_create_vertex_elements_state(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *vertex_elements) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + return pipe->create_vertex_elements_state(pipe, + num_elements, + vertex_elements); +} + +static void +galahad_bind_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->bind_vertex_elements_state(pipe, + velems); +} + +static void +galahad_delete_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->delete_vertex_elements_state(pipe, + velems); +} + +static void +galahad_set_blend_color(struct pipe_context *_pipe, + const struct pipe_blend_color *blend_color) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_blend_color(pipe, + blend_color); +} + +static void +galahad_set_stencil_ref(struct pipe_context *_pipe, + const struct pipe_stencil_ref *stencil_ref) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_stencil_ref(pipe, + stencil_ref); +} + +static void +galahad_set_clip_state(struct pipe_context *_pipe, + const struct pipe_clip_state *clip) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_clip_state(pipe, + clip); +} + +static void +galahad_set_sample_mask(struct pipe_context *_pipe, + unsigned sample_mask) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_sample_mask(pipe, + sample_mask); +} + +static void +galahad_set_constant_buffer(struct pipe_context *_pipe, + uint shader, + uint index, + struct pipe_resource *_resource) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *unwrapped_resource; + struct pipe_resource *resource = NULL; + + /* XXX hmm? unwrap the input state */ + if (_resource) { + unwrapped_resource = galahad_resource_unwrap(_resource); + resource = unwrapped_resource; + } + + pipe->set_constant_buffer(pipe, + shader, + index, + resource); +} + +static void +galahad_set_framebuffer_state(struct pipe_context *_pipe, + const struct pipe_framebuffer_state *_state) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_framebuffer_state unwrapped_state; + struct pipe_framebuffer_state *state = NULL; + unsigned i; + + if (_state->nr_cbufs > PIPE_MAX_COLOR_BUFS) { + glhd_error("%d render targets bound, but only %d are permitted by API", + _state->nr_cbufs, PIPE_MAX_COLOR_BUFS); + } else if (_state->nr_cbufs > + pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)) { + glhd_warn("%d render targets bound, but only %d are supported", + _state->nr_cbufs, + pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS)); + } + + /* unwrap the input state */ + if (_state) { + memcpy(&unwrapped_state, _state, sizeof(unwrapped_state)); + for(i = 0; i < _state->nr_cbufs; i++) + unwrapped_state.cbufs[i] = galahad_surface_unwrap(_state->cbufs[i]); + for (; i < PIPE_MAX_COLOR_BUFS; i++) + unwrapped_state.cbufs[i] = NULL; + unwrapped_state.zsbuf = galahad_surface_unwrap(_state->zsbuf); + state = &unwrapped_state; + } + + pipe->set_framebuffer_state(pipe, + state); +} + +static void +galahad_set_polygon_stipple(struct pipe_context *_pipe, + const struct pipe_poly_stipple *poly_stipple) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_polygon_stipple(pipe, + poly_stipple); +} + +static void +galahad_set_scissor_state(struct pipe_context *_pipe, + const struct pipe_scissor_state *scissor) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_scissor_state(pipe, + scissor); +} + +static void +galahad_set_viewport_state(struct pipe_context *_pipe, + const struct pipe_viewport_state *viewport) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->set_viewport_state(pipe, + viewport); +} + +static void +galahad_set_fragment_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_fragment_sampler_views(pipe, num, views); +} + +static void +galahad_set_vertex_sampler_views(struct pipe_context *_pipe, + unsigned num, + struct pipe_sampler_view **_views) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_sampler_view *unwrapped_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view **views = NULL; + unsigned i; + + if (_views) { + for (i = 0; i < num; i++) + unwrapped_views[i] = galahad_sampler_view_unwrap(_views[i]); + for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++) + unwrapped_views[i] = NULL; + + views = unwrapped_views; + } + + pipe->set_vertex_sampler_views(pipe, num, views); +} + +static void +galahad_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *_buffers) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_vertex_buffer unwrapped_buffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_buffer *buffers = NULL; + unsigned i; + + if (num_buffers) { + memcpy(unwrapped_buffers, _buffers, num_buffers * sizeof(*_buffers)); + for (i = 0; i < num_buffers; i++) + unwrapped_buffers[i].buffer = galahad_resource_unwrap(_buffers[i].buffer); + buffers = unwrapped_buffers; + } + + pipe->set_vertex_buffers(pipe, + num_buffers, + buffers); +} +static void +galahad_resource_copy_region(struct pipe_context *_pipe, + struct pipe_resource *_dst, + struct pipe_subresource subdst, + unsigned dstx, + unsigned dsty, + unsigned dstz, + struct pipe_resource *_src, + struct pipe_subresource subsrc, + unsigned srcx, + unsigned srcy, + unsigned srcz, + unsigned width, + unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource_dst = galahad_resource(_dst); + struct galahad_resource *glhd_resource_src = galahad_resource(_src); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *dst = glhd_resource_dst->resource; + struct pipe_resource *src = glhd_resource_src->resource; + + if (_dst->format != _src->format) { + glhd_warn("Format mismatch: Source is %s, destination is %s", + util_format_short_name(_src->format), + util_format_short_name(_dst->format)); + } + + pipe->resource_copy_region(pipe, + dst, + subdst, + dstx, + dsty, + dstz, + src, + subsrc, + srcx, + srcy, + srcz, + width, + height); +} + +static void +galahad_clear(struct pipe_context *_pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->clear(pipe, + buffers, + rgba, + depth, + stencil); +} + +static void +galahad_clear_render_target(struct pipe_context *_pipe, + struct pipe_surface *_dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = glhd_surface_dst->surface; + + pipe->clear_render_target(pipe, + dst, + rgba, + dstx, + dsty, + width, + height); +} +static void +galahad_clear_depth_stencil(struct pipe_context *_pipe, + struct pipe_surface *_dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_surface *glhd_surface_dst = galahad_surface(_dst); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_surface *dst = glhd_surface_dst->surface; + + pipe->clear_depth_stencil(pipe, + dst, + clear_flags, + depth, + stencil, + dstx, + dsty, + width, + height); + +} + +static void +galahad_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct pipe_context *pipe = glhd_pipe->pipe; + + pipe->flush(pipe, + flags, + fence); +} + +static unsigned int +galahad_is_resource_referenced(struct pipe_context *_pipe, + struct pipe_resource *_resource, + unsigned face, + unsigned level) +{ + struct galahad_context *glhd_pipe = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_pipe->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + return pipe->is_resource_referenced(pipe, + resource, + face, + level); +} + +static struct pipe_sampler_view * +galahad_context_create_sampler_view(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) +{ + struct galahad_context *glhd_context = galahad_context(_pipe); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *pipe = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_sampler_view *result; + + result = pipe->create_sampler_view(pipe, + resource, + templ); + + if (result) + return galahad_sampler_view_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_sampler_view_destroy(struct pipe_context *_pipe, + struct pipe_sampler_view *_view) +{ + galahad_sampler_view_destroy(galahad_context(_pipe), + galahad_sampler_view(_view)); +} + +static struct pipe_transfer * +galahad_context_get_transfer(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_transfer *result; + + result = context->get_transfer(context, + resource, + sr, + usage, + box); + + if (result) + return galahad_transfer_create(glhd_context, glhd_resource, result); + return NULL; +} + +static void +galahad_context_transfer_destroy(struct pipe_context *_pipe, + struct pipe_transfer *_transfer) +{ + galahad_transfer_destroy(galahad_context(_pipe), + galahad_transfer(_transfer)); +} + +static void * +galahad_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + return context->transfer_map(context, + transfer); +} + + + +static void +galahad_context_transfer_flush_region(struct pipe_context *_context, + struct pipe_transfer *_transfer, + const struct pipe_box *box) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + context->transfer_flush_region(context, + transfer, + box); +} + + +static void +galahad_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_transfer *glhd_transfer = galahad_transfer(_transfer); + struct pipe_context *context = glhd_context->pipe; + struct pipe_transfer *transfer = glhd_transfer->transfer; + + context->transfer_unmap(context, + transfer); +} + + +static void +galahad_context_transfer_inline_write(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) +{ + struct galahad_context *glhd_context = galahad_context(_context); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_context *context = glhd_context->pipe; + struct pipe_resource *resource = glhd_resource->resource; + + context->transfer_inline_write(context, + resource, + sr, + usage, + box, + data, + stride, + slice_stride); +} + + +struct pipe_context * +galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) +{ + struct galahad_context *glhd_pipe; + (void)galahad_screen(_screen); + + glhd_pipe = CALLOC_STRUCT(galahad_context); + if (!glhd_pipe) { + return NULL; + } + + glhd_pipe->base.winsys = NULL; + glhd_pipe->base.screen = _screen; + glhd_pipe->base.priv = pipe->priv; /* expose wrapped data */ + glhd_pipe->base.draw = NULL; + + glhd_pipe->base.destroy = galahad_destroy; + glhd_pipe->base.draw_arrays = galahad_draw_arrays; + glhd_pipe->base.draw_elements = galahad_draw_elements; + glhd_pipe->base.draw_range_elements = galahad_draw_range_elements; + glhd_pipe->base.create_query = galahad_create_query; + glhd_pipe->base.destroy_query = galahad_destroy_query; + glhd_pipe->base.begin_query = galahad_begin_query; + glhd_pipe->base.end_query = galahad_end_query; + glhd_pipe->base.get_query_result = galahad_get_query_result; + glhd_pipe->base.create_blend_state = galahad_create_blend_state; + glhd_pipe->base.bind_blend_state = galahad_bind_blend_state; + glhd_pipe->base.delete_blend_state = galahad_delete_blend_state; + glhd_pipe->base.create_sampler_state = galahad_create_sampler_state; + glhd_pipe->base.bind_fragment_sampler_states = galahad_bind_fragment_sampler_states; + glhd_pipe->base.bind_vertex_sampler_states = galahad_bind_vertex_sampler_states; + glhd_pipe->base.delete_sampler_state = galahad_delete_sampler_state; + glhd_pipe->base.create_rasterizer_state = galahad_create_rasterizer_state; + glhd_pipe->base.bind_rasterizer_state = galahad_bind_rasterizer_state; + glhd_pipe->base.delete_rasterizer_state = galahad_delete_rasterizer_state; + glhd_pipe->base.create_depth_stencil_alpha_state = galahad_create_depth_stencil_alpha_state; + glhd_pipe->base.bind_depth_stencil_alpha_state = galahad_bind_depth_stencil_alpha_state; + glhd_pipe->base.delete_depth_stencil_alpha_state = galahad_delete_depth_stencil_alpha_state; + glhd_pipe->base.create_fs_state = galahad_create_fs_state; + glhd_pipe->base.bind_fs_state = galahad_bind_fs_state; + glhd_pipe->base.delete_fs_state = galahad_delete_fs_state; + glhd_pipe->base.create_vs_state = galahad_create_vs_state; + glhd_pipe->base.bind_vs_state = galahad_bind_vs_state; + glhd_pipe->base.delete_vs_state = galahad_delete_vs_state; + glhd_pipe->base.create_vertex_elements_state = galahad_create_vertex_elements_state; + glhd_pipe->base.bind_vertex_elements_state = galahad_bind_vertex_elements_state; + glhd_pipe->base.delete_vertex_elements_state = galahad_delete_vertex_elements_state; + glhd_pipe->base.set_blend_color = galahad_set_blend_color; + glhd_pipe->base.set_stencil_ref = galahad_set_stencil_ref; + glhd_pipe->base.set_clip_state = galahad_set_clip_state; + glhd_pipe->base.set_sample_mask = galahad_set_sample_mask; + glhd_pipe->base.set_constant_buffer = galahad_set_constant_buffer; + glhd_pipe->base.set_framebuffer_state = galahad_set_framebuffer_state; + glhd_pipe->base.set_polygon_stipple = galahad_set_polygon_stipple; + glhd_pipe->base.set_scissor_state = galahad_set_scissor_state; + glhd_pipe->base.set_viewport_state = galahad_set_viewport_state; + glhd_pipe->base.set_fragment_sampler_views = galahad_set_fragment_sampler_views; + glhd_pipe->base.set_vertex_sampler_views = galahad_set_vertex_sampler_views; + glhd_pipe->base.set_vertex_buffers = galahad_set_vertex_buffers; + glhd_pipe->base.resource_copy_region = galahad_resource_copy_region; + glhd_pipe->base.clear = galahad_clear; + glhd_pipe->base.clear_render_target = galahad_clear_render_target; + glhd_pipe->base.clear_depth_stencil = galahad_clear_depth_stencil; + glhd_pipe->base.flush = galahad_flush; + glhd_pipe->base.is_resource_referenced = galahad_is_resource_referenced; + glhd_pipe->base.create_sampler_view = galahad_context_create_sampler_view; + glhd_pipe->base.sampler_view_destroy = galahad_context_sampler_view_destroy; + glhd_pipe->base.get_transfer = galahad_context_get_transfer; + glhd_pipe->base.transfer_destroy = galahad_context_transfer_destroy; + glhd_pipe->base.transfer_map = galahad_context_transfer_map; + glhd_pipe->base.transfer_unmap = galahad_context_transfer_unmap; + glhd_pipe->base.transfer_flush_region = galahad_context_transfer_flush_region; + glhd_pipe->base.transfer_inline_write = galahad_context_transfer_inline_write; + + glhd_pipe->pipe = pipe; + + return &glhd_pipe->base; +} diff --git a/src/gallium/drivers/galahad/glhd_context.h b/src/gallium/drivers/galahad/glhd_context.h new file mode 100644 index 00000000000..4e71753ac37 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_context.h @@ -0,0 +1,64 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_CONTEXT_H +#define GLHD_CONTEXT_H + +#include <stdio.h> + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + +struct galahad_context { + struct pipe_context base; /**< base class */ + + struct pipe_context *pipe; +}; + + +struct pipe_context * +galahad_context_create(struct pipe_screen *screen, struct pipe_context *pipe); + + +static INLINE struct galahad_context * +galahad_context(struct pipe_context *pipe) +{ + return (struct galahad_context *)pipe; +} + +#define glhd_warn(...) \ +do { \ + fprintf(stderr, "galahad: %s: ", __FUNCTION__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ +} while (0) + +#define glhd_error(...) \ + glhd_warn(__VA_ARGS__); + +#endif /* GLHD_CONTEXT_H */ diff --git a/src/gallium/drivers/galahad/glhd_drm.h b/src/gallium/drivers/galahad/glhd_drm.h new file mode 100644 index 00000000000..613ac24946d --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_drm.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_DRM_H +#define GLHD_DRM_H + +struct drm_api; + +struct drm_api* galahad_drm_create(struct drm_api *api); + +#endif /* GLHD_DRM_H */ diff --git a/src/gallium/drivers/galahad/glhd_objects.c b/src/gallium/drivers/galahad/glhd_objects.c new file mode 100644 index 00000000000..6c5a21ae704 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_objects.c @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include "glhd_screen.h" +#include "glhd_objects.h" +#include "glhd_context.h" + + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource) +{ + struct galahad_resource *glhd_resource; + + if(!resource) + goto error; + + assert(resource->screen == glhd_screen->screen); + + glhd_resource = CALLOC_STRUCT(galahad_resource); + if(!glhd_resource) + goto error; + + memcpy(&glhd_resource->base, resource, sizeof(struct pipe_resource)); + + pipe_reference_init(&glhd_resource->base.reference, 1); + glhd_resource->base.screen = &glhd_screen->base; + glhd_resource->resource = resource; + + return &glhd_resource->base; + +error: + pipe_resource_reference(&resource, NULL); + return NULL; +} + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource) +{ + pipe_resource_reference(&glhd_resource->resource, NULL); + FREE(glhd_resource); +} + + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface) +{ + struct galahad_surface *glhd_surface; + + if(!surface) + goto error; + + assert(surface->texture == glhd_resource->resource); + + glhd_surface = CALLOC_STRUCT(galahad_surface); + if(!glhd_surface) + goto error; + + memcpy(&glhd_surface->base, surface, sizeof(struct pipe_surface)); + + pipe_reference_init(&glhd_surface->base.reference, 1); + glhd_surface->base.texture = NULL; + pipe_resource_reference(&glhd_surface->base.texture, &glhd_resource->base); + glhd_surface->surface = surface; + + return &glhd_surface->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface) +{ + pipe_resource_reference(&glhd_surface->base.texture, NULL); + pipe_surface_reference(&glhd_surface->surface, NULL); + FREE(glhd_surface); +} + + +struct pipe_sampler_view * +galahad_sampler_view_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_sampler_view *view) +{ + struct galahad_sampler_view *glhd_view; + + if (!view) + goto error; + + assert(view->texture == glhd_resource->resource); + + glhd_view = CALLOC_STRUCT(galahad_sampler_view); + + glhd_view->base = *view; + glhd_view->base.reference.count = 1; + glhd_view->base.texture = NULL; + pipe_resource_reference(&glhd_view->base.texture, glhd_resource->resource); + glhd_view->base.context = glhd_context->pipe; + glhd_view->sampler_view = view; + + return &glhd_view->base; +error: + return NULL; +} + +void +galahad_sampler_view_destroy(struct galahad_context *glhd_context, + struct galahad_sampler_view *glhd_view) +{ + pipe_resource_reference(&glhd_view->base.texture, NULL); + glhd_context->pipe->sampler_view_destroy(glhd_context->pipe, + glhd_view->sampler_view); + FREE(glhd_view); +} + + +struct pipe_transfer * +galahad_transfer_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_transfer *transfer) +{ + struct galahad_transfer *glhd_transfer; + + if(!transfer) + goto error; + + assert(transfer->resource == glhd_resource->resource); + + glhd_transfer = CALLOC_STRUCT(galahad_transfer); + if(!glhd_transfer) + goto error; + + memcpy(&glhd_transfer->base, transfer, sizeof(struct pipe_transfer)); + + glhd_transfer->base.resource = NULL; + glhd_transfer->transfer = transfer; + + pipe_resource_reference(&glhd_transfer->base.resource, &glhd_resource->base); + assert(glhd_transfer->base.resource == &glhd_resource->base); + + return &glhd_transfer->base; + +error: + glhd_context->pipe->transfer_destroy(glhd_context->pipe, transfer); + return NULL; +} + +void +galahad_transfer_destroy(struct galahad_context *glhd_context, + struct galahad_transfer *glhd_transfer) +{ + pipe_resource_reference(&glhd_transfer->base.resource, NULL); + glhd_context->pipe->transfer_destroy(glhd_context->pipe, + glhd_transfer->transfer); + FREE(glhd_transfer); +} diff --git a/src/gallium/drivers/galahad/glhd_objects.h b/src/gallium/drivers/galahad/glhd_objects.h new file mode 100644 index 00000000000..935803915db --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_objects.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_OBJECTS_H +#define GLHD_OBJECTS_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "glhd_screen.h" + +struct galahad_context; + + +struct galahad_resource +{ + struct pipe_resource base; + + struct pipe_resource *resource; +}; + + +struct galahad_sampler_view +{ + struct pipe_sampler_view base; + + struct pipe_sampler_view *sampler_view; +}; + + +struct galahad_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; +}; + + +struct galahad_transfer +{ + struct pipe_transfer base; + + struct pipe_transfer *transfer; +}; + + +static INLINE struct galahad_resource * +galahad_resource(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + (void)galahad_screen(_resource->screen); + return (struct galahad_resource *)_resource; +} + +static INLINE struct galahad_sampler_view * +galahad_sampler_view(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return (struct galahad_sampler_view *)_sampler_view; +} + +static INLINE struct galahad_surface * +galahad_surface(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + (void)galahad_resource(_surface->texture); + return (struct galahad_surface *)_surface; +} + +static INLINE struct galahad_transfer * +galahad_transfer(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + (void)galahad_resource(_transfer->resource); + return (struct galahad_transfer *)_transfer; +} + +static INLINE struct pipe_resource * +galahad_resource_unwrap(struct pipe_resource *_resource) +{ + if(!_resource) + return NULL; + return galahad_resource(_resource)->resource; +} + +static INLINE struct pipe_sampler_view * +galahad_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view) +{ + if (!_sampler_view) { + return NULL; + } + return galahad_sampler_view(_sampler_view)->sampler_view; +} + +static INLINE struct pipe_surface * +galahad_surface_unwrap(struct pipe_surface *_surface) +{ + if(!_surface) + return NULL; + return galahad_surface(_surface)->surface; +} + +static INLINE struct pipe_transfer * +galahad_transfer_unwrap(struct pipe_transfer *_transfer) +{ + if(!_transfer) + return NULL; + return galahad_transfer(_transfer)->transfer; +} + + +struct pipe_resource * +galahad_resource_create(struct galahad_screen *glhd_screen, + struct pipe_resource *resource); + +void +galahad_resource_destroy(struct galahad_resource *glhd_resource); + +struct pipe_surface * +galahad_surface_create(struct galahad_resource *glhd_resource, + struct pipe_surface *surface); + +void +galahad_surface_destroy(struct galahad_surface *glhd_surface); + +struct pipe_sampler_view * +galahad_sampler_view_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_sampler_view *view); + +void +galahad_sampler_view_destroy(struct galahad_context *glhd_context, + struct galahad_sampler_view *glhd_sampler_view); + +struct pipe_transfer * +galahad_transfer_create(struct galahad_context *glhd_context, + struct galahad_resource *glhd_resource, + struct pipe_transfer *transfer); + +void +galahad_transfer_destroy(struct galahad_context *glhd_context, + struct galahad_transfer *glhd_transfer); + + +#endif /* GLHD_OBJECTS_H */ diff --git a/src/gallium/drivers/galahad/glhd_public.h b/src/gallium/drivers/galahad/glhd_public.h new file mode 100644 index 00000000000..77a380196a1 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_public.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_PUBLIC_H +#define GLHD_PUBLIC_H + +struct pipe_screen; +struct pipe_context; + +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen); + +#endif /* GLHD_PUBLIC_H */ diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c new file mode 100644 index 00000000000..bcc37cb633a --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -0,0 +1,330 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * 2010 Corbin Simpson <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "glhd_public.h" +#include "glhd_screen.h" +#include "glhd_context.h" +#include "glhd_objects.h" + + +static void +galahad_screen_destroy(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->destroy(screen); + + FREE(glhd_screen); +} + +static const char * +galahad_screen_get_name(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_name(screen); +} + +static const char * +galahad_screen_get_vendor(struct pipe_screen *_screen) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_vendor(screen); +} + +static int +galahad_screen_get_param(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_param(screen, + param); +} + +static float +galahad_screen_get_paramf(struct pipe_screen *_screen, + enum pipe_cap param) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->get_paramf(screen, + param); +} + +static boolean +galahad_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned tex_usage, + unsigned geom_flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + if (target >= PIPE_MAX_TEXTURE_TYPES) { + glhd_warn("Received bogus texture target %d", target); + } + + return screen->is_format_supported(screen, + format, + target, + sample_count, + tex_usage, + geom_flags); +} + +static struct pipe_context * +galahad_screen_context_create(struct pipe_screen *_screen, + void *priv) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_context *result; + + result = screen->context_create(screen, priv); + if (result) + return galahad_context_create(_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->resource_create(screen, + templat); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + +static struct pipe_resource * +galahad_screen_resource_from_handle(struct pipe_screen *_screen, + const struct pipe_resource *templ, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + /* TODO trace call */ + + result = screen->resource_from_handle(screen, templ, handle); + + result = galahad_resource_create(galahad_screen(_screen), result); + + return result; +} + +static boolean +galahad_screen_resource_get_handle(struct pipe_screen *_screen, + struct pipe_resource *_resource, + struct winsys_handle *handle) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + + /* TODO trace call */ + + return screen->resource_get_handle(screen, resource, handle); +} + + + +static void +galahad_screen_resource_destroy(struct pipe_screen *screen, + struct pipe_resource *_resource) +{ + galahad_resource_destroy(galahad_resource(_resource)); +} + +static struct pipe_surface * +galahad_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_resource *_resource, + unsigned face, + unsigned level, + unsigned zslice, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_resource *glhd_resource = galahad_resource(_resource); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *resource = glhd_resource->resource; + struct pipe_surface *result; + + result = screen->get_tex_surface(screen, + resource, + face, + level, + zslice, + usage); + + if (result) + return galahad_surface_create(glhd_resource, result); + return NULL; +} + +static void +galahad_screen_tex_surface_destroy(struct pipe_surface *_surface) +{ + galahad_surface_destroy(galahad_surface(_surface)); +} + + + +static struct pipe_resource * +galahad_screen_user_buffer_create(struct pipe_screen *_screen, + void *ptr, + unsigned bytes, + unsigned usage) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_resource *result; + + result = screen->user_buffer_create(screen, + ptr, + bytes, + usage); + + if (result) + return galahad_resource_create(glhd_screen, result); + return NULL; +} + + + +static void +galahad_screen_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_surface *_surface, + void *context_private) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct galahad_surface *glhd_surface = galahad_surface(_surface); + struct pipe_screen *screen = glhd_screen->screen; + struct pipe_surface *surface = glhd_surface->surface; + + screen->flush_frontbuffer(screen, + surface, + context_private); +} + +static void +galahad_screen_fence_reference(struct pipe_screen *_screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + screen->fence_reference(screen, + ptr, + fence); +} + +static int +galahad_screen_fence_signalled(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_signalled(screen, + fence, + flags); +} + +static int +galahad_screen_fence_finish(struct pipe_screen *_screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct galahad_screen *glhd_screen = galahad_screen(_screen); + struct pipe_screen *screen = glhd_screen->screen; + + return screen->fence_finish(screen, + fence, + flags); +} + +struct pipe_screen * +galahad_screen_create(struct pipe_screen *screen) +{ + struct galahad_screen *glhd_screen; + + glhd_screen = CALLOC_STRUCT(galahad_screen); + if (!glhd_screen) { + return NULL; + } + + glhd_screen->base.winsys = NULL; + + glhd_screen->base.destroy = galahad_screen_destroy; + glhd_screen->base.get_name = galahad_screen_get_name; + glhd_screen->base.get_vendor = galahad_screen_get_vendor; + glhd_screen->base.get_param = galahad_screen_get_param; + glhd_screen->base.get_paramf = galahad_screen_get_paramf; + glhd_screen->base.is_format_supported = galahad_screen_is_format_supported; + glhd_screen->base.context_create = galahad_screen_context_create; + glhd_screen->base.resource_create = galahad_screen_resource_create; + glhd_screen->base.resource_from_handle = galahad_screen_resource_from_handle; + glhd_screen->base.resource_get_handle = galahad_screen_resource_get_handle; + glhd_screen->base.resource_destroy = galahad_screen_resource_destroy; + glhd_screen->base.get_tex_surface = galahad_screen_get_tex_surface; + glhd_screen->base.tex_surface_destroy = galahad_screen_tex_surface_destroy; + glhd_screen->base.user_buffer_create = galahad_screen_user_buffer_create; + glhd_screen->base.flush_frontbuffer = galahad_screen_flush_frontbuffer; + glhd_screen->base.fence_reference = galahad_screen_fence_reference; + glhd_screen->base.fence_signalled = galahad_screen_fence_signalled; + glhd_screen->base.fence_finish = galahad_screen_fence_finish; + + glhd_screen->screen = screen; + + return &glhd_screen->base; +} diff --git a/src/gallium/drivers/galahad/glhd_screen.h b/src/gallium/drivers/galahad/glhd_screen.h new file mode 100644 index 00000000000..7862f4af2b3 --- /dev/null +++ b/src/gallium/drivers/galahad/glhd_screen.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef GLHD_SCREEN_H +#define GLHD_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + +struct galahad_screen { + struct pipe_screen base; + + struct pipe_screen *screen; +}; + + +static INLINE struct galahad_screen * +galahad_screen(struct pipe_screen *screen) +{ + return (struct galahad_screen *)screen; +} + +#endif /* GLHD_SCREEN_H */ diff --git a/src/gallium/drivers/i915/SConscript b/src/gallium/drivers/i915/SConscript index 7b69681096d..d6e7a8dbd37 100644 --- a/src/gallium/drivers/i915/SConscript +++ b/src/gallium/drivers/i915/SConscript @@ -2,6 +2,10 @@ Import('*') env = env.Clone() +if msvc: + print 'warning: not building i915g' + Return() + i915 = env.ConvenienceLibrary( target = 'i915', source = [ diff --git a/src/gallium/drivers/i915/i915_blit.c b/src/gallium/drivers/i915/i915_blit.c index c5b5979bf98..0a1b3e0d66b 100644 --- a/src/gallium/drivers/i915/i915_blit.c +++ b/src/gallium/drivers/i915/i915_blit.c @@ -31,7 +31,6 @@ #include "i915_batch.h" #include "i915_debug.h" -#define FILE_DEBUG_FLAG DEBUG_BLIT void i915_fill_blit(struct i915_context *i915, @@ -47,10 +46,8 @@ i915_fill_blit(struct i915_context *i915, unsigned BR13, CMD; - I915_DBG(i915, - "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - dst_buffer, dst_pitch, dst_offset, x, y, w, h); + I915_DBG(DBG_BLIT, "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); switch (cpp) { case 1: @@ -100,11 +97,11 @@ i915_copy_blit(struct i915_context *i915, int dst_x2 = dst_x + w; - I915_DBG(i915, - "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, - src_buffer, src_pitch, src_offset, src_x, src_y, - dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + I915_DBG(DBG_BLIT, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); switch (cpp) { case 1: diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index acc0ffe037f..ac02ab23325 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -237,8 +237,6 @@ struct i915_context struct i915_state current; unsigned hardware_dirty; - - unsigned debug; }; /* A flag for each state_tracker state object: diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c index 663fac3055c..57d3390dea3 100644 --- a/src/gallium/drivers/i915/i915_debug.c +++ b/src/gallium/drivers/i915/i915_debug.c @@ -27,11 +27,37 @@ #include "i915_reg.h" #include "i915_context.h" +#include "i915_screen.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "i915_batch.h" #include "util/u_debug.h" + +static const struct debug_named_value debug_options[] = { + {"blit", DBG_BLIT, "Print when using the 2d blitter"}, + {"emit", DBG_EMIT, "State emit information"}, + {"atoms", DBG_ATOMS, "Print dirty state atoms"}, + {"flush", DBG_FLUSH, "Flushing information"}, + {"texture", DBG_TEXTURE, "Texture information"}, + {"constants", DBG_CONSTANTS, "Constant buffers"}, + DEBUG_NAMED_VALUE_END +}; + +unsigned i915_debug = 0; + +void i915_debug_init(struct i915_screen *screen) +{ + i915_debug = debug_get_flags_option("I915_DEBUG", debug_options, 0); +} + + + +/*********************************************************************** + * Batchbuffer dumping + */ + static void PRINTF( struct debug_stream *stream, @@ -896,3 +922,66 @@ i915_dump_batchbuffer( struct i915_winsys_batchbuffer *batch ) } + +/*********************************************************************** + * Dirty state atom dumping + */ + +void +i915_dump_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_NEW_VIEWPORT, "viewport"}, + {I915_NEW_RASTERIZER, "rasterizer"}, + {I915_NEW_FS, "fs"}, + {I915_NEW_BLEND, "blend"}, + {I915_NEW_CLIP, "clip"}, + {I915_NEW_SCISSOR, "scissor"}, + {I915_NEW_STIPPLE, "stipple"}, + {I915_NEW_FRAMEBUFFER, "framebuffer"}, + {I915_NEW_ALPHA_TEST, "alpha_test"}, + {I915_NEW_DEPTH_STENCIL, "depth_stencil"}, + {I915_NEW_SAMPLER, "sampler"}, + {I915_NEW_SAMPLER_VIEW, "sampler_view"}, + {I915_NEW_CONSTANTS, "constants"}, + {I915_NEW_VBO, "vbo"}, + {I915_NEW_VS, "vs"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} + +void +i915_dump_hardware_dirty(struct i915_context *i915, const char *func) +{ + struct { + unsigned dirty; + const char *name; + } l[] = { + {I915_HW_STATIC, "static"}, + {I915_HW_DYNAMIC, "dynamic"}, + {I915_HW_SAMPLER, "sampler"}, + {I915_HW_MAP, "map"}, + {I915_HW_PROGRAM, "program"}, + {I915_HW_CONSTANTS, "constants"}, + {I915_HW_IMMEDIATE, "immediate"}, + {I915_HW_INVARIENT, "invarient"}, + {0, NULL}, + }; + int i; + + debug_printf("%s: ", func); + for (i = 0; l[i].name; i++) + if (i915->hardware_dirty & l[i].dirty) + debug_printf("%s ", l[i].name); + debug_printf("\n"); +} diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h index 67b8d9c2f63..8aa09f9c1f2 100644 --- a/src/gallium/drivers/i915/i915_debug.h +++ b/src/gallium/drivers/i915/i915_debug.h @@ -26,89 +26,51 @@ **************************************************************************/ /* Authors: Keith Whitwell <[email protected]> + * Jakob Bornecrantz <[email protected]> */ #ifndef I915_DEBUG_H #define I915_DEBUG_H -#include <stdarg.h> +#include "util/u_debug.h" +struct i915_screen; struct i915_context; +struct i915_winsys_batchbuffer; -struct debug_stream -{ - unsigned offset; /* current gtt offset */ - char *ptr; /* pointer to gtt offset zero */ - char *end; /* pointer to gtt offset zero */ - unsigned print_addresses; -}; - - -/* Internal functions - */ -void i915_disassemble_program(struct debug_stream *stream, - const unsigned *program, unsigned sz); - -void i915_print_ureg(const char *msg, unsigned ureg); - - -#define DEBUG_BATCH 0x1 -#define DEBUG_BLIT 0x2 -#define DEBUG_BUFFER 0x4 -#define DEBUG_CONSTANTS 0x8 -#define DEBUG_CONTEXT 0x10 -#define DEBUG_DRAW 0x20 -#define DEBUG_DYNAMIC 0x40 -#define DEBUG_FLUSH 0x80 -#define DEBUG_MAP 0x100 -#define DEBUG_PROGRAM 0x200 -#define DEBUG_REGIONS 0x400 -#define DEBUG_SAMPLER 0x800 -#define DEBUG_STATIC 0x1000 -#define DEBUG_SURFACE 0x2000 -#define DEBUG_WINSYS 0x4000 - -#include "pipe/p_compiler.h" +#define DBG_BLIT 0x1 +#define DBG_EMIT 0x2 +#define DBG_ATOMS 0x4 +#define DBG_FLUSH 0x8 +#define DBG_TEXTURE 0x10 +#define DBG_CONSTANTS 0x20 -#if defined(DEBUG) && defined(FILE_DEBUG_FLAG) +extern unsigned i915_debug; -#include "util/u_simple_screen.h" +static INLINE boolean +I915_DBG_ON(unsigned flags) +{ + return i915_debug & flags; +} static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) +I915_DBG(unsigned flags, const char *fmt, ...) { - if ((i915)->debug & FILE_DEBUG_FLAG) { + if (I915_DBG_ON(flags)) { va_list args; - va_start( args, fmt ); - debug_vprintf( fmt, args ); - va_end( args ); + va_start(args, fmt); + debug_vprintf(fmt, args); + va_end(args); } } -#else - -static INLINE void -I915_DBG( - struct i915_context *i915, - const char *fmt, - ... ) -{ - (void) i915; - (void) fmt; -} - -#endif - - -struct i915_winsys_batchbuffer; +void i915_debug_init(struct i915_screen *i915); -void i915_dump_batchbuffer( struct i915_winsys_batchbuffer *i915 ); +void i915_dump_batchbuffer(struct i915_winsys_batchbuffer *i915); -void i915_debug_init( struct i915_context *i915 ); +void i915_dump_dirty(struct i915_context *i915, const char *func); +void i915_dump_hardware_dirty(struct i915_context *i915, const char *func); #endif diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index f41c51f2991..50f49c540fe 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -28,6 +28,7 @@ #include "i915_reg.h" #include "i915_debug.h" +#include "i915_debug_private.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_debug_private.h b/src/gallium/drivers/i915/i915_debug_private.h new file mode 100644 index 00000000000..b3668d08482 --- /dev/null +++ b/src/gallium/drivers/i915/i915_debug_private.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <[email protected]> + */ + +#ifndef I915_DEBUG_PRIVATE_H +#define I915_DEBUG_PRIVATE_H + +struct debug_stream +{ + unsigned offset; /* current gtt offset */ + char *ptr; /* pointer to gtt offset zero */ + char *end; /* pointer to gtt offset zero */ + unsigned print_addresses; +}; + +void i915_disassemble_program(struct debug_stream *stream, + const unsigned *program, unsigned sz); + +#endif diff --git a/src/gallium/drivers/i915/i915_flush.c b/src/gallium/drivers/i915/i915_flush.c index 1582168eba5..967146479d1 100644 --- a/src/gallium/drivers/i915/i915_flush.c +++ b/src/gallium/drivers/i915/i915_flush.c @@ -35,6 +35,7 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_batch.h" +#include "i915_debug.h" static void i915_flush( struct pipe_context *pipe, @@ -76,9 +77,9 @@ static void i915_flush( struct pipe_context *pipe, */ FLUSH_BATCH(fence); i915->vbo_flushed = 1; -} - + I915_DBG(DBG_FLUSH, "%s: #####\n", __FUNCTION__); +} void i915_init_flush_functions( struct i915_context *i915 ) { diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c index f8665acbe18..bd046bd9058 100644 --- a/src/gallium/drivers/i915/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915/i915_prim_vbuf.c @@ -52,8 +52,7 @@ #include "i915_state.h" -#undef VBUF_USE_FIFO -#undef VBUF_MAP_BUFFER +#define VBUF_MAP_BUFFER /** * Primitive renderer for i915. @@ -79,23 +78,18 @@ struct i915_vbuf_render { struct i915_winsys_buffer *vbo; size_t vbo_size; /**< current size of allocated buffer */ size_t vbo_alloc_size; /**< minimum buffer size to allocate */ - size_t vbo_offset; + size_t vbo_hw_offset; /**< offset that we program the hardware with */ + size_t vbo_sw_offset; /**< offset that we work with */ + size_t vbo_index; /**< index offset to be added to all indices */ void *vbo_ptr; size_t vbo_max_used; + size_t vbo_max_index; /**< index offset to be added to all indices */ #ifndef VBUF_MAP_BUFFER size_t map_used_start; size_t map_used_end; size_t map_size; #endif - -#ifdef VBUF_USE_FIFO - /* Stuff for the pool */ - struct util_fifo *pool_fifo; - unsigned pool_used; - unsigned pool_buffer_size; - boolean pool_not_used; -#endif }; @@ -109,6 +103,35 @@ i915_vbuf_render(struct vbuf_render *render) return (struct i915_vbuf_render *)render; } +/** + * If vbo state differs between renderer and context + * push state to the context. This function pushes + * hw_offset to i915->vbo_offset and vbo to i915->vbo. + * + * Side effects: + * May updates context vbo_offset and vbo fields. + */ +static void +i915_vbuf_update_vbo_state(struct vbuf_render *render) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->vbo != i915_render->vbo || + i915->vbo_offset != i915_render->vbo_hw_offset) { + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_hw_offset; + i915->dirty |= I915_NEW_VBO; + } +} + +/** + * Callback exported to the draw module. + * Returns the current vertex_info. + * + * Side effects: + * If state is dirty update derived state. + */ static const struct vertex_info * i915_vbuf_render_get_vertex_info(struct vbuf_render *render) { @@ -123,12 +146,18 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render) return &i915->current.vertex_info; } +/** + * Reserve space in the vbo for vertices. + * + * Side effects: + * None. + */ static boolean i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; - if (i915_render->vbo_size < size + i915_render->vbo_offset) + if (i915_render->vbo_size < size + i915_render->vbo_sw_offset) return FALSE; if (i915->vbo_flushed) @@ -137,28 +166,28 @@ i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size) return TRUE; } +/** + * Allocate a new vbo buffer should there not be enough space for + * the requested number of vertices by the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and allocates a new buffer. + */ static void i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) { struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; - if (i915_render->vbo) { -#ifdef VBUF_USE_FIFO - if (i915_render->pool_not_used) - iws->buffer_destroy(iws, i915_render->vbo); - else - u_fifo_add(i915_render->pool_fifo, i915_render->vbo); - i915_render->vbo = NULL; -#else + if (i915_render->vbo) iws->buffer_destroy(iws, i915_render->vbo); -#endif - } i915->vbo_flushed = 0; i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; + i915_render->vbo_index = 0; #ifndef VBUF_MAP_BUFFER if (i915_render->vbo_size > i915_render->map_size) { @@ -168,52 +197,51 @@ i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size) } #endif -#ifdef VBUF_USE_FIFO - if (i915_render->vbo_size != i915_render->pool_buffer_size) { - i915_render->pool_not_used = TRUE; - i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, - I915_NEW_VERTEX); - } else { - i915_render->pool_not_used = FALSE; - - if (i915_render->pool_used >= 2) { - FLUSH_BATCH(NULL); - i915->vbo_flushed = 0; - i915_render->pool_used = 0; - } - u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo); - } -#else i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64, I915_NEW_VERTEX); -#endif } +/** + * Callback exported to the draw module. + * + * Side effects: + * Updates hw_offset, sw_offset, index and may allocate + * a new buffer. Also updates may update the vbo state + * on the i915 context. + */ static boolean i915_vbuf_render_allocate_vertices(struct vbuf_render *render, ushort vertex_size, ushort nr_vertices) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; size_t size = (size_t)vertex_size * (size_t)nr_vertices; + size_t offset; - /* FIXME: handle failure */ - assert(!i915->vbo); + /* + * Align sw_offset with first multiple of vertex size from hw_offset. + * Set index to be the multiples from from hw_offset to sw_offset. + * i915_vbuf_render_new_buf will reset index, sw_offset, hw_offset + * when it allocates a new buffer this is correct. + */ + { + offset = i915_render->vbo_sw_offset - i915_render->vbo_hw_offset; + offset = util_align_npot(offset, vertex_size); + i915_render->vbo_sw_offset = i915_render->vbo_hw_offset + offset; + i915_render->vbo_index = offset / vertex_size; + } - if (!i915_vbuf_render_reserve(i915_render, size)) { -#ifdef VBUF_USE_FIFO - /* incase we flushed reset the number of pool buffers used */ - if (i915->vbo_flushed) - i915_render->pool_used = 0; -#endif + if (!i915_vbuf_render_reserve(i915_render, size)) i915_vbuf_render_new_buf(i915_render, size); - } + + /* + * If a new buffer has been alocated sw_offset, + * hw_offset & index will be reset by new_buf + */ i915_render->vertex_size = vertex_size; - i915->vbo = i915_render->vbo; - i915->vbo_offset = i915_render->vbo_offset; - i915->dirty |= I915_NEW_VBO; + + i915_vbuf_update_vbo_state(render); if (!i915_render->vbo) return FALSE; @@ -232,7 +260,7 @@ i915_vbuf_render_map_vertices(struct vbuf_render *render) #ifdef VBUF_MAP_BUFFER i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE); - return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_offset; + return (unsigned char *)i915_render->vbo_ptr + i915_render->vbo_sw_offset; #else (void)iws; return (unsigned char *)i915_render->vbo_ptr; @@ -248,6 +276,7 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, struct i915_context *i915 = i915_render->i915; struct i915_winsys *iws = i915->iws; + i915_render->vbo_max_index = max_index; i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); #ifdef VBUF_MAP_BUFFER iws->buffer_unmap(iws, i915_render->vbo); @@ -255,13 +284,36 @@ i915_vbuf_render_unmap_vertices(struct vbuf_render *render, i915_render->map_used_start = i915_render->vertex_size * min_index; i915_render->map_used_end = i915_render->vertex_size * (max_index + 1); iws->buffer_write(iws, i915_render->vbo, - i915_render->map_used_start + i915_render->vbo_offset, + i915_render->map_used_start + i915_render->vbo_sw_offset, i915_render->map_used_end - i915_render->map_used_start, (unsigned char *)i915_render->vbo_ptr + i915_render->map_used_start); #endif } +/** + * Ensure that the given max_index given is not larger ushort max. + * If it is larger then ushort max it advanced the hw_offset to the + * same position in the vbo as sw_offset and set index to zero. + * + * Side effects: + * On failure update hw_offset and index. + */ +static void +i915_vbuf_ensure_index_bounds(struct vbuf_render *render, + unsigned max_index) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (max_index + i915_render->vbo_index < ((1 << 17) - 1)) + return; + + i915_render->vbo_hw_offset = i915_render->vbo_sw_offset; + i915_render->vbo_index = 0; + + i915_vbuf_update_vbo_state(render); +} + static boolean i915_vbuf_render_set_primitive(struct vbuf_render *render, unsigned prim) @@ -327,7 +379,9 @@ draw_arrays_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; - unsigned end = start + nr; + unsigned end = start + nr + i915_render->vbo_index; + start += i915_render->vbo_index; + switch(type) { case 0: for (i = start; i+1 < end; i += 2) @@ -391,16 +445,18 @@ draw_arrays_fallback(struct vbuf_render *render, struct i915_context *i915 = i915_render->i915; unsigned nr_indices; + nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); + if (!nr_indices) + return; + + i915_vbuf_ensure_index_bounds(render, start + nr_indices); + if (i915->dirty) i915_update_derived(i915); if (i915->hardware_dirty) i915_emit_hardware_state(i915); - nr_indices = draw_arrays_calc_nr_indices(nr, i915_render->fallback); - if (!nr_indices) - return; - if (!BEGIN_BATCH(1 + (nr_indices + 1)/2, 1)) { FLUSH_BATCH(NULL); @@ -415,6 +471,7 @@ draw_arrays_fallback(struct vbuf_render *render, goto out; } } + OUT_BATCH(_3DPRIMITIVE | PRIM_INDIRECT | i915_render->hwprim | @@ -440,6 +497,9 @@ i915_vbuf_render_draw_arrays(struct vbuf_render *render, return; } + i915_vbuf_ensure_index_bounds(render, start + nr); + start += i915_render->vbo_index; + if (i915->dirty) i915_update_derived(i915); @@ -485,35 +545,36 @@ draw_generate_indices(struct vbuf_render *render, struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; unsigned i; + unsigned o = i915_render->vbo_index; switch(type) { case 0: for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH(indices[i] | indices[i+1] << 16); + OUT_BATCH((o+indices[i]) | (o+indices[i+1]) << 16); } if (i < nr_indices) { - OUT_BATCH(indices[i]); + OUT_BATCH((o+indices[i])); } break; case PIPE_PRIM_LINE_LOOP: if (nr_indices >= 2) { for (i = 1; i < nr_indices; i++) - OUT_BATCH(indices[i-1] | indices[i] << 16); - OUT_BATCH(indices[i-1] | indices[0] << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[i]) << 16); + OUT_BATCH((o+indices[i-1]) | (o+indices[0]) << 16); } break; case PIPE_PRIM_QUADS: for (i = 0; i + 3 < nr_indices; i += 4) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+1] << 16); - OUT_BATCH(indices[i+2] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+2]) | (o+indices[i+3]) << 16); } break; case PIPE_PRIM_QUAD_STRIP: for (i = 0; i + 3 < nr_indices; i += 2) { - OUT_BATCH(indices[i+0] | indices[i+1] << 16); - OUT_BATCH(indices[i+3] | indices[i+2] << 16); - OUT_BATCH(indices[i+0] | indices[i+3] << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+1]) << 16); + OUT_BATCH((o+indices[i+3]) | (o+indices[i+2]) << 16); + OUT_BATCH((o+indices[i+0]) | (o+indices[i+3]) << 16); } break; default: @@ -558,6 +619,8 @@ i915_vbuf_render_draw_elements(struct vbuf_render *render, if (!nr_indices) return; + i915_vbuf_ensure_index_bounds(render, i915_render->vbo_max_index); + if (i915->dirty) i915_update_derived(i915); @@ -597,14 +660,15 @@ static void i915_vbuf_render_release_vertices(struct vbuf_render *render) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - struct i915_context *i915 = i915_render->i915; - - assert(i915->vbo); - i915_render->vbo_offset += i915_render->vbo_max_used; + i915_render->vbo_sw_offset += i915_render->vbo_max_used; i915_render->vbo_max_used = 0; - i915->vbo = NULL; - i915->dirty |= I915_NEW_VBO; + + /* + * Micro optimization, by calling update here we the offset change + * will be picked up on the next pipe_context::draw_*. + */ + i915_vbuf_update_vbo_state(render); } static void @@ -652,7 +716,8 @@ i915_vbuf_render_create(struct i915_context *i915) i915_render->vbo = NULL; i915_render->vbo_ptr = NULL; i915_render->vbo_size = 0; - i915_render->vbo_offset = 0; + i915_render->vbo_hw_offset = 0; + i915_render->vbo_sw_offset = 0; i915_render->vbo_alloc_size = i915_render->base.max_vertex_buffer_bytes * 4; #ifdef VBUF_USE_POOL diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index bb0b85d74c7..77345d5f711 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -31,6 +31,7 @@ #include "util/u_string.h" #include "i915_reg.h" +#include "i915_debug.h" #include "i915_context.h" #include "i915_screen.h" #include "i915_surface.h" @@ -331,5 +332,7 @@ i915_screen_create(struct i915_winsys *iws) i915_init_screen_resource_functions(is); i915_init_screen_surface_functions(is); + i915_debug_init(is); + return &is->base; } diff --git a/src/gallium/drivers/i915/i915_state.h b/src/gallium/drivers/i915/i915_state.h index 86c6b0027d5..7795046f06d 100644 --- a/src/gallium/drivers/i915/i915_state.h +++ b/src/gallium/drivers/i915/i915_state.h @@ -35,16 +35,21 @@ struct i915_context; struct i915_tracked_state { + const char *name; + void (*update)(struct i915_context *); unsigned dirty; - void (*update)( struct i915_context * ); }; -void i915_update_immediate( struct i915_context *i915 ); -void i915_update_dynamic( struct i915_context *i915 ); -void i915_update_derived( struct i915_context *i915 ); -void i915_update_samplers( struct i915_context *i915 ); -void i915_update_textures(struct i915_context *i915); +extern struct i915_tracked_state i915_update_vertex_layout; -void i915_emit_hardware_state( struct i915_context *i915 ); +extern struct i915_tracked_state i915_hw_samplers; +extern struct i915_tracked_state i915_hw_sampler_views; +extern struct i915_tracked_state i915_hw_immediate; +extern struct i915_tracked_state i915_hw_dynamic; +extern struct i915_tracked_state i915_hw_fs; +extern struct i915_tracked_state i915_hw_framebuffer; + +void i915_update_derived(struct i915_context *i915); +void i915_emit_hardware_state(struct i915_context *i915); #endif diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 4da46772b5d..c0595403571 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -32,15 +32,16 @@ #include "draw/draw_vertex.h" #include "i915_context.h" #include "i915_state.h" +#include "i915_debug.h" #include "i915_reg.h" -/** +/*********************************************************************** * Determine the hardware vertex layout. * Depends on vertex/fragment shader state. */ -static void calculate_vertex_layout( struct i915_context *i915 ) +static void calculate_vertex_layout(struct i915_context *i915) { const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; @@ -146,37 +147,71 @@ static void calculate_vertex_layout( struct i915_context *i915 ) } } +struct i915_tracked_state i915_update_vertex_layout = { + "vertex_layout", + calculate_vertex_layout, + I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS +}; -/* Hopefully this will remain quite simple, otherwise need to pull in - * something like the state tracker mechanism. +/*********************************************************************** + * Update fragment state */ -void i915_update_derived( struct i915_context *i915 ) +static void update_fs(struct i915_context *i915) { - if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) - calculate_vertex_layout( i915 ); - - if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW)) - i915_update_samplers(i915); - - if (i915->dirty & I915_NEW_SAMPLER_VIEW) - i915_update_textures(i915); + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ +} - if (i915->dirty) - i915_update_immediate( i915 ); +struct i915_tracked_state i915_hw_fs = { + "fs", + update_fs, + I915_NEW_FS +}; - if (i915->dirty) - i915_update_dynamic( i915 ); - if (i915->dirty & I915_NEW_FS) { - i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ - } +/*********************************************************************** + * Update framebuffer state + */ +static void update_framebuffer(struct i915_context *i915) +{ /* HW emit currently references framebuffer state directly: */ - if (i915->dirty & I915_NEW_FRAMEBUFFER) - i915->hardware_dirty |= I915_HW_STATIC; + i915->hardware_dirty |= I915_HW_STATIC; +} + +struct i915_tracked_state i915_hw_framebuffer = { + "framebuffer", + update_framebuffer, + I915_NEW_FRAMEBUFFER +}; + + + +/*********************************************************************** + */ +static struct i915_tracked_state *atoms[] = { + &i915_update_vertex_layout, + &i915_hw_samplers, + &i915_hw_sampler_views, + &i915_hw_immediate, + &i915_hw_dynamic, + &i915_hw_fs, + &i915_hw_framebuffer, + NULL, +}; + +void i915_update_derived(struct i915_context *i915) +{ + int i; + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_dirty(i915, __FUNCTION__); + + for (i = 0; atoms[i]; i++) + if (atoms[i]->dirty & i915->dirty) + atoms[i]->update(i915); i915->dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c index 9c6723b3916..d964483ac7e 100644 --- a/src/gallium/drivers/i915/i915_state_dynamic.c +++ b/src/gallium/drivers/i915/i915_state_dynamic.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ #include "i915_batch.h" @@ -34,10 +34,9 @@ #include "util/u_memory.h" #include "util/u_pack_color.h" -#define FILE_DEBUG_FLAG DEBUG_STATE /* State that we have chosen to store in the DYNAMIC segment of the - * i915 indirect state mechanism. + * i915 indirect state mechanism. * * Can't cache these in the way we do the static state, as there is no * start/size in the command packet, instead an 'end' value that gets @@ -47,10 +46,10 @@ * (active) state every time a 4kb boundary is crossed. */ -static INLINE void set_dynamic_indirect( struct i915_context *i915, - unsigned offset, - const unsigned *src, - unsigned dwords ) +static INLINE void set_dynamic_indirect(struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords) { unsigned i; @@ -61,38 +60,41 @@ static INLINE void set_dynamic_indirect( struct i915_context *i915, } + /*********************************************************************** - * Modes4: stencil masks and logicop + * Modes4: stencil masks and logicop */ -static void upload_MODES4( struct i915_context *i915 ) +static void upload_MODES4(struct i915_context *i915) { unsigned modes4 = 0; - /* I915_NEW_STENCIL */ + /* I915_NEW_STENCIL + */ modes4 |= i915->depth_stencil->stencil_modes4; - /* I915_NEW_BLEND */ + + /* I915_NEW_BLEND + */ modes4 |= i915->blend->modes4; - /* Always, so that we know when state is in-active: + /* Always, so that we know when state is in-active: */ - set_dynamic_indirect( i915, - I915_DYNAMIC_MODES4, - &modes4, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_MODES4, + &modes4, + 1); } const struct i915_tracked_state i915_upload_MODES4 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, - upload_MODES4 + "MODES4", + upload_MODES4, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL }; - /*********************************************************************** */ - -static void upload_BFO( struct i915_context *i915 ) +static void upload_BFO(struct i915_context *i915) { unsigned bfo[2]; bfo[0] = i915->depth_stencil->bfo[0]; @@ -101,88 +103,89 @@ static void upload_BFO( struct i915_context *i915 ) if (bfo[0] & BFO_ENABLE_STENCIL_REF) { bfo[0] |= i915->stencil_ref.ref_value[1] << BFO_STENCIL_REF_SHIFT; } - set_dynamic_indirect( i915, - I915_DYNAMIC_BFO_0, - &(bfo[0]), - 2 ); + + set_dynamic_indirect(i915, + I915_DYNAMIC_BFO_0, + &(bfo[0]), + 2); } const struct i915_tracked_state i915_upload_BFO = { - I915_NEW_DEPTH_STENCIL, - upload_BFO + "BFO", + upload_BFO, + I915_NEW_DEPTH_STENCIL }; + /*********************************************************************** */ - - -static void upload_BLENDCOLOR( struct i915_context *i915 ) +static void upload_BLENDCOLOR(struct i915_context *i915) { unsigned bc[2]; - memset( bc, 0, sizeof(bc) ); + memset(bc, 0, sizeof(bc)); - /* I915_NEW_BLEND {_COLOR} + /* I915_NEW_BLEND */ { const float *color = i915->blend_color.color; bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; - bc[1] = pack_ui32_float4( color[0], - color[1], - color[2], - color[3] ); + bc[1] = pack_ui32_float4(color[0], + color[1], + color[2], + color[3]); } - set_dynamic_indirect( i915, - I915_DYNAMIC_BC_0, - bc, - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_BC_0, + bc, + 2); } const struct i915_tracked_state i915_upload_BLENDCOLOR = { - I915_NEW_BLEND, - upload_BLENDCOLOR + "BLENDCOLOR", + upload_BLENDCOLOR, + I915_NEW_BLEND }; -/*********************************************************************** - */ -static void upload_IAB( struct i915_context *i915 ) +/*********************************************************************** + */ +static void upload_IAB(struct i915_context *i915) { unsigned iab = i915->blend->iab; - - set_dynamic_indirect( i915, - I915_DYNAMIC_IAB, - &iab, - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_IAB, + &iab, + 1); } const struct i915_tracked_state i915_upload_IAB = { - I915_NEW_BLEND, - upload_IAB + "IAB", + upload_IAB, + I915_NEW_BLEND }; + /*********************************************************************** */ - - - -static void upload_DEPTHSCALE( struct i915_context *i915 ) +static void upload_DEPTHSCALE(struct i915_context *i915) { - set_dynamic_indirect( i915, - I915_DYNAMIC_DEPTHSCALE_0, - &(i915->rasterizer->ds[0].u), - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2); } const struct i915_tracked_state i915_upload_DEPTHSCALE = { - I915_NEW_RASTERIZER, - upload_DEPTHSCALE + "DEPTHSCALE", + upload_DEPTHSCALE, + I915_NEW_RASTERIZER }; @@ -196,10 +199,9 @@ const struct i915_tracked_state i915_upload_DEPTHSCALE = { * XXX: does stipple pattern need to be adjusted according to * the window position? * - * XXX: possibly need workaround for conform paths test. + * XXX: possibly need workaround for conform paths test. */ - -static void upload_STIPPLE( struct i915_context *i915 ) +static void upload_STIPPLE(struct i915_context *i915) { unsigned st[2]; @@ -210,7 +212,6 @@ static void upload_STIPPLE( struct i915_context *i915 ) */ st[1] |= i915->rasterizer->st; - /* I915_NEW_STIPPLE */ { @@ -225,73 +226,75 @@ static void upload_STIPPLE( struct i915_context *i915 ) /* Not sure what to do about fallbacks, so for now just dont: */ st[1] |= ((p[0] << 0) | - (p[1] << 4) | - (p[2] << 8) | - (p[3] << 12)); + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); } - - set_dynamic_indirect( i915, - I915_DYNAMIC_STP_0, - &st[0], - 2 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_STP_0, + &st[0], + 2); } - const struct i915_tracked_state i915_upload_STIPPLE = { - I915_NEW_RASTERIZER | I915_NEW_STIPPLE, - upload_STIPPLE + "STIPPLE", + upload_STIPPLE, + I915_NEW_RASTERIZER | I915_NEW_STIPPLE }; /*********************************************************************** - * Scissor. + * Scissor enable */ static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) { - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_ENA_0, - &(i915->rasterizer->sc[0]), - 1 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1); } const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { - I915_NEW_RASTERIZER, - upload_SCISSOR_ENABLE + "SCISSOR ENABLE", + upload_SCISSOR_ENABLE, + I915_NEW_RASTERIZER }; -static void upload_SCISSOR_RECT( struct i915_context *i915 ) +/*********************************************************************** + * Scissor rect + */ +static void upload_SCISSOR_RECT(struct i915_context *i915) { unsigned x1 = i915->scissor.minx; unsigned y1 = i915->scissor.miny; unsigned x2 = i915->scissor.maxx; unsigned y2 = i915->scissor.maxy; unsigned sc[3]; - + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; sc[1] = (y1 << 16) | (x1 & 0xffff); sc[2] = (y2 << 16) | (x2 & 0xffff); - set_dynamic_indirect( i915, - I915_DYNAMIC_SC_RECT_0, - &sc[0], - 3 ); + set_dynamic_indirect(i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3); } - const struct i915_tracked_state i915_upload_SCISSOR_RECT = { - I915_NEW_SCISSOR, - upload_SCISSOR_RECT + "SCISSOR RECT", + upload_SCISSOR_RECT, + I915_NEW_SCISSOR }; - - - +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_MODES4, &i915_upload_BFO, @@ -306,12 +309,17 @@ static const struct i915_tracked_state *atoms[] = { /* These will be dynamic indirect state commands, but for now just end * up on the batch buffer with everything else. */ -void i915_update_dynamic( struct i915_context *i915 ) +static void update_dynamic(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } +struct i915_tracked_state i915_hw_dynamic = { + "dynamic", + update_dynamic, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 22082fece82..bbf9ff51f5f 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -29,6 +29,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_batch.h" +#include "i915_debug.h" #include "i915_reg.h" #include "i915_resource.h" @@ -111,15 +112,20 @@ i915_emit_hardware_state(struct i915_context *i915 ) 3 ) * 3/2; /* plus 50% margin */ -#if 0 - debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); -#endif - + uintptr_t save_ptr; + size_t save_relocs; + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + if(!BEGIN_BATCH(dwords, relocs)) { FLUSH_BATCH(NULL); assert(BEGIN_BATCH(dwords, relocs)); } + save_ptr = (uintptr_t)i915->batch->ptr; + save_relocs = i915->batch->relocs; + /* 14 dwords, 0 relocs */ if (i915->hardware_dirty & I915_HW_INVARIENT) { @@ -399,6 +405,9 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(0); } + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, + ((uintptr_t)i915->batch->ptr - save_ptr) / 4, + i915->batch->relocs - save_relocs); i915->hardware_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c index 8cec699285c..f9ade7077f2 100644 --- a/src/gallium/drivers/i915/i915_state_immediate.c +++ b/src/gallium/drivers/i915/i915_state_immediate.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,13 +22,13 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* * Authors: * Keith Whitwell <[email protected]> */ - + #include "i915_state_inlines.h" #include "i915_context.h" #include "i915_state.h" @@ -46,30 +46,31 @@ /*********************************************************************** - * S0,S1: Vertex buffer state. + * S0,S1: Vertex buffer state. */ static void upload_S0S1(struct i915_context *i915) { unsigned LIS0, LIS1; - /* I915_NEW_VBO */ - /* TODO: re-use vertex buffers here? */ + /* I915_NEW_VBO + */ LIS0 = i915->vbo_offset; - /* I915_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! + /* I915_NEW_VERTEX_SIZE */ + /* XXX do this where the vertex size is calculated! */ { unsigned vertex_size = i915->current.vertex_info.size; LIS1 = ((vertex_size << 24) | - (vertex_size << 16)); + (vertex_size << 16)); } - /* I915_NEW_VBO */ - /* TODO: use a vertex generation number to track vbo changes */ + /* I915_NEW_VBO + */ if (1 || i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || - i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) { i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; @@ -78,13 +79,13 @@ static void upload_S0S1(struct i915_context *i915) } const struct i915_tracked_state i915_upload_S0S1 = { - I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, - upload_S0S1 + "imm S0 S1", + upload_S0S1, + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT }; - /*********************************************************************** * S4: Vertex format, rasterization state */ @@ -92,7 +93,8 @@ static void upload_S2S4(struct i915_context *i915) { unsigned LIS2, LIS4; - /* I915_NEW_VERTEX_FORMAT */ + /* I915_NEW_VERTEX_FORMAT + */ { LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; @@ -113,35 +115,38 @@ static void upload_S2S4(struct i915_context *i915) } } - const struct i915_tracked_state i915_upload_S2S4 = { - I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, - upload_S2S4 + "imm S2 S4", + upload_S2S4, + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT }; /*********************************************************************** - * */ -static void upload_S5( struct i915_context *i915 ) +static void upload_S5(struct i915_context *i915) { unsigned LIS5 = 0; + /* I915_NEW_DEPTH_STENCIL + */ LIS5 |= i915->depth_stencil->stencil_LIS5; /* hope it's safe to set stencil ref value even if stencil test is disabled? */ LIS5 |= i915->stencil_ref.ref_value[0] << S5_STENCIL_REF_SHIFT; + /* I915_NEW_BLEND + */ LIS5 |= i915->blend->LIS5; #if 0 - /* I915_NEW_RASTERIZER */ + /* I915_NEW_RASTERIZER + */ if (i915->state.Polygon->OffsetFill) { LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; } #endif - if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; i915->hardware_dirty |= I915_HW_IMMEDIATE; @@ -149,14 +154,16 @@ static void upload_S5( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S5 = { - (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), - upload_S5 + "imm S5", + upload_S5, + I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER }; + /*********************************************************************** */ -static void upload_S6( struct i915_context *i915 ) +static void upload_S6(struct i915_context *i915) { unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); @@ -180,14 +187,16 @@ static void upload_S6( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, - upload_S6 + "imm s6", + upload_S6, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER }; + /*********************************************************************** */ -static void upload_S7( struct i915_context *i915 ) +static void upload_S7(struct i915_context *i915) { unsigned LIS7; @@ -202,11 +211,15 @@ static void upload_S7( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S7 = { - I915_NEW_RASTERIZER, - upload_S7 + "imm S7", + upload_S7, + I915_NEW_RASTERIZER }; + +/*********************************************************************** + */ static const struct i915_tracked_state *atoms[] = { &i915_upload_S0S1, &i915_upload_S2S4, @@ -215,13 +228,17 @@ static const struct i915_tracked_state *atoms[] = { &i915_upload_S7 }; -/* - */ -void i915_update_immediate( struct i915_context *i915 ) +static void update_immediate(struct i915_context *i915) { int i; for (i = 0; i < Elements(atoms); i++) if (i915->dirty & atoms[i]->dirty) - atoms[i]->update( i915 ); + atoms[i]->update(i915); } + +struct i915_tracked_state i915_hw_immediate = { + "immediate", + update_immediate, + ~0 /* all state atoms, becuase we do internal checking */ +}; diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index 77b9bccbb74..941259eb766 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -53,17 +53,23 @@ * * So we need to update the map state when we change samplers and * we need to be change the sampler state when map state is changed. - * The first part is done by calling i915_update_texture in - * i915_update_samplers and the second part is done else where in - * code tracking the state changes. + * The first part is done by calling update_texture in update_samplers + * and the second part is done else where in code tracking the state + * changes. + */ + +static void update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); + + + +/*********************************************************************** + * Samplers */ -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]); /** * Compute i915 texture sampling state. * @@ -74,16 +80,13 @@ i915_update_texture(struct i915_context *i915, */ static void update_sampler(struct i915_context *i915, uint unit, - const struct i915_sampler_state *sampler, - const struct i915_texture *tex, - unsigned state[3] ) + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3]) { const struct pipe_resource *pt = &tex->b.b; unsigned minlod, lastlod; - /* Need to do this after updating the maps, which call the - * intel_finalize_mipmap_tree and hence can update firstLevel: - */ state[0] = sampler->state[0]; state[1] = sampler->state[1]; state[2] = sampler->state[2]; @@ -118,7 +121,7 @@ static void update_sampler(struct i915_context *i915, wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { if (i915->conformance_mode > 0) { assert(0); - /* sampler->fallback = true; */ + /* sampler->fallback = true; */ /* TODO */ } } @@ -137,8 +140,7 @@ static void update_sampler(struct i915_context *i915, state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } - -void i915_update_samplers( struct i915_context *i915 ) +static void update_samplers(struct i915_context *i915) { uint unit; @@ -152,29 +154,38 @@ void i915_update_samplers( struct i915_context *i915 ) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - texture, /* texture */ - i915->current.sampler[unit] /* the result */ - ); - i915_update_texture( i915, - unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); + update_sampler(i915, + unit, + i915->sampler[unit], /* sampler state */ + texture, /* texture */ + i915->current.sampler[unit]); /* the result */ + update_texture(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); /* the result */ + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); } } i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; } +struct i915_tracked_state i915_hw_samplers = { + "sampler_views", + update_samplers, + I915_NEW_SAMPLER | I915_NEW_SAMPLER_VIEW +}; -static uint -translate_texture_format(enum pipe_format pipeFormat) + + +/*********************************************************************** + * Sampler views + */ + +static uint translate_texture_format(enum pipe_format pipeFormat) { switch (pipeFormat) { case PIPE_FORMAT_L8_UNORM: @@ -226,19 +237,17 @@ translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", - pipeFormat); + pipeFormat); assert(0); return 0; } } - -static void -i915_update_texture(struct i915_context *i915, - uint unit, - const struct i915_texture *tex, - const struct i915_sampler_state *sampler, - uint state[6]) +static void update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]) { const struct pipe_resource *pt = &tex->b.b; uint format, pitch; @@ -287,9 +296,7 @@ i915_update_texture(struct i915_context *i915, | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); } - -void -i915_update_textures(struct i915_context *i915) +static void update_textures(struct i915_context *i915) { uint unit; @@ -300,13 +307,19 @@ i915_update_textures(struct i915_context *i915) if (i915->fragment_sampler_views[unit]) { struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915_update_texture( i915, - unit, - texture, /* texture */ - i915->sampler[unit], /* sampler state */ - i915->current.texbuffer[unit] ); + update_texture(i915, + unit, + texture, /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit]); } } i915->hardware_dirty |= I915_HW_MAP; } + +struct i915_tracked_state i915_hw_sampler_views = { + "sampler_views", + update_textures, + I915_NEW_SAMPLER_VIEW +}; diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript index 019af682f68..119f914a16d 100644 --- a/src/gallium/drivers/i965/SConscript +++ b/src/gallium/drivers/i965/SConscript @@ -2,6 +2,10 @@ Import('*') env = env.Clone() +if msvc: + print 'warning: not building i965g' + Return(); + i965 = env.ConvenienceLibrary( target = 'i965', source = [ diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 4c8579365d5..28c83515ba9 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -239,7 +239,7 @@ char *imm_encoding[8] = { [2] = "UW", [3] = "W", [5] = "VF", - [5] = "V", + [6] = "V", [7] = "F" }; diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 2eb862635cc..0745254c3cc 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -50,10 +50,11 @@ static boolean brw_query_get_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result) + void *vresult) { struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; + uint64_t *result = (uint64_t*)vresult; /* Map and count the pixels from the current query BO */ if (query->bo) { diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 898e0b3af83..67be895b385 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -158,7 +158,7 @@ static boolean identity_get_query_result(struct pipe_context *_pipe, struct pipe_query *query, boolean wait, - uint64_t *result) + void *result) { struct identity_context *id_pipe = identity_context(_pipe); struct pipe_context *pipe = id_pipe->pipe; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index ca4743f9ef7..593928f399c 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -120,13 +120,14 @@ identity_sampler_view_create(struct identity_context *id_context, assert(view->texture == id_resource->resource); - id_view = MALLOC(sizeof(struct identity_sampler_view)); + id_view = CALLOC_STRUCT(identity_sampler_view); id_view->base = *view; id_view->base.reference.count = 1; id_view->base.texture = NULL; pipe_resource_reference(&id_view->base.texture, id_resource->resource); id_view->base.context = id_context->pipe; + id_view->sampler_view = view; return &id_view->base; error: @@ -180,8 +181,8 @@ identity_transfer_destroy(struct identity_context *id_context, struct identity_transfer *id_transfer) { pipe_resource_reference(&id_transfer->base.resource, NULL); - id_transfer->pipe->transfer_destroy(id_context->pipe, - id_transfer->transfer); + id_context->pipe->transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index 5eea10b0b5a..e8deabf4fc7 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -65,7 +65,6 @@ struct identity_transfer { struct pipe_transfer base; - struct pipe_context *pipe; struct pipe_transfer *transfer; }; diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index a1b6f56e0d2..4e0d4c3fc0c 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -3,3 +3,4 @@ lp_test_blend lp_test_conv lp_test_format lp_test_printf +lp_test_sincos diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 213e4c4a99a..ee28179c303 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -34,8 +34,10 @@ C_SOURCES = \ lp_state_clip.c \ lp_state_derived.c \ lp_state_fs.c \ + lp_state_gs.c \ lp_state_rasterizer.c \ lp_state_sampler.c \ + lp_state_so.c \ lp_state_surface.c \ lp_state_vertex.c \ lp_state_vs.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a0646692e7b..a1ef71da89d 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -57,6 +57,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', + 'lp_state_so.c', 'lp_state_surface.c', 'lp_state_vertex.c', 'lp_state_vs.c', diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 9e88a6e09f4..3db4f12ebb6 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -36,6 +36,7 @@ #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "lp_clear.h" #include "lp_context.h" #include "lp_flush.h" @@ -94,6 +95,8 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); + make_empty_list(&llvmpipe->fs_variants_list); + llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; llvmpipe->pipe.priv = priv; @@ -110,6 +113,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe_init_sampler_funcs(llvmpipe); llvmpipe_init_query_funcs( llvmpipe ); llvmpipe_init_vertex_funcs(llvmpipe); + llvmpipe_init_so_funcs(llvmpipe); llvmpipe_init_fs_funcs(llvmpipe); llvmpipe_init_vs_funcs(llvmpipe); llvmpipe_init_gs_funcs(llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index cb04d4a4d54..986e604ce7c 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -38,6 +38,7 @@ #include "lp_tex_sample.h" #include "lp_jit.h" #include "lp_setup.h" +#include "lp_state_fs.h" struct llvmpipe_vbuf_render; @@ -62,6 +63,7 @@ struct llvmpipe_context { const struct lp_vertex_shader *vs; const struct lp_geometry_shader *gs; const struct lp_velems_state *velems; + const struct lp_so_state *so; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -75,6 +77,12 @@ struct llvmpipe_context { struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct { + struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; + int offset[PIPE_MAX_SO_BUFFERS]; + int so_count[PIPE_MAX_SO_BUFFERS]; + int num_buffers; + } so_target; unsigned num_samplers; unsigned num_fragment_sampler_views; @@ -105,6 +113,8 @@ struct llvmpipe_context { unsigned tex_timestamp; boolean no_rast; + struct lp_fs_variant_list_item fs_variants_list; + unsigned nr_fs_variants; }; diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index 4102a9df67c..d1c431475d8 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -66,5 +66,10 @@ */ #define LP_MAX_SCENE_SIZE (512 * 1024 * 1024) +/** + * Max number of shader variants (for all shaders combined, + * per context) that will be kept around. + */ +#define LP_MAX_SHADER_VARIANTS 1024 #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index c4864422860..c902c046845 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -76,10 +76,11 @@ static boolean llvmpipe_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result ) + void *vresult) { struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); struct llvmpipe_query *pq = llvmpipe_query(q); + uint64_t *result = (uint64_t *)vresult; if (!pq->done) { lp_setup_flush(llvmpipe->setup, 0); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 50e44dcb2b3..1fadb9cca1e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -627,7 +627,7 @@ void lp_rast_begin_query(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - /* Reset the the per-task counter */ + /* Reset the per-task counter */ task->vis_counter = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index b6db66fe64e..9e319fd9f00 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -297,6 +297,11 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup, */ set_scene_state( setup, SETUP_FLUSHED ); + /* + * Ensure the old scene is not reused. + */ + assert(!setup->scene); + /* Set new state. This will be picked up later when we next need a * scene. */ @@ -326,7 +331,7 @@ lp_setup_clear( struct lp_setup_context *setup, if (flags & PIPE_CLEAR_DEPTHSTENCIL) { if (setup->fb.zsbuf && - ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + ((flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && util_format_is_depth_and_stencil(setup->fb.zsbuf->format)) full_zs_clear = FALSE; @@ -933,6 +938,8 @@ lp_setup_begin_query(struct lp_setup_context *setup, memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */ + set_scene_state( setup, SETUP_ACTIVE ); + cmd_arg.query_obj = pq; lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg); pq->binned = TRUE; @@ -948,6 +955,8 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) struct lp_scene * scene = lp_setup_get_current_scene(setup); union lp_rast_cmd_arg cmd_arg; + set_scene_state( setup, SETUP_ACTIVE ); + cmd_arg.query_obj = pq; lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index f6a424f25a8..e53a62cb729 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -60,10 +60,6 @@ static const struct vertex_info * lp_setup_get_vertex_info(struct vbuf_render *vbr) { struct lp_setup_context *setup = lp_setup_context(vbr); - - /* vertex size/info depends on the latest state */ - lp_setup_update_state(setup); - return setup->vertex_info; } diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 3f7a85b6827..05d1b937944 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -54,6 +54,9 @@ #define LP_NEW_QUERY 0x4000 #define LP_NEW_BLEND_COLOR 0x8000 #define LP_NEW_GS 0x10000 +#define LP_NEW_SO 0x20000 +#define LP_NEW_SO_BUFFERS 0x40000 + struct vertex_info; @@ -82,6 +85,10 @@ struct lp_velems_state struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; +struct lp_so_state { + struct pipe_stream_output_state base; +}; + void llvmpipe_set_framebuffer_state(struct pipe_context *, @@ -120,5 +127,9 @@ llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe); void llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe); +void +llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe); + + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 2619e043fdf..65115052cdd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -68,6 +68,7 @@ #include "util/u_format.h" #include "util/u_dump.h" #include "util/u_string.h" +#include "util/u_simple_list.h" #include "os/os_time.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" @@ -95,6 +96,7 @@ #include "lp_setup.h" #include "lp_state.h" #include "lp_tex_sample.h" +#include "lp_flush.h" #include <llvm-c/Analysis.h> @@ -936,7 +938,10 @@ generate_variant(struct llvmpipe_context *lp, if(!variant) return NULL; - variant->no = shader->variant_no++; + variant->shader = shader; + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + variant->no = shader->variants_created++; memcpy(&variant->key, key, sizeof *key); @@ -962,10 +967,6 @@ generate_variant(struct llvmpipe_context *lp, !shader->info.uses_kill ? TRUE : FALSE; - /* insert new variant into linked list */ - variant->next = shader->variants; - shader->variants = variant; - return variant; } @@ -981,6 +982,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, return NULL; shader->no = fs_no++; + make_empty_list(&shader->variants); /* get/save the summary info for this shader */ tgsi_scan_shader(templ->tokens, &shader->info); @@ -1024,14 +1026,40 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) llvmpipe->dirty |= LP_NEW_FS; } +static void +remove_shader_variant(struct llvmpipe_context *lp, + struct lp_fragment_shader_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + unsigned i; + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n", + variant->shader->no, variant->no, variant->shader->variants_created, + variant->shader->variants_cached, lp->nr_fs_variants); + } + for (i = 0; i < Elements(variant->function); i++) { + if (variant->function[i]) { + if (variant->jit_function[i]) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function[i]); + LLVMDeleteFunction(variant->function[i]); + } + } + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + lp->nr_fs_variants--; + FREE(variant); +} static void llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + struct pipe_fence_handle *fence = NULL; struct lp_fragment_shader *shader = fs; - struct lp_fragment_shader_variant *variant; + struct lp_fs_variant_list_item *li; assert(fs != llvmpipe->fs); (void) llvmpipe; @@ -1039,29 +1067,24 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) /* * XXX: we need to flush the context until we have some sort of reference * counting in fragment shaders as they may still be binned + * Flushing alone might not sufficient we need to wait on it too. */ - draw_flush(llvmpipe->draw); - lp_setup_flush(llvmpipe->setup, 0); - - variant = shader->variants; - while(variant) { - struct lp_fragment_shader_variant *next = variant->next; - unsigned i; - for (i = 0; i < Elements(variant->function); i++) { - if (variant->function[i]) { - if (variant->jit_function[i]) - LLVMFreeMachineCodeForFunction(screen->engine, - variant->function[i]); - LLVMDeleteFunction(variant->function[i]); - } - } + llvmpipe_flush(pipe, 0, &fence); - FREE(variant); + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } - variant = next; + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct lp_fs_variant_list_item *next = next_elem(li); + remove_shader_variant(llvmpipe, li->base); + li = next; } + assert(shader->variants_cached == 0); FREE((void *) shader->base.tokens); FREE(shader); } @@ -1088,9 +1111,10 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, /* note: reference counting */ pipe_resource_reference(&llvmpipe->constants[shader][index], constants); - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, index, - data, size); + if(shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_GEOMETRY) { + draw_set_mapped_constant_buffer(llvmpipe->draw, shader, + index, data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; @@ -1215,7 +1239,6 @@ make_variant_key(struct llvmpipe_context *lp, lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); } - /** * Update fragment state. This is called just prior to drawing * something when some fragment-related state has changed. @@ -1225,21 +1248,47 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) { struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; - struct lp_fragment_shader_variant *variant; + struct lp_fragment_shader_variant *variant = NULL; + struct lp_fs_variant_list_item *li; make_variant_key(lp, shader, &key); - variant = shader->variants; - while(variant) { - if(memcmp(&variant->key, &key, sizeof key) == 0) + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + if(memcmp(&li->base->key, &key, sizeof key) == 0) { + variant = li->base; break; - - variant = variant->next; + } + li = next_elem(li); } - if (!variant) { + if (variant) { + move_to_head(&lp->fs_variants_list, &variant->list_item_global); + } + else { int64_t t0, t1; int64_t dt; + unsigned i; + if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) { + struct pipe_context *pipe = &lp->pipe; + struct pipe_fence_handle *fence = NULL; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_flush(pipe, 0, &fence); + + if (fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) { + struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list); + remove_shader_variant(lp, item->base); + } + } t0 = os_time_get(); variant = generate_variant(lp, shader, &key); @@ -1248,6 +1297,13 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) dt = t1 - t0; LP_COUNT_ADD(llvm_compile_time, dt); LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */ + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&lp->fs_variants_list, &variant->list_item_global); + lp->nr_fs_variants++; + shader->variants_cached++; + } } lp_setup_set_fs_variant(lp->setup, variant); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 64ead2a9973..593cd4de6be 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -64,6 +64,11 @@ struct lp_fragment_shader_variant_key struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS]; }; +struct lp_fs_variant_list_item +{ + struct lp_fragment_shader_variant *base; + struct lp_fs_variant_list_item *next, *prev; +}; struct lp_fragment_shader_variant { @@ -75,7 +80,8 @@ struct lp_fragment_shader_variant lp_jit_frag_func jit_function[2]; - struct lp_fragment_shader_variant *next; + struct lp_fs_variant_list_item list_item_global, list_item_local; + struct lp_fragment_shader *shader; /* For debugging/profiling purposes */ unsigned no; @@ -89,11 +95,12 @@ struct lp_fragment_shader struct tgsi_shader_info info; - struct lp_fragment_shader_variant *variants; + struct lp_fs_variant_list_item variants; /* For debugging/profiling purposes */ unsigned no; - unsigned variant_no; + unsigned variants_created; + unsigned variants_cached; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_gs.c b/src/gallium/drivers/llvmpipe/lp_state_gs.c index e5eeb88e2ba..1ba6f10821e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_gs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_gs.c @@ -33,7 +33,6 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "draw/draw_context.h" -#include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 55d43368a3e..e94065fb6ab 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -105,6 +105,13 @@ llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe, static void +llvmpipe_bind_geometry_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + /* XXX: implementation missing */ +} + +static void llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe, unsigned num, struct pipe_sampler_view **views) @@ -163,6 +170,14 @@ llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe, } +static void +llvmpipe_set_geometry_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + /*XXX: implementation missing */ +} + static struct pipe_sampler_view * llvmpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, @@ -206,8 +221,10 @@ llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe) llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states; llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states; + llvmpipe->pipe.bind_geometry_sampler_states = llvmpipe_bind_geometry_sampler_states; llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views; llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views; + llvmpipe->pipe.set_geometry_sampler_views = llvmpipe_set_geometry_sampler_views; llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view; llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy; llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state; diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c new file mode 100644 index 00000000000..30b17c98816 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_so.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "lp_context.h" +#include "lp_state.h" +#include "lp_texture.h" + +#include "util/u_memory.h" +#include "draw/draw_context.h" + + +static void * +llvmpipe_create_stream_output_state(struct pipe_context *pipe, + const struct pipe_stream_output_state *templ) +{ + struct lp_so_state *so; + so = (struct lp_so_state *) CALLOC_STRUCT(lp_so_state); + + if (so) { + so->base.num_outputs = templ->num_outputs; + so->base.stride = templ->stride; + memcpy(so->base.output_buffer, + templ->output_buffer, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_index, + templ->register_index, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_mask, + templ->register_mask, + sizeof(ubyte) * templ->num_outputs); + } + return so; +} + +static void +llvmpipe_bind_stream_output_state(struct pipe_context *pipe, + void *so) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + struct lp_so_state *lp_so = (struct lp_so_state *) so; + + lp->so = lp_so; + + lp->dirty |= LP_NEW_SO; + + if (lp_so) + draw_set_so_state(lp->draw, &lp_so->base); +} + +static void +llvmpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) +{ + FREE( so ); +} + +static void +llvmpipe_set_stream_output_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct llvmpipe_context *lp = llvmpipe_context(pipe); + int i; + void *map_buffers[PIPE_MAX_SO_BUFFERS]; + + assert(num_buffers <= PIPE_MAX_SO_BUFFERS); + if (num_buffers > PIPE_MAX_SO_BUFFERS) + num_buffers = PIPE_MAX_SO_BUFFERS; + + lp->dirty |= LP_NEW_SO_BUFFERS; + + for (i = 0; i < num_buffers; ++i) { + void *mapped; + struct llvmpipe_resource *res = llvmpipe_resource(buffers[i]); + + if (!res) { + /* the whole call is invalid, bail out */ + lp->so_target.num_buffers = 0; + draw_set_mapped_so_buffers(lp->draw, 0, 0); + return; + } + + lp->so_target.buffer[i] = res; + lp->so_target.offset[i] = offsets[i]; + lp->so_target.so_count[i] = 0; + + mapped = res->data; + if (offsets[i] >= 0) + map_buffers[i] = ((char*)mapped) + offsets[i]; + else { + /* this is a buffer append */ + assert(!"appending not implemented"); + map_buffers[i] = mapped; + } + } + lp->so_target.num_buffers = num_buffers; + + draw_set_mapped_so_buffers(lp->draw, map_buffers, num_buffers); +} + +void +llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe) +{ + llvmpipe->pipe.create_stream_output_state = + llvmpipe_create_stream_output_state; + llvmpipe->pipe.bind_stream_output_state = + llvmpipe_bind_stream_output_state; + llvmpipe->pipe.delete_stream_output_state = + llvmpipe_delete_stream_output_state; + + llvmpipe->pipe.set_stream_output_buffers = + llvmpipe_set_stream_output_buffers; +} diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 4f409f9b638..f0cbbe8c560 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -67,14 +67,14 @@ lp_resource_copy(struct pipe_context *pipe, dst, subdst.face, subdst.level, 0, /* flush_flags */ FALSE, /* read_only */ - FALSE, /* cpu_access */ + TRUE, /* cpu_access */ FALSE); /* do_not_block */ llvmpipe_flush_resource(pipe, src, subsrc.face, subsrc.level, 0, /* flush_flags */ TRUE, /* read_only */ - FALSE, /* cpu_access */ + TRUE, /* cpu_access */ FALSE); /* do_not_block */ /* @@ -106,19 +106,27 @@ lp_resource_copy(struct pipe_context *pipe, unsigned x, y; enum lp_texture_usage usage; - /* XXX for the tiles which are completely contained by the - * dest rectangle, we could set the usage mode to WRITE_ALL. - * Just test for the case of replacing the whole dest region for now. - */ - if (width == dst_tex->base.width0 && height == dst_tex->base.height0) - usage = LP_TEX_USAGE_WRITE_ALL; - else - usage = LP_TEX_USAGE_READ_WRITE; - adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th); for (y = 0; y < th; y += TILE_SIZE) { + boolean contained_y = ty + y >= dsty && + ty + y + TILE_SIZE <= dsty + height ? + TRUE : FALSE; + for (x = 0; x < tw; x += TILE_SIZE) { + boolean contained_x = tx + x >= dstx && + tx + x + TILE_SIZE <= dstx + width ? + TRUE : FALSE; + + /* + * Set the usage mode to WRITE_ALL for the tiles which are + * completely contained by the dest rectangle. + */ + if (contained_y && contained_x) + usage = LP_TEX_USAGE_WRITE_ALL; + else + usage = LP_TEX_USAGE_READ_WRITE; + (void) llvmpipe_get_texture_tile_linear(dst_tex, subdst.face, subdst.level, usage, @@ -153,4 +161,6 @@ void llvmpipe_init_surface_functions(struct llvmpipe_context *lp) { lp->pipe.resource_copy_region = lp_resource_copy; + lp->pipe.clear_render_target = util_clear_render_target; + lp->pipe.clear_depth_stencil = util_clear_depth_stencil; } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 97f938e6980..8cb1639013e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3924,7 +3924,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) case TGSI_PROPERTY_GS_OUTPUT_PRIM: p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); break; - case TGSI_PROPERTY_GS_MAX_VERTICES: + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: p->cfg.vert_count = data[0]; break; default: diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 57b16a355dc..53f94820ce0 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -106,8 +106,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) static boolean nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) + boolean wait, void *vresult) { + uint64_t *result = (uint64_t*)vresult; struct nv50_query *q = nv50_query(pq); int ret; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 814921f7645..3e61203adff 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -115,7 +115,6 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) } if (!bo->tile_flags) { - MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); @@ -126,7 +125,6 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) OUT_RELOCh(chan, bo, ps->offset, flags); OUT_RELOCl(chan, bo, ps->offset, flags); } else { - MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */ BEGIN_RING(chan, eng2d, mthd, 5); OUT_RING (chan, format); OUT_RING (chan, 0); @@ -162,7 +160,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, struct nouveau_grobj *eng2d = screen->eng2d; int ret; - WAIT_RING (chan, 32); + ret = MARK_RING(chan, 2*16 + 32, 4); + if (ret) + return ret; ret = nv50_surface_set(screen, dst, 1); if (ret) @@ -209,7 +209,7 @@ nv50_surface_copy(struct pipe_context *pipe, (nv50_2d_format_faithful(src->format) && nv50_2d_format_faithful(dest->format))); - ps_src = nv50_miptree_surface_new(pipe->screen, dest, subsrc.face, + ps_src = nv50_miptree_surface_new(pipe->screen, src, subsrc.face, subsrc.level, srcz, 0 /* bind flags */); ps_dst = nv50_miptree_surface_new(pipe->screen, dest, subdst.face, subdst.level, destz, 0 /* bindflags */); @@ -241,7 +241,9 @@ nv50_clear_render_target(struct pipe_context *pipe, if (format < 0) return; - WAIT_RING (chan, 32); + ret = MARK_RING (chan, 16 + 32, 2); + if (ret) + return; ret = nv50_surface_set(screen, dst, 1); if (ret) diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c index 1b20b5245d7..1dab20c41a0 100644 --- a/src/gallium/drivers/nvfx/nvfx_query.c +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -96,8 +96,9 @@ nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) static boolean nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) + boolean wait, void *vresult) { + uint64_t *result = (uint64_t *)vresult; struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_query *q = nvfx_query(pq); diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index 44e1af2e6a2..a605d2b7545 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -44,7 +44,7 @@ nvfx_surface_copy(struct pipe_context *pipe, struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; struct pipe_surface *ps_dst, *ps_src; - ps_src = nvfx_miptree_surface_new(pipe->screen, dest, subsrc.face, + ps_src = nvfx_miptree_surface_new(pipe->screen, src, subsrc.face, subsrc.level, srcz, 0 /* bind flags */); ps_dst = nvfx_miptree_surface_new(pipe->screen, dest, subdst.face, subdst.level, destz, 0 /* bindflags */); diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 6bb82e5ed08..13152635a65 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -14,12 +14,13 @@ C_SOURCES = \ r300_hyperz.c \ r300_query.c \ r300_render.c \ + r300_render_stencilref.c \ + r300_render_translate.c \ r300_resource.c \ r300_screen.c \ r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ - r300_state_invariant.c \ r300_vs.c \ r300_vs_draw.c \ r300_texture.c \ diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index eb3e1d365e9..552ed4e5bef 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -24,12 +24,13 @@ r300 = env.ConvenienceLibrary( 'r300_hyperz.c', 'r300_query.c', 'r300_render.c', + 'r300_render_stencilref.c', + 'r300_render_translate.c', 'r300_resource.c', 'r300_screen.c', 'r300_screen_buffer.c', 'r300_state.c', 'r300_state_derived.c', - 'r300_state_invariant.c', 'r300_vs.c', 'r300_vs_draw.c', 'r300_texture.c', diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 49ec51fd132..97d53a14f86 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -25,8 +25,23 @@ #include "util/u_format.h" -static void r300_blitter_save_states(struct r300_context* r300) +enum r300_blitter_op /* bitmask */ { + R300_CLEAR = 1, + R300_CLEAR_SURFACE = 2, + R300_COPY = 4 +}; + +static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) +{ + if (r300->query_current) { + r300->blitter_saved_query = r300->query_current; + r300_stop_query(r300); + } + + /* Yeah we have to save all those states to ensure the blitter operation + * is really transparent. The states will be restored by the blitter once + * copying is done. */ util_blitter_save_blend(r300->blitter, r300->blend_state.state); util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); @@ -34,11 +49,34 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_fragment_shader(r300->blitter, r300->fs.state); util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); util_blitter_save_viewport(r300->blitter, &r300->viewport); - util_blitter_save_clip(r300->blitter, &r300->clip); + util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state); util_blitter_save_vertex_elements(r300->blitter, r300->velems); - /* XXX this crashes the driver util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count, - r300->vertex_buffer); */ + r300->vertex_buffer); + + if (op & (R300_CLEAR_SURFACE | R300_COPY)) + util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); + + if (op & R300_COPY) { + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; + + util_blitter_save_fragment_sampler_states( + r300->blitter, state->sampler_state_count, + (void**)state->sampler_states); + + util_blitter_save_fragment_sampler_views( + r300->blitter, state->sampler_view_count, + (struct pipe_sampler_view**)state->sampler_views); + } +} + +static void r300_blitter_end(struct r300_context *r300) +{ + if (r300->blitter_saved_query) { + r300_resume_query(r300, r300->blitter_saved_query); + r300->blitter_saved_query = NULL; + } } /* Clear currently bound buffers. */ @@ -48,24 +86,37 @@ static void r300_clear(struct pipe_context* pipe, double depth, unsigned stencil) { - /* XXX Implement fastfill. + /* My notes about fastfill: + * + * 1) Only the zbuffer is cleared. + * + * 2) The zbuffer must be micro-tiled and whole microtiles must be + * written. If microtiling is disabled, it locks up. * - * If fastfill is enabled, a few facts should be considered: + * 3) There is Z Mask RAM which contains a compressed zbuffer and + * it interacts with fastfill. We should figure out how to use it + * to get more performance. + * This is what we know about the Z Mask: * - * 1) Zbuffer must be micro-tiled and whole microtiles must be - * written. + * Each dword of the Z Mask contains compression information + * for 16 4x4 pixel blocks, that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different + * pipe. * - * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be - * equal to 0. + * 4) ZB_DEPTHCLEARVALUE is used to clear the zbuffer and the Z Mask must + * be equal to 0. (clear the Z Mask RAM with zeros) * - * 3) For 16-bit integer buffering, compression causes a hung with one or + * 5) For 16-bit zbuffer, compression causes a hung with one or * two samples and should not be used. * - * 4) Fastfill must not be used if reading of compressed Z data is disabled + * 6) FORCE_COMPRESSED_STENCIL_VALUE should be enabled for stencil clears + * to avoid needless decompression. + * + * 7) Fastfill must not be used if reading of compressed Z data is disabled * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE), * i.e. it cannot be used to compress the zbuffer. - * (what the hell does that mean and how does it fit in clearing - * the buffers?) + * + * 8) ZB_CB_CLEAR does not interact with fastfill in any way. * * - Marek */ @@ -74,13 +125,45 @@ static void r300_clear(struct pipe_context* pipe, struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - r300_blitter_save_states(r300); - + r300_blitter_begin(r300, R300_CLEAR); util_blitter_clear(r300->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); + r300_blitter_end(r300); +} + +/* Clear a region of a color surface to a constant value. */ +static void r300_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const float *rgba, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_clear_render_target(r300->blitter, dst, rgba, + dstx, dsty, width, height); + r300_blitter_end(r300); +} + +/* Clear a region of a depth stencil surface. */ +static void r300_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r300_context *r300 = r300_context(pipe); + + r300_blitter_begin(r300, R300_CLEAR_SURFACE); + util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, + dstx, dsty, width, height); + r300_blitter_end(r300); } /* Copy a block of pixels from one surface to another using HW. */ @@ -94,27 +177,12 @@ static void r300_hw_copy_region(struct pipe_context* pipe, unsigned width, unsigned height) { struct r300_context* r300 = r300_context(pipe); - struct r300_textures_state* state = - (struct r300_textures_state*)r300->textures_state.state; - /* Yeah we have to save all those states to ensure this blitter operation - * is really transparent. The states will be restored by the blitter once - * copying is done. */ - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_save_fragment_sampler_states( - r300->blitter, state->sampler_state_count, - (void**)state->sampler_states); - - util_blitter_save_fragment_sampler_views( - r300->blitter, state->sampler_view_count, - (struct pipe_sampler_view**)state->sampler_views); - - /* Do a copy */ + r300_blitter_begin(r300, R300_COPY); util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height, TRUE); + r300_blitter_end(r300); } /* Copy a block of pixels from one surface to another. */ @@ -130,14 +198,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, enum pipe_format old_format = dst->format; enum pipe_format new_format = old_format; - if (dst->format != src->format) { - debug_printf("r300: Implementation error: Format mismatch in %s\n" - " : src: %s dst: %s\n", __FUNCTION__, - util_format_short_name(src->format), - util_format_short_name(dst->format)); - debug_assert(0); - } - if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->target, src->nr_samples, @@ -188,40 +248,6 @@ static void r300_resource_copy_region(struct pipe_context *pipe, } } -/* Clear a region of a color surface to a constant value. */ -static void r300_clear_render_target(struct pipe_context *pipe, - struct pipe_surface *dst, - const float *rgba, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r300_context *r300 = r300_context(pipe); - - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_clear_render_target(r300->blitter, dst, rgba, - dstx, dsty, width, height); -} - -/* Clear a region of a depth stencil surface. */ -static void r300_clear_depth_stencil(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r300_context *r300 = r300_context(pipe); - - r300_blitter_save_states(r300); - util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); - - util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); -} - void r300_init_blit_functions(struct r300_context *r300) { r300->context.clear = r300_clear; diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h new file mode 100644 index 00000000000..61b28330b0b --- /dev/null +++ b/src/gallium/drivers/r300/r300_cb.h @@ -0,0 +1,142 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * This file contains macros for building command buffers in memory. + * + * Use NEW_CB for buffers with a varying size and it will also allocate + * the buffer. + * Use BEGIN_CB for arrays with a static size. + * + * Example: + * + * uint32_t cb[3]; + * CB_LOCALS; + * + * BEGIN_CB(cb, 3); + * OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); + * OUT_CB(blend_color_red_alpha); + * OUT_CB(blend_color_green_blue); + * END_CB; + * + * And later: + * + * CS_LOCALS; + * WRITE_CS_TABLE(cb, 3); + * + * Or using a little slower variant: + * + * CS_LOCALS; + * BEGIN_CS(cb, 3); + * OUT_CS_TABLE(cb, 3); + * END_CS; + */ + +#ifndef R300_CB_H +#define R300_CB_H + +#include "r300_reg.h" + +/* Yes, I know macros are ugly. However, they are much prettier than the code + * that they neatly hide away, and don't have the cost of function setup, so + * we're going to use them. */ + +#ifdef DEBUG +#define CB_DEBUG(x) x +#else +#define CB_DEBUG(x) +#endif + + +/** + * Command buffer setup. + */ + +#define CB_LOCALS \ + CB_DEBUG(int cs_count = 0;) \ + uint32_t *cs_ptr = NULL; \ + CB_DEBUG((void) cs_count;) (void) cs_ptr; + +#define NEW_CB(ptr, size) do { \ + assert(sizeof(*ptr) == sizeof(uint32_t)); \ + cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \ + CB_DEBUG(cs_count = size;) \ +} while (0) + +#define BEGIN_CB(ptr, size) do { \ + assert(sizeof(*ptr) == sizeof(uint32_t)); \ + cs_ptr = ptr; \ + CB_DEBUG(cs_count = size;) \ +} while (0) + +#define BEGIN_CS_AS_CB(r300, size) \ + BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, size), size) + +#define END_CB do { \ + CB_DEBUG(if (cs_count != 0) \ + debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ + cs_count, __FUNCTION__, __FILE__, __LINE__);) \ +} while (0) + + +/** + * Storing pure DWORDs. + */ + +#define OUT_CB(value) do { \ + *cs_ptr = (value); \ + cs_ptr++; \ + CB_DEBUG(cs_count--;) \ +} while (0) + +#define OUT_CB_TABLE(values, count) do { \ + memcpy(cs_ptr, values, count * sizeof(uint32_t)); \ + cs_ptr += count; \ + CB_DEBUG(cs_count -= count;) \ +} while (0) + +#define OUT_CB_32F(value) \ + OUT_CB(fui(value)); + +#define OUT_CB_REG(register, value) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, 0)); \ + OUT_CB(value); \ +} while (0) + +/* Note: This expects count to be the number of registers, + * not the actual packet0 count! */ +#define OUT_CB_REG_SEQ(register, count) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, (count) - 1)); \ +} while (0) + +#define OUT_CB_ONE_REG(register, count) do { \ + assert(register); \ + OUT_CB(CP_PACKET0(register, (count) - 1) | RADEON_ONE_REG_WR); \ +} while (0) + +#define OUT_CB_PKT3(op, count) \ + OUT_CB(CP_PACKET3(op, count)) + +#endif /* R300_CB_H */ diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index e6dca66d4a0..21f3b9d2610 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -36,6 +36,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) caps->num_vert_fpus = 2; caps->num_tex_units = 16; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->has_hiz = TRUE; caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -76,6 +77,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4E54: case 0x4E56: caps->family = CHIP_FAMILY_RV350; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -106,6 +108,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5B64: case 0x5B65: caps->family = CHIP_FAMILY_RV370; + caps->has_hiz = FALSE; caps->high_second_pipe = TRUE; break; @@ -201,24 +204,28 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5954: case 0x5955: caps->family = CHIP_FAMILY_RS480; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5974: case 0x5975: caps->family = CHIP_FAMILY_RS482; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A41: case 0x5A42: caps->family = CHIP_FAMILY_RS400; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; case 0x5A61: case 0x5A62: caps->family = CHIP_FAMILY_RC410; + caps->has_hiz = FALSE; caps->has_tcl = FALSE; break; diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index ab649c38573..65750f54e71 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -42,6 +42,8 @@ struct r300_capabilities { unsigned num_tex_units; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Some chipsets do not have HiZ RAM. */ + boolean has_hiz; /* Whether or not this is RV350 or newer, including all r400 and r500 * chipsets. The differences compared to the oldest r300 chips are: * - Blend LTE/GTE thresholds diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 88ce1867982..df2874d5bf2 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -23,22 +23,64 @@ #include "draw/draw_context.h" #include "util/u_memory.h" +#include "util/u_sampler.h" #include "util/u_simple_list.h" #include "util/u_upload_mgr.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_state_invariant.h" #include "r300_winsys.h" #include <inttypes.h> +static void r300_release_referenced_objects(struct r300_context *r300) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *textures = + (struct r300_textures_state*)r300->textures_state.state; + struct r300_query *query, *temp; + unsigned i; + + /* Framebuffer state. */ + for (i = 0; i < fb->nr_cbufs; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + pipe_surface_reference(&fb->zsbuf, NULL); + + /* Textures. */ + for (i = 0; i < textures->sampler_view_count; i++) + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&textures->sampler_views[i], NULL); + + /* The special dummy texture for texkill. */ + if (r300->texkill_sampler) { + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&r300->texkill_sampler, + NULL); + } + + /* The SWTCL VBO. */ + pipe_resource_reference(&r300->vbo, NULL); + + /* Vertex buffers. */ + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); + } + + /* If there are any queries pending or not destroyed, remove them now. */ + foreach_s(query, temp, &r300->query_list) { + remove_from_list(query); + FREE(query); + } +} + static void r300_destroy_context(struct pipe_context* context) { struct r300_context* r300 = r300_context(context); - struct r300_query* query, * temp; struct r300_atom *atom; util_blitter_destroy(r300->blitter); @@ -54,24 +96,24 @@ static void r300_destroy_context(struct pipe_context* context) } } - /* Free the OQ BO. */ - context->screen->resource_destroy(context->screen, r300->oqbo); - - /* If there are any queries pending or not destroyed, remove them now. */ - foreach_s(query, temp, &r300->query_list) { - remove_from_list(query); - FREE(query); - } - u_upload_destroy(r300->upload_vb); u_upload_destroy(r300->upload_ib); + translate_cache_destroy(r300->tran.translate_cache); + + r300_release_referenced_objects(r300); + + FREE(r300->aa_state.state); FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); + FREE(r300->gpu_flush.state); + FREE(r300->hyperz_state.state); + FREE(r300->invariant_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); + FREE(r300->vap_invariant_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -99,8 +141,10 @@ static void r300_flush_cb(void *data) static void r300_setup_atoms(struct r300_context* r300) { + boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; + boolean drm_2_3_0 = r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); /* Create the actual atom list. * @@ -108,44 +152,75 @@ static void r300_setup_atoms(struct r300_context* r300) * can affect performance and conformance if not handled with care. * * Some atoms never change size, others change every emit - those have - * the size of 0 here. */ + * the size of 0 here. + * + * NOTE: The framebuffer state is split into these atoms: + * - gpu_flush (unpipelined regs) + * - aa_state (unpipelined regs) + * - fb_state (unpipelined regs) + * - hyperz_state (unpipelined regs followed by pipelined ones) + * - fb_state_pipelined (pipelined regs) + * The motivation behind this is to be able to emit a strict + * subset of the regs, and to have reasonable register ordering. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant_state, 71); - R300_INIT_ATOM(query_start, 4); + /* SC, GB (unpipelined), RB3D (unpipelined), ZB (unpipelined). */ + R300_INIT_ATOM(gpu_flush, 9); + R300_INIT_ATOM(aa_state, 4); + R300_INIT_ATOM(fb_state, 0); + /* ZB (unpipelined), SC. */ + R300_INIT_ATOM(hyperz_state, 6); R300_INIT_ATOM(ztop_state, 2); + /* ZB, FG. */ + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); - R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); - R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); - R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(rs_state, 0); + /* SC. */ R300_INIT_ATOM(scissor_state, 3); + /* GB, FG, GA, SU, SC, RB3D. */ + R300_INIT_ATOM(invariant_state, 16 + (is_rv350 ? 4 : 0)); + /* VAP. */ R300_INIT_ATOM(viewport_state, 9); - R300_INIT_ATOM(rs_block_state, 0); - R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vap_invariant_state, 9); + R300_INIT_ATOM(vertex_stream_state, 0); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); - R300_INIT_ATOM(texture_cache_inval, 2); - R300_INIT_ATOM(textures_state, 0); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + /* VAP, RS, GA, GB, SU, SC. */ + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(rs_state, 0); + /* SC, US. */ + R300_INIT_ATOM(fb_state_pipelined, 5 + (drm_2_3_0 ? 3 : 0)); + /* US. */ R300_INIT_ATOM(fs, 0); R300_INIT_ATOM(fs_rc_constant_state, 0); R300_INIT_ATOM(fs_constants, 0); + /* TX. */ + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); + /* ZB (unpipelined), SU. */ + R300_INIT_ATOM(query_start, 4); /* Replace emission functions for r500. */ - if (r300->screen->caps.is_r500) { + if (is_r500) { r300->fs.emit = r500_emit_fs; r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state; r300->fs_constants.emit = r500_emit_fs_constants; } /* Some non-CSO atoms need explicit space to store the state locally. */ + r300->aa_state.state = CALLOC_STRUCT(r300_aa_state); r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); + r300->clip_state.state = CALLOC_STRUCT(r300_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->gpu_flush.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->hyperz_state.state = CALLOC_STRUCT(r300_hyperz_state); + r300->invariant_state.state = CALLOC_STRUCT(r300_invariant_state); r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vap_invariant_state.state = CALLOC_STRUCT(r300_vap_invariant_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); @@ -155,11 +230,112 @@ static void r300_setup_atoms(struct r300_context* r300) } /* Some non-CSO atoms don't use the state pointer. */ - r300->invariant_state.allow_null_state = TRUE; + r300->fb_state_pipelined.allow_null_state = TRUE; r300->fs_rc_constant_state.allow_null_state = TRUE; r300->pvs_flush.allow_null_state = TRUE; r300->query_start.allow_null_state = TRUE; r300->texture_cache_inval.allow_null_state = TRUE; + + /* Some states must be marked dirty here to properly set up + * hardware in the first command stream. */ + r300->invariant_state.dirty = TRUE; + r300->pvs_flush.dirty = TRUE; + r300->vap_invariant_state.dirty = TRUE; + r300->texture_cache_inval.dirty = TRUE; + r300->textures_state.dirty = TRUE; +} + +/* Not every state tracker calls every driver function before the first draw + * call and we must initialize the command buffers somehow. */ +static void r300_init_states(struct pipe_context *pipe) +{ + struct r300_context *r300 = r300_context(pipe); + struct pipe_blend_color bc = {{0}}; + struct pipe_clip_state cs = {{{0}}}; + struct pipe_scissor_state ss = {0}; + struct r300_clip_state *clip = + (struct r300_clip_state*)r300->clip_state.state; + struct r300_gpu_flush *gpuflush = + (struct r300_gpu_flush*)r300->gpu_flush.state; + struct r300_vap_invariant_state *vap_invariant = + (struct r300_vap_invariant_state*)r300->vap_invariant_state.state; + struct r300_invariant_state *invariant = + (struct r300_invariant_state*)r300->invariant_state.state; + struct r300_hyperz_state *hyperz = + (struct r300_hyperz_state*)r300->hyperz_state.state; + CB_LOCALS; + + pipe->set_blend_color(pipe, &bc); + pipe->set_scissor_state(pipe, &ss); + + /* Initialize the clip state. */ + if (r300_context(pipe)->screen->caps.has_tcl) { + pipe->set_clip_state(pipe, &cs); + } else { + BEGIN_CB(clip->cb, 2); + OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); + END_CB; + } + + /* Initialize the GPU flush. */ + { + BEGIN_CB(gpuflush->cb_flush_clean, 6); + + /* Flush and free renderbuffer caches. */ + OUT_CB_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_CB_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Wait until the GPU is idle. + * This fixes random pixels sometimes appearing probably caused + * by incomplete rendering. */ + OUT_CB_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + END_CB; + } + + /* Initialize the VAP invariant state. */ + { + BEGIN_CB(vap_invariant->cb, 9); + OUT_CB_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); + OUT_CB_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_32F(1.0); + OUT_CB_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); + END_CB; + } + + /* Initialize the invariant state. */ + { + BEGIN_CB(invariant->cb, r300->invariant_state.size); + OUT_CB_REG(R300_GB_SELECT, 0); + OUT_CB_REG(R300_FG_FOG_BLEND, 0); + OUT_CB_REG(R300_GA_ROUND_MODE, 1); + OUT_CB_REG(R300_GA_OFFSET, 0); + OUT_CB_REG(R300_SU_TEX_WRAP, 0); + OUT_CB_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); + OUT_CB_REG(R300_SU_DEPTH_OFFSET, 0); + OUT_CB_REG(R300_SC_EDGERULE, 0x2DA49525); + + if (r300->screen->caps.is_rv350) { + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CB_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); + } + END_CB; + } + + /* Initialize the hyperz state. */ + { + BEGIN_CB(&hyperz->cb_begin, 6); + OUT_CB_REG(R300_ZB_BW_CNTL, 0); + OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); + OUT_CB_REG(R300_SC_HYPERZ, 0x1C); + END_CB; + } } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -195,9 +371,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_setup_atoms(r300); - /* Open up the OQ BO. */ - r300->oqbo = pipe_buffer_create(screen, - R300_BIND_OQBO, 4096); make_empty_list(&r300->query_list); r300_init_blit_functions(r300); @@ -207,8 +380,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_state_functions(r300); r300_init_resource_functions(r300); - r300->invariant_state.dirty = TRUE; - rws->set_flush_cb(r300->rws, r300_flush_cb, r300); r300->dirty_hw++; @@ -227,6 +398,34 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (r300->upload_vb == NULL) goto no_upload_vb; + r300->tran.translate_cache = translate_cache_create(); + + r300_init_states(&r300->context); + + /* The KIL opcode needs the first texture unit to be enabled + * on r3xx-r4xx. In order to calm down the CS checker, we bind this + * dummy texture there. */ + if (!r300->screen->caps.is_r500) { + struct pipe_resource *tex; + struct pipe_resource rtempl = {{0}}; + struct pipe_sampler_view vtempl = {{0}}; + + rtempl.target = PIPE_TEXTURE_2D; + rtempl.format = PIPE_FORMAT_I8_UNORM; + rtempl.bind = PIPE_BIND_SAMPLER_VIEW; + rtempl.width0 = 1; + rtempl.height0 = 1; + rtempl.depth0 = 1; + tex = screen->resource_create(screen, &rtempl); + + u_sampler_view_default_template(&vtempl, tex, tex->format); + + r300->texkill_sampler = (struct r300_sampler_view*) + r300->context.create_sampler_view(&r300->context, tex, &vtempl); + + pipe_resource_reference(&tex, NULL); + } + return &r300->context; no_upload_ib: @@ -238,10 +437,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, boolean r300_check_cs(struct r300_context *r300, unsigned size) { - struct r300_cs_info cs_info; - - r300->rws->get_cs_info(r300->rws, &cs_info); - return size <= cs_info.free; + return size <= r300->rws->get_cs_free_dwords(r300->rws); } void r300_finish(struct r300_context *r300) diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 48ec52d26cf..50dcd0fc67c 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -31,6 +31,8 @@ #include "util/u_inlines.h" #include "util/u_transfer.h" +#include "translate/translate_cache.h" + #include "r300_defines.h" #include "r300_screen.h" @@ -59,68 +61,83 @@ struct r300_atom { boolean allow_null_state; }; +struct r300_aa_state { + struct r300_surface *dest; + + uint32_t aa_config; + uint32_t aaresolve_ctl; +}; + struct r300_blend_state { - uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ - uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ - uint32_t color_channel_mask; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ - uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */ - uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */ + uint32_t cb[8]; + uint32_t cb_no_readwrite[8]; }; struct r300_blend_color_state { - /* RV515 and earlier */ - uint32_t blend_color; /* R300_RB3D_BLEND_COLOR: 0x4e10 */ - /* R520 and newer */ - uint32_t blend_color_red_alpha; /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */ - uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */ + uint32_t cb[3]; +}; + +struct r300_clip_state { + struct pipe_clip_state clip; + + uint32_t cb[29]; }; struct r300_dsa_state { + struct pipe_depth_stencil_alpha_state dsa; + + /* This is actually a command buffer with named dwords. */ + uint32_t cb_begin; uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */ - uint32_t alpha_reference; /* R500_FG_ALPHA_VALUE: 0x4be0 */ + uint32_t cb_reg_seq; uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ + uint32_t cb_reg; uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ + /* The second command buffer disables zbuffer reads and writes. */ + uint32_t cb_no_readwrite[8]; + /* Whether a two-sided stencil is enabled. */ boolean two_sided; /* Whether a fallback should be used for a two-sided stencil ref value. */ boolean two_sided_stencil_ref; }; +struct r300_hyperz_state { + /* This is actually a command buffer with named dwords. */ + uint32_t cb_begin; + uint32_t zb_bw_cntl; /* R300_ZB_BW_CNTL */ + uint32_t cb_reg1; + uint32_t zb_depthclearvalue; /* R300_ZB_DEPTHCLEARVALUE */ + uint32_t cb_reg2; + uint32_t sc_hyperz; /* R300_SC_HYPERZ */ +}; + +struct r300_gpu_flush { + uint32_t cb_flush_clean[6]; +}; + struct r300_rs_state { /* Original rasterizer state. */ struct pipe_rasterizer_state rs; /* Draw-specific rasterizer state. */ struct pipe_rasterizer_state rs_draw; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ - uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ - uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ - uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ - float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ - /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ - float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ - /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ - uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ - uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ - uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ - uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + /* Command buffers. */ + uint32_t cb_main[25]; + uint32_t cb_poly_offset_zb16[5]; + uint32_t cb_poly_offset_zb24[5]; + + /* The index to cb_main where the cull_mode register value resides. */ + unsigned cull_mode_index; + + /* Whether polygon offset is enabled. */ + boolean polygon_offset_enable; + + /* This is emitted in the draw function. */ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */ - uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ - uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ - - /* Specifies top of Raster pipe specific enable controls, - * i.e. texture coordinates stuffing for points, lines, triangles */ - uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ - - /* Point sprites texture coordinates, 0: lower left, 1: upper right */ - float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ - float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */ - float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ - float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */ }; struct r300_rs_block { @@ -163,16 +180,14 @@ struct r300_sampler_view { /* Copy of r300_texture::texture_format_state with format-specific bits * added. */ struct r300_texture_format_state format; + + /* The texture cache region for this texture. */ + uint32_t texcache_region; }; struct r300_texture_fb_state { - /* Colorbuffer. */ - uint32_t colorpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ - uint32_t us_out_fmt; /* R300_US_OUT_FMT[0-3] */ - - /* Zbuffer. */ - uint32_t depthpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ - uint32_t zb_format; /* R300_ZB_FORMAT */ + uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */ + uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */ }; struct r300_texture_sampler_state { @@ -205,6 +220,14 @@ struct r300_vertex_stream_state { unsigned count; }; +struct r300_invariant_state { + uint32_t cb[20]; +}; + +struct r300_vap_invariant_state { + uint32_t cb[9]; +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -224,7 +247,7 @@ struct r300_ztop_state { struct r300_constant_buffer { /* Buffer of constants */ - float constants[256][4]; + uint32_t constants[256][4]; /* Total number of constants */ unsigned count; }; @@ -239,14 +262,23 @@ struct r300_constant_buffer { struct r300_query { /* The kind of query. Currently only OQ is supported. */ unsigned type; - /* The current count of this query. Required to be at least 32 bits. */ - unsigned int count; - /* The offset of this query into the query buffer, in bytes. */ - unsigned offset; + /* The number of pipes where query results are stored. */ + unsigned num_pipes; + /* How many results have been written, in dwords. It's incremented + * after end_query and flush. */ + unsigned num_results; /* if we've flushed the query */ boolean flushed; /* if begin has been emitted */ boolean begin_emitted; + + /* The buffer where query results are stored. */ + struct r300_winsys_buffer *buffer; + /* The size of the buffer. */ + unsigned buffer_size; + /* The domain of the buffer. */ + enum r300_buffer_domain domain; + /* Linked list members. */ struct r300_query* prev; struct r300_query* next; @@ -268,6 +300,19 @@ struct r300_fence { boolean signalled; }; +struct r300_surface { + struct pipe_surface base; + + /* Winsys buffer backing the texture. */ + struct r300_winsys_buffer *buffer; + + enum r300_buffer_domain domain; + + uint32_t offset; + uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */ + uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. */ +}; + struct r300_texture { /* Parent class */ struct u_resource b; @@ -326,9 +371,34 @@ struct r300_vertex_element_state { unsigned count; struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + /* If (velem[i].src_format != hw_format[i]), the vertex buffer + * referenced by this vertex element cannot be used for rendering and + * its vertex data must be translated to hw_format[i]. */ + enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; + unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + + /* The size of the vertex, in dwords. */ + unsigned vertex_size_dwords; + + /* This might mean two things: + * - src_format != hw_format, as discussed above. + * - src_offset % 4 != 0. */ + boolean incompatible_layout; + struct r300_vertex_stream_state vertex_stream; }; +struct r300_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + struct r300_context { /* Parent class */ struct pipe_context context; @@ -343,21 +413,30 @@ struct r300_context { struct blitter_context* blitter; /* Stencil two-sided reference value fallback. */ struct r300_stencilref_context *stencilref_fallback; + /* For translating vertex buffers having incompatible vertex layout. */ + struct r300_translate_context tran; /* Vertex buffer for rendering. */ struct pipe_resource* vbo; + /* The KIL opcode needs the first texture unit to be enabled + * on r3xx-r4xx. In order to calm down the CS checker, we bind this + * dummy texture there. */ + struct r300_sampler_view *texkill_sampler; /* Offset into the VBO. */ size_t vbo_offset; - /* Occlusion query buffer. */ - struct pipe_resource* oqbo; - /* Query list. */ + /* The currently active query. */ struct r300_query *query_current; + /* The saved query for blitter operations. */ + struct r300_query *blitter_saved_query; + /* Query list. */ struct r300_query query_list; /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; + /* Anti-aliasing (MSAA) state. */ + struct r300_atom aa_state; /* Blend state. */ struct r300_atom blend_state; /* Blend color state. */ @@ -374,6 +453,10 @@ struct r300_context { struct r300_atom fs_constants; /* Framebuffer state. */ struct r300_atom fb_state; + /* Framebuffer state (pipelined regs). */ + struct r300_atom fb_state_pipelined; + /* HyperZ state (various SC/ZB bits). */ + struct r300_atom hyperz_state; /* Occlusion query. */ struct r300_atom query_start; /* Rasterizer state. */ @@ -396,8 +479,12 @@ struct r300_context { struct r300_atom ztop_state; /* PVS flush. */ struct r300_atom pvs_flush; + /* VAP invariant state. */ + struct r300_atom vap_invariant_state; /* Texture cache invalidate. */ struct r300_atom texture_cache_inval; + /* GPU flush. */ + struct r300_atom gpu_flush; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -414,9 +501,6 @@ struct r300_context { struct vertex_info vertex_info; struct pipe_stencil_ref stencil_ref; - - struct pipe_clip_state clip; - struct pipe_viewport_state viewport; /* Stream locations for SWTCL. */ @@ -434,6 +518,8 @@ struct r300_context { int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). */ boolean two_sided_color; + /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */ + boolean incompatible_vb_layout; /* upload managers */ struct u_upload_mgr *upload_vb; @@ -449,6 +535,11 @@ static INLINE struct r300_query* r300_query(struct pipe_query* q) return (struct r300_query*)q; } +static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf) +{ + return (struct r300_surface*)surf; +} + static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex) { return (struct r300_texture*)tex; @@ -467,6 +558,9 @@ static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300) struct pipe_context* r300_create_context(struct pipe_screen* screen, void *priv); +boolean r300_check_cs(struct r300_context *r300, unsigned size); +void r300_finish(struct r300_context *r300); + /* Context initialization. */ struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_blit_functions(struct r300_context *r300); @@ -476,10 +570,29 @@ void r300_init_render_functions(struct r300_context *r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_resource_functions(struct r300_context* r300); -boolean r300_check_cs(struct r300_context *r300, unsigned size); -void r300_finish(struct r300_context *r300); +/* r300_query.c */ +void r300_resume_query(struct r300_context *r300, + struct r300_query *query); +void r300_stop_query(struct r300_context *r300); + +/* r300_render_translate.c */ +void r300_begin_vertex_translate(struct r300_context *r300); +void r300_end_vertex_translate(struct r300_context *r300); +void r300_translate_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count); + +/* r300_render_stencilref.c */ +void r300_plug_in_stencil_ref_fallback(struct r300_context *r300); + +/* r300_state.c */ +void r300_mark_fs_code_dirty(struct r300_context *r300); + +/* r300_debug.c */ void r500_dump_rs_block(struct r300_rs_block *rs); + static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { return SCREEN_DBG_ON(ctx->screen, flags); diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 9c8c273902b..1db7da642bd 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -20,171 +20,133 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/** + * This file contains macros for immediate command submission. + */ + #ifndef R300_CS_H #define R300_CS_H -#include "util/u_math.h" - #include "r300_reg.h" +#include "r300_context.h" #include "r300_winsys.h" /* Yes, I know macros are ugly. However, they are much prettier than the code * that they neatly hide away, and don't have the cost of function setup,so * we're going to use them. */ -#define MAX_CS_SIZE 64 * 1024 / 4 - -#define VERY_VERBOSE_CS 1 -#define VERY_VERBOSE_REGISTERS 1 +#ifdef DEBUG +#define CS_DEBUG(x) x +#else +#define CS_DEBUG(x) +#endif -/* XXX stolen from radeon_reg.h */ -#define RADEON_CP_PACKET0 0x0 - -#define CP_PACKET0(register, count) \ - (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) +/** + * Command submission setup. + */ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ - int cs_count = 0; (void) cs_count; - -#define CHECK_CS(size) \ - assert(r300_check_cs(cs_context_copy, (size))) + CS_DEBUG(int cs_count = 0; (void) cs_count;) #define BEGIN_CS(size) do { \ - CHECK_CS(size); \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \ - size, __FUNCTION__, __FILE__, __LINE__); \ - } \ - cs_winsys->begin_cs(cs_winsys, (size), \ - __FILE__, __FUNCTION__, __LINE__); \ - cs_count = size; \ + assert(r300_check_cs(cs_context_copy, (size))); \ + CS_DEBUG(cs_count = size;) \ +} while (0) + +#ifdef DEBUG +#define END_CS do { \ + if (cs_count != 0) \ + debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \ + cs_count, __FUNCTION__, __FILE__, __LINE__); \ + cs_count = 0; \ } while (0) +#else +#define END_CS +#endif + +/** + * Writing pure DWORDs. + */ #define OUT_CS(value) do { \ - if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ - DBG(cs_context_copy, DBG_CS, "r300: writing %08x\n", value); \ - } \ cs_winsys->write_cs_dword(cs_winsys, (value)); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_32F(value) do { \ - if (VERY_VERBOSE_CS || VERY_VERBOSE_REGISTERS) { \ - DBG(cs_context_copy, DBG_CS, "r300: writing %f\n", value); \ - } \ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_REG(register, value) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing 0x%08X to register 0x%04X\n", \ - value, register); \ assert(register); \ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \ cs_winsys->write_cs_dword(cs_winsys, value); \ - cs_count -= 2; \ + CS_DEBUG(cs_count -= 2;) \ } while (0) /* Note: This expects count to be the number of registers, * not the actual packet0 count! */ #define OUT_CS_REG_SEQ(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing register sequence of %d to 0x%04X\n", \ - count, register); \ assert(register); \ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \ - cs_count--; \ + CS_DEBUG(cs_count--;) \ } while (0) #define OUT_CS_TABLE(values, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing table of %d dwords\n", count); \ cs_winsys->write_cs_table(cs_winsys, values, count); \ - cs_count -= count; \ + CS_DEBUG(cs_count -= count;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ - "domains (%d, %d, %d)\n", \ - bo, offset, rd, wd, flags); \ - assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ - cs_count -= 3; \ +#define OUT_CS_ONE_REG(register, count) do { \ + assert(register); \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ + CS_DEBUG(cs_count--;) \ } while (0) - -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \ - "domains (%d, %d, %d)\n", \ - tex, offset, rd, wd, flags); \ - assert(tex); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \ - cs_count -= 3; \ +#define OUT_CS_PKT3(op, count) do { \ + cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ + CS_DEBUG(cs_count--;) \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ - "domains (%d, %d, %d)\n", \ - bo, rd, wd, flags); \ +/** + * Writing relocations. + */ + +#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ assert(bo); \ - r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ - cs_count -= 2; \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + CS_DEBUG(cs_count -= 3;) \ } while (0) -#define END_CS do { \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \ - __FILE__, __LINE__); \ - } \ - if (cs_count != 0) \ - debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \ - cs_winsys->end_cs(cs_winsys, __FILE__, __FUNCTION__, __LINE__); \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ + assert(bo); \ + OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \ } while (0) -#define FLUSH_CS do { \ - if (VERY_VERBOSE_CS) { \ - DBG(cs_context_copy, DBG_CS, "r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, \ - __FILE__, __LINE__); \ - } \ - if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { \ - r300->flush_counter++; \ - } \ - cs_winsys->flush_cs(cs_winsys); \ +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ + assert(tex); \ + OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \ } while (0) -#define RADEON_ONE_REG_WR (1 << 15) - -#define OUT_CS_ONE_REG(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - DBG(cs_context_copy, DBG_CS, "r300: writing data sequence of %d to 0x%04X\n", \ - count, register); \ - assert(register); \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \ - cs_count--; \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + assert(bo); \ + cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \ + CS_DEBUG(cs_count -= 2;) \ } while (0) -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) -#define OUT_CS_PKT3(op, count) do { \ - cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \ - cs_count--; \ -} while (0) +/** + * Command buffer emission. + */ -#define OUT_CS_INDEX_RELOC(bo, offset, count, rd, wd, flags) do { \ - DBG(cs_context_copy, DBG_CS, "r300: writing relocation for index buffer %p," \ - "offset %d\n", bo, offset); \ - assert(bo); \ - cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_dword(cs_winsys, count); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ - cs_count -= 4; \ +#define WRITE_CS_TABLE(values, count) do { \ + CS_DEBUG(assert(cs_count == 0);) \ + cs_winsys->write_cs_table(cs_winsys, values, count); \ } while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 297791f3ffe..a6cd86e3920 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,7 +29,6 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Fragment program handling (for debugging)" }, { "vp", DBG_VP, "Vertex program handling (for debugging)" }, - { "cs", DBG_CS, "Command submissions (for debugging)" }, { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, @@ -39,6 +38,7 @@ static const struct debug_named_value debug_options[] = { { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" }, + { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" }, { "stats", DBG_STATS, "Gather statistics (for lulz)" }, /* must be last */ diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h index 83c9ec7e399..d510d80a7bb 100644 --- a/src/gallium/drivers/r300/r300_defines.h +++ b/src/gallium/drivers/r300/r300_defines.h @@ -30,13 +30,7 @@ #define R300_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV -#define R300_INVALID_FORMAT (~0) - -/* XXX: this is just a bandaid on larger problems in - * r300_screen_buffer.h which doesn't seem to be fully ported to - * gallium-resources. - */ -#define R300_BIND_OQBO (1<<21) +#define R300_INVALID_FORMAT 0xffff /* Tiling flags. */ enum r300_buffer_tiling { diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index c329c0f30ff..014b382edf3 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -43,21 +43,11 @@ void r300_emit_blend_state(struct r300_context* r300, (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); - OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (fb->nr_cbufs) { - OUT_CS(blend->blend_control); - OUT_CS(blend->alpha_blend_control); - OUT_CS(blend->color_channel_mask); + WRITE_CS_TABLE(blend->cb, size); } else { - OUT_CS(0); - OUT_CS(0); - OUT_CS(0); - /* XXX also disable fastfill here once it's supported */ + WRITE_CS_TABLE(blend->cb_no_readwrite, size); } - OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); - END_CS; } void r300_emit_blend_color_state(struct r300_context* r300, @@ -66,40 +56,16 @@ void r300_emit_blend_color_state(struct r300_context* r300, struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; CS_LOCALS(r300); - if (r300->screen->caps.is_r500) { - BEGIN_CS(size); - OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); - OUT_CS(bc->blend_color_red_alpha); - OUT_CS(bc->blend_color_green_blue); - END_CS; - } else { - BEGIN_CS(size); - OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); - END_CS; - } + WRITE_CS_TABLE(bc->cb, size); } void r300_emit_clip_state(struct r300_context* r300, unsigned size, void* state) { - struct pipe_clip_state* clip = (struct pipe_clip_state*)state; + struct r300_clip_state* clip = (struct r300_clip_state*)state; CS_LOCALS(r300); - if (r300->screen->caps.has_tcl) { - BEGIN_CS(size); - OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, - (r300->screen->caps.is_r500 ? - R500_PVS_UCP_START : R300_PVS_UCP_START)); - OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); - OUT_CS_TABLE(clip->ucp, 6 * 4); - OUT_CS_REG(R300_VAP_CLIP_CNTL, ((1 << clip->nr) - 1) | - R300_PS_UCP_MODE_CLIP_AS_TRIFAN); - END_CS; - } else { - BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); - END_CS; - } + WRITE_CS_TABLE(clip->cb, size); } void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) @@ -107,27 +73,13 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; - struct pipe_stencil_ref stencil_ref = r300->stencil_ref; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); - OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - if (fb->zsbuf) { - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + WRITE_CS_TABLE(&dsa->cb_begin, size); } else { - OUT_CS(0); - OUT_CS(0); + WRITE_CS_TABLE(dsa->cb_no_readwrite, size); } - - OUT_CS(dsa->stencil_ref_mask | stencil_ref.ref_value[0]); - - if (r300->screen->caps.is_r500) { - OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf | stencil_ref.ref_value[1]); - } - END_CS; } static const float * get_rc_constant_state( @@ -175,7 +127,7 @@ static const float * get_rc_constant_state( /* Convert a normal single-precision float into the 7.16 format * used by the R300 fragment shader. */ -static uint32_t pack_float24(float f) +uint32_t pack_float24(float f) { union { float fl; @@ -206,101 +158,27 @@ static uint32_t pack_float24(float f) return float24; } -unsigned r300_get_fs_atom_size(struct r300_context *r300) -{ - struct r300_fragment_shader *fs = r300_fs(r300); - unsigned imm_count = fs->shader->immediates_count; - struct r300_fragment_program_code *code = &fs->shader->code.code.r300; - - return 19 + - code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - (imm_count ? imm_count * 5 : 0); -} - void r300_emit_fs(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rX00_fragment_program_code* generic_code = &fs->shader->code; - struct r300_fragment_program_code * code = &generic_code->code.r300; - unsigned i; - unsigned imm_count = fs->shader->immediates_count; - unsigned imm_first = fs->shader->externals_count; - unsigned imm_end = generic_code->constants.Count; - struct rc_constant *constants = generic_code->constants.Constants; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R300_US_CONFIG, code->config); - OUT_CS_REG(R300_US_PIXSIZE, code->pixsize); - OUT_CS_REG(R300_US_CODE_OFFSET, code->code_offset); - - OUT_CS_REG_SEQ(R300_US_CODE_ADDR_0, 4); - OUT_CS_TABLE(code->code_addr, 4); - - OUT_CS_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].rgb_inst); - - OUT_CS_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].rgb_addr); - - OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].alpha_inst); - - OUT_CS_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CS(code->alu.inst[i].alpha_addr); - - if (code->tex.length) { - OUT_CS_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CS_TABLE(code->tex.inst, code->tex.length); - } - - /* Emit immediates. */ - if (imm_count) { - for(i = imm_first; i < imm_end; ++i) { - if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { - const float *data = constants[i].u.Immediate; - - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } - } - } - - OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src); - OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w); - END_CS; + WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); } void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rc_constant_list *constants = &fs->shader->code.constants; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned i, count = fs->shader->externals_count; + unsigned count = fs->shader->externals_count * 4; CS_LOCALS(r300); if (count == 0) return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); - for(i = 0; i < count; ++i) { - const float *data; - assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - data = buf->constants[i]; - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); - } + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); + OUT_CS_TABLE(buf->constants, count); END_CS; } @@ -312,6 +190,8 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo unsigned count = fs->shader->rc_state_count; unsigned first = fs->shader->externals_count; unsigned end = constants->Count; + uint32_t cdata[4]; + unsigned j; CS_LOCALS(r300); if (count == 0) @@ -323,85 +203,29 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo const float *data = get_rc_constant_state(r300, &constants->Constants[i]); + for (j = 0; j < 4; j++) + cdata[j] = pack_float24(data[j]); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS(pack_float24(data[0])); - OUT_CS(pack_float24(data[1])); - OUT_CS(pack_float24(data[2])); - OUT_CS(pack_float24(data[3])); + OUT_CS_TABLE(cdata, 4); } } END_CS; } -unsigned r500_get_fs_atom_size(struct r300_context *r300) -{ - struct r300_fragment_shader *fs = r300_fs(r300); - unsigned imm_count = fs->shader->immediates_count; - struct r500_fragment_program_code *code = &fs->shader->code.code.r500; - - return 17 + - ((code->inst_end + 1) * 6) + - (imm_count ? imm_count * 7 : 0); -} - void r500_emit_fs(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rX00_fragment_program_code* generic_code = &fs->shader->code; - struct r500_fragment_program_code * code = &generic_code->code.r500; - unsigned i; - unsigned imm_count = fs->shader->immediates_count; - unsigned imm_first = fs->shader->externals_count; - unsigned imm_end = generic_code->constants.Count; - struct rc_constant *constants = generic_code->constants.Constants; CS_LOCALS(r300); - BEGIN_CS(size); - OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx); - OUT_CS_REG(R500_US_CODE_RANGE, - R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); - OUT_CS_REG(R500_US_CODE_OFFSET, 0); - OUT_CS_REG(R500_US_CODE_ADDR, - R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); - - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); - for (i = 0; i <= code->inst_end; i++) { - OUT_CS(code->inst[i].inst0); - OUT_CS(code->inst[i].inst1); - OUT_CS(code->inst[i].inst2); - OUT_CS(code->inst[i].inst3); - OUT_CS(code->inst[i].inst4); - OUT_CS(code->inst[i].inst5); - } - - /* Emit immediates. */ - if (imm_count) { - for(i = imm_first; i < imm_end; ++i) { - if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { - const float *data = constants[i].u.Immediate; - - OUT_CS_REG(R500_GA_US_VECTOR_INDEX, - R500_GA_US_VECTOR_INDEX_TYPE_CONST | - (i & R500_GA_US_VECTOR_INDEX_MASK)); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4); - OUT_CS_TABLE(data, 4); - } - } - } - - OUT_CS_REG(R300_FG_DEPTH_SRC, fs->shader->fg_depth_src); - OUT_CS_REG(R300_US_W_FMT, fs->shader->us_out_w); - END_CS; + WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size); } void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state) { struct r300_fragment_shader *fs = r300_fs(r300); - struct rc_constant_list *constants = &fs->shader->code.constants; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned i, count = fs->shader->externals_count; + unsigned count = fs->shader->externals_count * 4; CS_LOCALS(r300); if (count == 0) @@ -409,11 +233,8 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); - for(i = 0; i < count; ++i) { - assert(constants->Constants[i].Type == RC_CONSTANT_EXTERNAL); - } - OUT_CS_TABLE(buf->constants, count * 4); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); + OUT_CS_TABLE(buf->constants, count); END_CS; } @@ -446,83 +267,170 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo END_CS; } +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + CS_LOCALS(r300); + + BEGIN_CS(size); + + /* Set up scissors. + * By writing to the SC registers, SC & US assert idle. */ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + if (r300->screen->caps.is_r500) { + OUT_CS(0); + OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | + ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + } else { + OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | + (1440 << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ + OUT_CS_TABLE(gpuflush->cb_flush_clean, 6); + END_CS; +} + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_aa_state *aa = (struct r300_aa_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); + + if (aa->dest) { + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain, 0); + + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain, 0); + } + + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); + END_CS; +} + void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; - struct r300_texture* tex; - struct pipe_surface* surf; - int i; + struct r300_surface* surf; + unsigned i; CS_LOCALS(r300); BEGIN_CS(size); - /* Flush and free renderbuffer caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - /* Set the number of colorbuffers. */ - if (fb->nr_cbufs > 1) { - if (r300->screen->caps.is_r500) { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs) | - R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); - } else { - OUT_CS_REG(R300_RB3D_CCTL, - R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs)); - } + /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not + * what we usually want. */ + if (r300->screen->caps.is_r500) { + OUT_CS_REG(R300_RB3D_CCTL, + R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE); } else { - OUT_CS_REG(R300_RB3D_CCTL, 0x0); + OUT_CS_REG(R300_RB3D_CCTL, 0); } /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { - surf = fb->cbufs[i]; - tex = r300_texture(surf->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], - 0, tex->domain, 0); - - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); - } - for (; i < 4; i++) { - OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); } /* Set up a zbuffer. */ if (fb->zsbuf) { - surf = fb->zsbuf; - tex = r300_texture(surf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + surf = r300_surface(fb->zsbuf); - OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_TEX_RELOC(tex, surf->offset, 0, tex->domain, 0); + OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], - 0, tex->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + + /* HiZ RAM. */ + if (r300->screen->caps.has_hiz) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + + /* Z Mask RAM. (compressed zbuffer) */ + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (r300->screen->caps.is_r500) { - OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); - } else { - OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | - (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + END_CS; +} + +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state) +{ + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); + + /* Colorbuffer format in the US block. + * (must be written after unpipelined regs) */ + OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); + for (i = 0; i < fb->nr_cbufs; i++) { + OUT_CS(r300_surface(fb->cbufs[i])->format); + } + for (; i < 4; i++) { + OUT_CS(R300_US_OUT_FMT_UNUSED); + } + + /* Multisampling. Depends on framebuffer sample count. + * These are pipelined regs and as such cannot be moved + * to the AA state. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + unsigned mspos0 = 0x66666666; + unsigned mspos1 = 0x6666666; + + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } + } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); } END_CS; } @@ -544,13 +452,14 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) OUT_CS_REG(R300_ZB_ZPASS_DATA, 0); END_CS; query->begin_emitted = TRUE; + query->flushed = FALSE; } - static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); assert(caps->num_frag_pipes); @@ -569,28 +478,28 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 3) * 4, + 0, query->domain, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 2) * 4, + 0, query->domain, 0); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, + 0, query->domain, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), - 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, + 0, query->domain, 0); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -606,12 +515,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, static void rv530_emit_query_end_single_z(struct r300_context *r300, struct r300_query *query) { + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -619,15 +529,16 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, static void rv530_emit_query_end_double_z(struct r300_context *r300, struct r300_query *query) { + struct r300_winsys_buffer *buf = r300->query_current->buffer; CS_LOCALS(r300); BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, r300_buffer(r300->oqbo)->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -652,57 +563,36 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_end_frag_pipes(r300, query); query->begin_emitted = FALSE; + query->num_results += query->num_pipes; + + /* XXX grab all the results and reset the counter. */ + if (query->num_results >= query->buffer_size / 4 - 4) { + query->num_results = (query->buffer_size / 4) / 2; + fprintf(stderr, "r300: Rewinding OQBO...\n"); + } +} + +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); } void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { - struct r300_rs_state* rs = (struct r300_rs_state*)state; - float scale, offset; + struct r300_rs_state* rs = state; CS_LOCALS(r300); BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); - - OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); - - OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); - + OUT_CS_TABLE(rs->cb_main, 25); if (rs->polygon_offset_enable) { - scale = rs->depth_scale * 12; - offset = rs->depth_offset; - - switch (r300->zbuffer_bpp) { - case 16: - offset *= 4; - break; - case 24: - offset *= 2; - break; + if (r300->zbuffer_bpp == 16) { + OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); + } else { + OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); - OUT_CS_32F(scale); - OUT_CS_32F(offset); - OUT_CS_32F(scale); - OUT_CS_32F(offset); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); - OUT_CS(rs->polygon_offset_enable); - OUT_CS(rs->cull_mode); - OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); - OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); - OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule); - OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable); - OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); - OUT_CS_32F(rs->point_texcoord_left); - OUT_CS_32F(rs->point_texcoord_bottom); - OUT_CS_32F(rs->point_texcoord_right); - OUT_CS_32F(rs->point_texcoord_top); END_CS; } @@ -717,9 +607,18 @@ void r300_emit_rs_block_state(struct r300_context* r300, if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { r500_dump_rs_block(rs); - } - DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + fprintf(stderr, "r300: RS emit:\n"); + + for (i = 0; i < count; i++) + fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); + + for (i = 0; i < count; i++) + fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); + + fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); @@ -735,9 +634,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_IP_0, count); } OUT_CS_TABLE(rs->ip, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); - } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); OUT_CS(rs->count); @@ -749,13 +645,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_INST_0, count); } OUT_CS_TABLE(rs->inst, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); - } - - DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", - rs->count, rs->inst_count); - END_CS; } @@ -821,19 +710,11 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) struct pipe_vertex_element *velem = r300->velems->velem; struct r300_buffer *buf; int i; + unsigned *hw_format_size = r300->velems->hw_format_size; unsigned size1, size2, aos_count = r300->velems->count; unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); - for (i = 0; i < aos_count; i++) { - if ((vbuf[velem[i].vertex_buffer_index].buffer_offset + velem[i].src_offset) % 4 != 0) { - /* XXX We must align the buffer. */ - assert(0); - fprintf(stderr, "r300: Unaligned vertex buffer offsets aren't supported, aborting..\n"); - abort(); - } - } - BEGIN_CS(2 + packet_size + aos_count * 2); OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size); OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); @@ -841,8 +722,8 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) for (i = 0; i < aos_count - 1; i += 2) { vb1 = &vbuf[velem[i].vertex_buffer_index]; vb2 = &vbuf[velem[i+1].vertex_buffer_index]; - size1 = util_format_get_blocksize(velem[i].src_format); - size2 = util_format_get_blocksize(velem[i+1].src_format); + size1 = hw_format_size[i]; + size2 = hw_format_size[i+1]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); @@ -852,7 +733,7 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) if (aos_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; - size1 = util_format_get_blocksize(velem[i].src_format); + size1 = hw_format_size[i]; OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); @@ -897,21 +778,25 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); + if (DBG_ON(r300, DBG_DRAW)) { + fprintf(stderr, "r300: PSC emit:\n"); + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl[i]); + } + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl_ext[i]); + } + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl[i]); - } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl_ext[i]); - } END_CS; } @@ -924,6 +809,13 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) END_CS; } +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; @@ -948,6 +840,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); + /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -1012,12 +905,7 @@ void r300_emit_viewport_state(struct r300_context* r300, BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); + OUT_CS_TABLE(&viewport->xscale, 6); OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); END_CS; } @@ -1099,10 +987,9 @@ validate: } } /* ...occlusion query buffer... */ - if (r300->query_start.dirty || - (r300->query_current && r300->query_current->begin_emitted)) { - if (!r300_add_buffer(r300->rws, r300->oqbo, - 0, r300_buffer(r300->oqbo)->domain)) { + if (r300->query_current) { + if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer, + 0, r300->query_current->domain)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 90660883423..586ccda620b 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -29,6 +29,8 @@ struct rX00_fragment_program_code; struct r300_vertex_program_code; +uint32_t pack_float24(float f); + void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed); void r300_emit_blend_state(struct r300_context* r300, @@ -43,16 +45,12 @@ void r300_emit_clip_state(struct r300_context* r300, void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state); -unsigned r300_get_fs_atom_size(struct r300_context *r300); - void r300_emit_fs(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state); -unsigned r500_get_fs_atom_size(struct r300_context *r300); - void r500_emit_fs(struct r300_context* r300, unsigned size, void *state); void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state); @@ -61,6 +59,16 @@ void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state); + +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state); + +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state); + void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state); void r300_emit_query_end(struct r300_context* r300); @@ -78,6 +86,9 @@ void r300_emit_textures_state(struct r300_context *r300, void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed); +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state); + void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); @@ -96,6 +107,9 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state); + unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 360b19a0c1e..ba840bfff81 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -39,8 +39,6 @@ static void r300_flush(struct pipe_context* pipe, struct r300_atom *atom; struct r300_fence **rfence = (struct r300_fence**)fence; - CS_LOCALS(r300); - (void) cs_count; /* We probably need to flush Draw, but we may have been called from * within Draw. This feels kludgy, but it might be the best thing. * @@ -52,7 +50,10 @@ static void r300_flush(struct pipe_context* pipe, if (r300->dirty_hw) { r300_emit_query_end(r300); - FLUSH_CS; + if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) { + r300->flush_counter++; + } + r300->rws->flush_cs(r300->rws); r300->dirty_hw = 0; /* New kitchen sink, baby. */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index a434808046c..424f831731d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -28,7 +28,9 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_ureg.h" +#include "r300_cb.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_screen.h" #include "r300_fs.h" #include "r300_reg.h" @@ -229,6 +231,122 @@ static void r300_dummy_fragment_shader( ureg_destroy(ureg); } +static void r300_emit_fs_code_to_buffer( + struct r300_context *r300, + struct r300_fragment_shader_code *shader) +{ + struct rX00_fragment_program_code *generic_code = &shader->code; + unsigned imm_count = shader->immediates_count; + unsigned imm_first = shader->externals_count; + unsigned imm_end = generic_code->constants.Count; + struct rc_constant *constants = generic_code->constants.Constants; + unsigned i; + CB_LOCALS; + + if (r300->screen->caps.is_r500) { + struct r500_fragment_program_code *code = &generic_code->code.r500; + + shader->cb_code_size = 17 + + ((code->inst_end + 1) * 6) + + imm_count * 7; + + NEW_CB(shader->cb_code, shader->cb_code_size); + OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); + OUT_CB_REG(R500_US_CODE_RANGE, + R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); + OUT_CB_REG(R500_US_CODE_OFFSET, 0); + OUT_CB_REG(R500_US_CODE_ADDR, + R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end)); + + OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR); + OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6); + for (i = 0; i <= code->inst_end; i++) { + OUT_CB(code->inst[i].inst0); + OUT_CB(code->inst[i].inst1); + OUT_CB(code->inst[i].inst2); + OUT_CB(code->inst[i].inst3); + OUT_CB(code->inst[i].inst4); + OUT_CB(code->inst[i].inst5); + } + + /* Emit immediates. */ + if (imm_count) { + for(i = imm_first; i < imm_end; ++i) { + if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { + const float *data = constants[i].u.Immediate; + + OUT_CB_REG(R500_GA_US_VECTOR_INDEX, + R500_GA_US_VECTOR_INDEX_TYPE_CONST | + (i & R500_GA_US_VECTOR_INDEX_MASK)); + OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4); + OUT_CB_TABLE(data, 4); + } + } + } + } else { /* r300 */ + struct r300_fragment_program_code *code = &generic_code->code.r300; + + shader->cb_code_size = 19 + + (r300->screen->caps.is_r400 ? 2 : 0) + + code->alu.length * 4 + + (code->tex.length ? (1 + code->tex.length) : 0) + + imm_count * 5; + + NEW_CB(shader->cb_code, shader->cb_code_size); + + if (r300->screen->caps.is_r400) + OUT_CB_REG(R400_US_CODE_BANK, 0); + + OUT_CB_REG(R300_US_CONFIG, code->config); + OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); + OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); + OUT_CB_TABLE(code->code_addr, 4); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].rgb_addr); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].alpha_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + for (i = 0; i < code->alu.length; i++) + OUT_CB(code->alu.inst[i].alpha_addr); + + if (code->tex.length) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); + OUT_CB_TABLE(code->tex.inst, code->tex.length); + } + + /* Emit immediates. */ + if (imm_count) { + for(i = imm_first; i < imm_end; ++i) { + if (constants[i].Type == RC_CONSTANT_IMMEDIATE) { + const float *data = constants[i].u.Immediate; + + OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); + OUT_CB(pack_float24(data[0])); + OUT_CB(pack_float24(data[1])); + OUT_CB(pack_float24(data[2])); + OUT_CB(pack_float24(data[3])); + } + } + } + } + + OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src); + OUT_CB_REG(R300_US_W_FMT, shader->us_out_w); + END_CB; +} + static void r300_translate_fragment_shader( struct r300_context* r300, struct r300_fragment_shader_code* shader, @@ -338,6 +456,9 @@ static void r300_translate_fragment_shader( /* And, finally... */ rc_destroy(&compiler.Base); + + /* Build the command buffer. */ + r300_emit_fs_code_to_buffer(r300, shader); } boolean r300_pick_fragment_shader(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h index 1cc43553595..51bfa88c5ef 100644 --- a/src/gallium/drivers/r300/r300_fs.h +++ b/src/gallium/drivers/r300/r300_fs.h @@ -50,6 +50,9 @@ struct r300_fragment_shader_code { struct r300_fragment_program_external_state compare_state; struct rX00_fragment_program_code code; + unsigned cb_code_size; + uint32_t *cb_code; + struct r300_fragment_shader_code* next; }; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index b41b6b1508d..e5c76589528 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -31,33 +31,46 @@ /* The ZTOP state */ /*****************************************************************************/ -static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) +static boolean r300_dsa_writes_stencil( + struct pipe_stencil_state *s) { - /* We are interested only in the cases when a new depth or stencil value - * can be written and changed. */ + return s->enabled && s->writemask && + (s->fail_op != PIPE_STENCIL_OP_KEEP || + s->zfail_op != PIPE_STENCIL_OP_KEEP || + s->zpass_op != PIPE_STENCIL_OP_KEEP); +} + +static boolean r300_dsa_writes_depth_stencil( + struct pipe_depth_stencil_alpha_state *dsa) +{ + /* We are interested only in the cases when a depth or stencil value + * can be changed. */ + + if (dsa->depth.enabled && dsa->depth.writemask && + dsa->depth.func != PIPE_FUNC_NEVER) + return TRUE; + + if (r300_dsa_writes_stencil(&dsa->stencil[0]) || + r300_dsa_writes_stencil(&dsa->stencil[1])) + return TRUE; - /* We might optionally check for [Z func: never] and inspect the stencil - * state in a similar fashion, but it's not terribly important. */ - return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) || - (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) || - ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) && - (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK)); + return FALSE; } -static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) +static boolean r300_dsa_alpha_test_enabled( + struct pipe_depth_stencil_alpha_state *dsa) { /* We are interested only in the cases when alpha testing can kill * a fragment. */ - uint32_t af = dsa->alpha_function; - return (af & R300_FG_ALPHA_FUNC_ENABLE) && - (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS; + return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS; } static void r300_update_ztop(struct r300_context* r300) { struct r300_ztop_state* ztop_state = (struct r300_ztop_state*)r300->ztop_state.state; + uint32_t old_ztop = ztop_state->z_buffer_top; /* This is important enough that I felt it warranted a comment. * @@ -99,7 +112,8 @@ static void r300_update_ztop(struct r300_context* r300) ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } - r300->ztop_state.dirty = TRUE; + if (ztop_state->z_buffer_top != old_ztop) + r300->ztop_state.dirty = TRUE; } void r300_update_hyperz_state(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 97081c4a00f..10086ee9256 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -26,6 +26,7 @@ #include "r300_context.h" #include "r300_screen.h" #include "r300_emit.h" +#include "r300_winsys.h" #include <stdio.h> @@ -34,30 +35,29 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_screen *r300screen = r300->screen; - unsigned query_size; - struct r300_query *q, *qptr; + struct r300_query *q; + + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) { + return NULL; + } q = CALLOC_STRUCT(r300_query); + if (!q) + return NULL; q->type = query_type; - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + q->domain = R300_DOMAIN_GTT; + q->buffer_size = 4096; if (r300screen->caps.family == CHIP_FAMILY_RV530) - query_size = r300screen->caps.num_z_pipes * sizeof(uint32_t); + q->num_pipes = r300screen->caps.num_z_pipes; else - query_size = r300screen->caps.num_frag_pipes * sizeof(uint32_t); + q->num_pipes = r300screen->caps.num_frag_pipes; - if (!is_empty_list(&r300->query_list)) { - qptr = last_elem(&r300->query_list); - q->offset = qptr->offset + query_size; - } insert_at_tail(&r300->query_list, q); - /* XXX */ - if (q->offset >= 4096) { - q->offset = 0; - fprintf(stderr, "r300: Rewinding OQBO...\n"); - } + /* Open up the occlusion query buffer. */ + q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size); return (struct pipe_query*)q; } @@ -65,18 +65,26 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, static void r300_destroy_query(struct pipe_context* pipe, struct pipe_query* query) { - struct r300_query* q = (struct r300_query*)query; + struct r300_context *r300 = r300_context(pipe); + struct r300_query* q = r300_query(query); + r300->rws->buffer_reference(r300->rws, &q->buffer, NULL); remove_from_list(q); FREE(query); } +void r300_resume_query(struct r300_context *r300, + struct r300_query *query) +{ + r300->query_current = query; + r300->query_start.dirty = TRUE; +} + static void r300_begin_query(struct pipe_context* pipe, struct pipe_query* query) { - uint32_t value = ~0U; struct r300_context* r300 = r300_context(pipe); - struct r300_query* q = (struct r300_query*)query; + struct r300_query* q = r300_query(query); if (r300->query_current != NULL) { fprintf(stderr, "r300: begin_query: " @@ -85,85 +93,59 @@ static void r300_begin_query(struct pipe_context* pipe, return; } - pipe_buffer_write(pipe, - r300->oqbo, - q->offset, - sizeof value, - &value); + q->num_results = 0; + r300_resume_query(r300, q); +} - q->flushed = FALSE; - r300->query_current = q; - r300->query_start.dirty = TRUE; +void r300_stop_query(struct r300_context *r300) +{ + r300_emit_query_end(r300); + r300->query_current = NULL; } static void r300_end_query(struct pipe_context* pipe, struct pipe_query* query) { struct r300_context* r300 = r300_context(pipe); + struct r300_query *q = r300_query(query); - if ((struct r300_query*)query != r300->query_current) { + if (q != r300->query_current) { fprintf(stderr, "r300: end_query: Got invalid query.\n"); assert(0); return; } - r300_emit_query_end(r300); - r300->query_current = NULL; + r300_stop_query(r300); } static boolean r300_get_query_result(struct pipe_context* pipe, struct pipe_query* query, boolean wait, - uint64_t* result) + void* vresult) { struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300->screen; - struct r300_query *q = (struct r300_query*)query; - struct pipe_transfer *transfer; - unsigned flags = PIPE_TRANSFER_READ; - uint32_t* map; - uint32_t temp = 0; - unsigned i, num_results; - - if (q->flushed == FALSE) + struct r300_query *q = r300_query(query); + unsigned flags, i; + uint32_t temp, *map; + uint64_t *result = (uint64_t*)vresult; + + if (!q->flushed) pipe->flush(pipe, 0, NULL); - if (!wait) { - flags |= PIPE_TRANSFER_DONTBLOCK; - } - map = pipe_buffer_map(pipe, r300->oqbo, flags, &transfer); + flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0); + + map = r300->rws->buffer_map(r300->rws, q->buffer, flags); if (!map) return FALSE; - map += q->offset / 4; - if (r300screen->caps.family == CHIP_FAMILY_RV530) - num_results = r300screen->caps.num_z_pipes; - else - num_results = r300screen->caps.num_frag_pipes; - - for (i = 0; i < num_results; i++) { - if (*map == ~0U) { - /* Looks like our results aren't ready yet. */ - if (wait) { - fprintf(stderr, "r300: Despite waiting, OQ results haven't " - "come in yet. This is a driver bug.\n" - "r300: Returning bogus results to avoid " - "a possible infinite loop...\n"); - temp = 987654321; - } else { - temp = ~0U; - } - break; - } + /* Sum up the results. */ + temp = 0; + for (i = 0; i < q->num_results; i++) { temp += *map; map++; } - pipe_buffer_unmap(pipe, r300->oqbo, transfer); - if (temp == ~0U) { - /* Our results haven't been written yet... */ - return FALSE; - } + r300->rws->buffer_unmap(r300->rws, q->buffer); *result = temp; return TRUE; @@ -191,11 +173,61 @@ static void r300_render_condition(struct pipe_context *pipe, } } +/*************************************************************************** + * Fake occlusion queries (for debugging) + ***************************************************************************/ + +static unsigned r300_fake_query; + +static struct pipe_query *r300_fake_create_query(struct pipe_context *pipe, + unsigned query_type) +{ + return (struct pipe_query*)&r300_fake_query; +} + +static void r300_fake_destroy_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static void r300_fake_begin_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static void r300_fake_end_query(struct pipe_context* pipe, + struct pipe_query* query) +{ +} + +static boolean r300_fake_get_query_result(struct pipe_context* pipe, + struct pipe_query* query, + boolean wait, void* vresult) +{ + uint64_t *result = (uint64_t*)vresult; + *result = 1000000; + return TRUE; +} + +static void r300_fake_render_condition(struct pipe_context *pipe, + struct pipe_query *query, uint mode) +{ +} + void r300_init_query_functions(struct r300_context* r300) { - r300->context.create_query = r300_create_query; - r300->context.destroy_query = r300_destroy_query; - r300->context.begin_query = r300_begin_query; - r300->context.end_query = r300_end_query; - r300->context.get_query_result = r300_get_query_result; - r300->context.render_condition = r300_render_condition; + if (DBG_ON(r300, DBG_FAKE_OCC)) { + r300->context.create_query = r300_fake_create_query; + r300->context.destroy_query = r300_fake_destroy_query; + r300->context.begin_query = r300_fake_begin_query; + r300->context.end_query = r300_fake_end_query; + r300->context.get_query_result = r300_fake_get_query_result; + r300->context.render_condition = r300_fake_render_condition; + } else { + r300->context.create_query = r300_create_query; + r300->context.destroy_query = r300_destroy_query; + r300->context.begin_query = r300_begin_query; + r300->context.end_query = r300_end_query; + r300->context.get_query_result = r300_get_query_result; + r300->context.render_condition = r300_render_condition; + } } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index c4fa19ace53..180560175a4 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1630,6 +1630,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_GAMMA (1 << 21) # define R300_TX_FORMAT_YUV_TO_RGB (1 << 22) +# define R300_TX_CACHE(x) ((x) << 27) +# define R300_TX_CACHE_WHOLE 0 +/* reserved */ +# define R300_TX_CACHE_HALF_0 2 +# define R300_TX_CACHE_HALF_1 3 +# define R300_TX_CACHE_FOURTH_0 4 +# define R300_TX_CACHE_FOURTH_1 5 +# define R300_TX_CACHE_FOURTH_2 6 +# define R300_TX_CACHE_FOURTH_3 7 +# define R300_TX_CACHE_EIGHTH_0 8 +# define R300_TX_CACHE_EIGHTH_1 9 +# define R300_TX_CACHE_EIGHTH_2 10 +# define R300_TX_CACHE_EIGHTH_3 11 +# define R300_TX_CACHE_EIGHTH_4 12 +# define R300_TX_CACHE_EIGHTH_5 13 +# define R300_TX_CACHE_EIGHTH_6 14 +# define R300_TX_CACHE_EIGHTH_7 15 +# define R300_TX_CACHE_SIXTEENTH_0 16 +# define R300_TX_CACHE_SIXTEENTH_1 17 +# define R300_TX_CACHE_SIXTEENTH_2 18 +# define R300_TX_CACHE_SIXTEENTH_3 19 +# define R300_TX_CACHE_SIXTEENTH_4 20 +# define R300_TX_CACHE_SIXTEENTH_5 21 +# define R300_TX_CACHE_SIXTEENTH_6 22 +# define R300_TX_CACHE_SIXTEENTH_7 23 +# define R300_TX_CACHE_SIXTEENTH_8 24 +# define R300_TX_CACHE_SIXTEENTH_9 25 +# define R300_TX_CACHE_SIXTEENTH_10 26 +# define R300_TX_CACHE_SIXTEENTH_11 27 +# define R300_TX_CACHE_SIXTEENTH_12 28 +# define R300_TX_CACHE_SIXTEENTH_13 29 +# define R300_TX_CACHE_SIXTEENTH_14 30 +# define R300_TX_CACHE_SIXTEENTH_15 31 + #define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ # define R300_TX_PITCHMASK_SHIFT 0 # define R300_TX_PITCHMASK_MASK (2047 << 0) @@ -2639,6 +2673,24 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +/* Z Mask RAM is a Z compression buffer. + * Each dword of the Z Mask contains compression info for 16 4x4 pixel blocks, + * that is 2 bits for each block. + * On chips with 2 Z pipes, every other dword maps to a different pipe. + */ + +/* The dword offset into Z mask RAM (bits 18:4) */ +#define R300_ZB_ZMASK_OFFSET 0x4f30 + +/* Z Mask Pitch. */ +#define R300_ZB_ZMASK_PITCH 0x4f34 + +/* Access to Z Mask RAM in a manner similar to HiZ RAM. + * The indices are autoincrementing. */ +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3437,9 +3489,18 @@ enum { # define RADEON_WAIT_3D_IDLECLEAN (1 << 17) # define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) +#define R200_3D_DRAW_IMMD_2 0xC0003500 + +#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */ #define RADEON_CP_PACKET3 0xC0000000 -#define R200_3D_DRAW_IMMD_2 0xC0003500 +#define RADEON_ONE_REG_WR (1 << 15) + +#define CP_PACKET0(register, count) \ + (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) + +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) #endif /* _R300_REG_H */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 23caaa54730..53728431a63 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -35,6 +35,7 @@ #include "util/u_prim.h" #include "r300_cs.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_screen_buffer.h" #include "r300_emit.h" @@ -43,6 +44,8 @@ #include <limits.h> +#define IMMD_DWORDS 32 + static uint32_t r300_translate_primitive(unsigned prim) { switch (prim) { @@ -269,7 +272,7 @@ static boolean immd_is_good_idea(struct r300_context *r300, return FALSE; } - if (count > 10) { + if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) { return FALSE; } @@ -308,84 +311,75 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; unsigned vertex_element_count = r300->velems->count; - unsigned i, v, vbi, dw, elem_offset, dwords; + unsigned i, v, vbi, dwords; /* Size of the vertex, in dwords. */ - unsigned vertex_size = 0; - - /* Offsets of the attribute, in dwords, from the start of the vertex. */ - unsigned offset[PIPE_MAX_ATTRIBS]; + unsigned vertex_size = r300->velems->vertex_size_dwords; /* Size of the vertex element, in dwords. */ unsigned size[PIPE_MAX_ATTRIBS]; /* Stride to the same attrib in the next vertex in the vertex buffer, * in dwords. */ - unsigned stride[PIPE_MAX_ATTRIBS] = {0}; + unsigned stride[PIPE_MAX_ATTRIBS]; /* Mapped vertex buffers. */ - uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; - struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; + uint32_t* map[PIPE_MAX_ATTRIBS]; + uint32_t* mapelem[PIPE_MAX_ATTRIBS]; + struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {0}; - CS_LOCALS(r300); + CB_LOCALS; /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { velem = &r300->velems->velem[i]; - offset[i] = velem->src_offset / 4; - size[i] = util_format_get_blocksize(velem->src_format) / 4; - vertex_size += size[i]; + size[i] = r300->velems->hw_format_size[i] / 4; vbi = velem->vertex_buffer_index; + vbuf = &r300->vertex_buffer[vbi]; + stride[i] = vbuf->stride / 4; /* Map the buffer. */ - if (!map[vbi]) { - vbuf = &r300->vertex_buffer[vbi]; + if (!transfer[vbi]) { map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, vbuf->buffer, PIPE_TRANSFER_READ, &transfer[vbi]); - map[vbi] += vbuf->buffer_offset / 4; - stride[vbi] = vbuf->stride / 4; + map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * start; } + mapelem[i] = map[vbi] + (velem->src_offset / 4); } dwords = 9 + count * vertex_size; r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); - BEGIN_CS(dwords); - OUT_CS_REG(R300_GA_COLOR_CONTROL, + BEGIN_CS_AS_CB(r300, dwords); + OUT_CB_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); - OUT_CS(count - 1); - OUT_CS(0); - OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); - OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CB(count - 1); + OUT_CB(0); + OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - velem = &r300->velems->velem[i]; - vbi = velem->vertex_buffer_index; - elem_offset = offset[i] + stride[vbi] * (v + start); - - for (dw = 0; dw < size[i]; dw++) { - OUT_CS(map[vbi][elem_offset + dw]); - } + OUT_CB_TABLE(&mapelem[i][stride[i] * v], size[i]); } } - END_CS; + END_CB; /* Unmap buffers. */ for (i = 0; i < vertex_element_count; i++) { vbi = r300->velems->velem[i].vertex_buffer_index; - if (map[vbi]) { + if (transfer[vbi]) { vbuf = &r300->vertex_buffer[vbi]; pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); - map[vbi] = NULL; + transfer[vbi] = NULL; } } } @@ -482,111 +476,6 @@ static void r300_emit_draw_elements(struct r300_context *r300, END_CS; } -static void r300_shorten_ubyte_elts(struct r300_context* r300, - struct pipe_resource** elts, - int index_bias, - unsigned start, - unsigned count) -{ - struct pipe_context* context = &r300->context; - struct pipe_screen* screen = r300->context.screen; - struct pipe_resource* new_elts; - unsigned char *in_map; - unsigned short *out_map; - struct pipe_transfer *src_transfer, *dst_transfer; - unsigned i; - - new_elts = pipe_buffer_create(screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); - out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); - - in_map += start; - - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, src_transfer); - pipe_buffer_unmap(context, new_elts, dst_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_ushort_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned short *in_map; - unsigned short *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned short)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - -static void r300_rebuild_uint_elts(struct r300_context *r300, - struct pipe_resource **elts, - int index_bias, - unsigned start, unsigned count) -{ - struct pipe_context *context = &r300->context; - struct pipe_transfer *in_transfer = NULL; - struct pipe_transfer *out_transfer = NULL; - struct pipe_resource *new_elts; - unsigned int *in_map; - unsigned int *out_map; - unsigned i; - - new_elts = pipe_buffer_create(context->screen, - PIPE_BIND_INDEX_BUFFER, - 2 * count); - - in_map = pipe_buffer_map(context, *elts, - PIPE_TRANSFER_READ, &in_transfer); - out_map = pipe_buffer_map(context, new_elts, - PIPE_TRANSFER_WRITE, &out_transfer); - - in_map += start; - for (i = 0; i < count; i++) { - *out_map = (unsigned int)(*in_map + index_bias); - in_map++; - out_map++; - } - - pipe_buffer_unmap(context, *elts, in_transfer); - pipe_buffer_unmap(context, new_elts, out_transfer); - - *elts = new_elts; -} - /* This is the fast-path drawing & emission for HW TCL. */ static void r300_draw_range_elements(struct pipe_context* pipe, struct pipe_resource* indexBuffer, @@ -605,6 +494,7 @@ static void r300_draw_range_elements(struct pipe_context* pipe, r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; int buffer_offset = 0, index_offset = 0; /* for index bias emulation */ + boolean translate = FALSE; if (r300->skip_rendering) { return; @@ -614,33 +504,25 @@ static void r300_draw_range_elements(struct pipe_context* pipe, return; } - if (indexBias && !index_bias_supported(r300)) { - r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); + /* Index buffer range checking. */ + if ((start + count) * indexSize > indexBuffer->width0) { + fprintf(stderr, "r300: Invalid index buffer range. Skipping rendering.\n"); + return; } - /* Rebuild the index buffer if needed. */ - switch (indexSize) { - case 1: - r300_shorten_ubyte_elts(r300, &indexBuffer, index_offset, start, count); - indexSize = 2; - start = 0; - break; - - case 2: - if (start % 2 != 0 || index_offset) { - r300_rebuild_ushort_elts(r300, &indexBuffer, index_offset, start, count); - start = 0; - } - break; + /* Set up fallback for incompatible vertex layout if needed. */ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300); + translate = TRUE; + } - case 4: - if (index_offset) { - r300_rebuild_uint_elts(r300, &indexBuffer, index_offset, start, count); - start = 0; - } - break; + if (indexBias && !index_bias_supported(r300)) { + r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset); } + r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset, + &start, count); + r300_update_derived_state(r300); r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); @@ -676,6 +558,10 @@ static void r300_draw_range_elements(struct pipe_context* pipe, if (indexBuffer != orgIndexBuffer) { pipe_resource_reference( &indexBuffer, NULL ); } + + if (translate) { + r300_end_vertex_translate(r300); + } } /* Simple helpers for context setup. Should probably be moved to util. */ @@ -699,6 +585,7 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, count > 65536 && r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); unsigned short_count; + boolean translate = FALSE; if (r300->skip_rendering) { return; @@ -708,6 +595,12 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, return; } + /* Set up fallback for incompatible vertex layout if needed. */ + if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) { + r300_begin_vertex_translate(r300); + translate = TRUE; + } + r300_update_derived_state(r300); if (immd_is_good_idea(r300, count)) { @@ -737,6 +630,10 @@ static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, } u_upload_flush(r300->upload_vb); } + + if (translate) { + r300_end_vertex_translate(r300); + } } /**************************************************************************** @@ -1016,7 +913,7 @@ static void r300_render_draw_elements(struct vbuf_render* render, unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) / (r300render->r300->vertex_info.size * 4) - 1; unsigned short_count; - struct r300_cs_info cs_info; + unsigned free_dwords; CS_LOCALS(r300); @@ -1029,9 +926,9 @@ static void r300_render_draw_elements(struct vbuf_render* render, NULL, 256, 0, 0, &end_cs_dwords); while (count) { - r300->rws->get_cs_info(r300->rws, &cs_info); + free_dwords = r300->rws->get_cs_free_dwords(r300->rws); - short_count = MIN2(count, (cs_info.free - end_cs_dwords - 6) * 2); + short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2); BEGIN_CS(6 + (short_count+1)/2); OUT_CS_REG(R300_GA_COLOR_CONTROL, @@ -1117,132 +1014,45 @@ struct draw_stage* r300_draw_stage(struct r300_context* r300) } /**************************************************************************** - * Two-sided stencil reference value fallback. It's designed to be as much - * separate from rest of the driver as possible. + * End of SW TCL functions * ***************************************************************************/ -struct r300_stencilref_context { - void (*draw_arrays)(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count); - - void (*draw_range_elements)( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count); - - uint32_t rs_cull_mode; - uint32_t zb_stencilrefmask; - ubyte ref_value_front; -}; - -static boolean r300_stencilref_needed(struct r300_context *r300) +static void r300_resource_resolve(struct pipe_context* pipe, + struct pipe_resource* dest, + struct pipe_subresource subdest, + struct pipe_resource* src, + struct pipe_subresource subsrc) { - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - return dsa->two_sided_stencil_ref || - (dsa->two_sided && - r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); -} - -/* Set drawing for front faces. */ -static void r300_stencilref_begin(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - /* Save state. */ - sr->rs_cull_mode = rs->cull_mode; - sr->zb_stencilrefmask = dsa->stencil_ref_mask; - sr->ref_value_front = r300->stencil_ref.ref_value[0]; - - /* We *cull* pixels, therefore no need to mask out the bits. */ - rs->cull_mode |= R300_CULL_BACK; - - r300->rs_state.dirty = TRUE; -} - -/* Set drawing for back faces. */ -static void r300_stencilref_switch_side(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; - dsa->stencil_ref_mask = dsa->stencil_ref_bf; - r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; - - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; -} - -/* Restore the original state. */ -static void r300_stencilref_end(struct r300_context *r300) -{ - struct r300_stencilref_context *sr = r300->stencilref_fallback; - struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; - struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; - - /* Restore state. */ - rs->cull_mode = sr->rs_cull_mode; - dsa->stencil_ref_mask = sr->zb_stencilrefmask; - r300->stencil_ref.ref_value[0] = sr->ref_value_front; - - r300->rs_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; -} - -static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_arrays(pipe, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_arrays(pipe, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_stencilref_draw_range_elements( - struct pipe_context *pipe, struct pipe_resource *indexBuffer, - unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, - unsigned mode, unsigned start, unsigned count) -{ - struct r300_context *r300 = r300_context(pipe); - struct r300_stencilref_context *sr = r300->stencilref_fallback; - - if (!r300_stencilref_needed(r300)) { - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - } else { - r300_stencilref_begin(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_switch_side(r300); - sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, - minIndex, maxIndex, mode, start, count); - r300_stencilref_end(r300); - } -} - -static void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) -{ - r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); - - /* Save original draw functions. */ - r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; - r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; - - /* Override the draw functions. */ - r300->context.draw_arrays = r300_stencilref_draw_arrays; - r300->context.draw_range_elements = r300_stencilref_draw_range_elements; + struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; + struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen, + src, subsrc.face, subsrc.level, 0, 0); + float color[] = {0, 0, 0, 0}; + + DBG(r300, DBG_DRAW, "r300: Resolving resource...\n"); + + /* Enable AA resolve. */ + aa->dest = r300_surface( + dest->screen->get_tex_surface(dest->screen, dest, subdest.face, + subdest.level, 0, 0)); + + aa->aaresolve_ctl = + R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE; + r300->aa_state.size = 12; + r300->aa_state.dirty = TRUE; + + /* Resolve the surface. */ + r300->context.clear_render_target(pipe, + srcsurf, color, 0, 0, src->width0, src->height0); + + /* Disable AA resolve. */ + aa->aaresolve_ctl = 0; + r300->aa_state.size = 4; + r300->aa_state.dirty = TRUE; + + pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL); + pipe_surface_reference((struct pipe_surface**)&aa->dest, NULL); } void r300_init_render_functions(struct r300_context *r300) @@ -1259,7 +1069,9 @@ void r300_init_render_functions(struct r300_context *r300) r300->context.draw_range_elements = r300_swtcl_draw_range_elements; } - /* Plug in two-sided stencil reference value fallback if needed. */ + r300->context.resource_resolve = r300_resource_resolve; + + /* Plug in the two-sided stencil reference value fallback if needed. */ if (!r300->screen->caps.is_r500) r300_plug_in_stencil_ref_fallback(r300); } diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c new file mode 100644 index 00000000000..9a6b4e12ff1 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render_stencilref.c @@ -0,0 +1,158 @@ +/* + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * The two-sided stencil reference value fallback for r3xx-r4xx chips. + * These chips support two-sided stencil functions but they do not support + * a two-sided reference value. + * + * The functions below split every draw call which uses the two-sided + * reference value into two draw calls -- the first one renders front faces + * and the second renders back faces with the other reference value. + */ + +#include "r300_context.h" +#include "r300_reg.h" + +struct r300_stencilref_context { + void (*draw_arrays)(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); + + void (*draw_range_elements)( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count); + + uint32_t rs_cull_mode; + uint32_t zb_stencilrefmask; + ubyte ref_value_front; +}; + +static boolean r300_stencilref_needed(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + return dsa->two_sided_stencil_ref || + (dsa->two_sided && + r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); +} + +/* Set drawing for front faces. */ +static void r300_stencilref_begin(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Save state. */ + sr->rs_cull_mode = rs->cb_main[rs->cull_mode_index]; + sr->zb_stencilrefmask = dsa->stencil_ref_mask; + sr->ref_value_front = r300->stencil_ref.ref_value[0]; + + /* We *cull* pixels, therefore no need to mask out the bits. */ + rs->cb_main[rs->cull_mode_index] |= R300_CULL_BACK; + + r300->rs_state.dirty = TRUE; +} + +/* Set drawing for back faces. */ +static void r300_stencilref_switch_side(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode | R300_CULL_FRONT; + dsa->stencil_ref_mask = dsa->stencil_ref_bf; + r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +/* Restore the original state. */ +static void r300_stencilref_end(struct r300_context *r300) +{ + struct r300_stencilref_context *sr = r300->stencilref_fallback; + struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; + struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; + + /* Restore state. */ + rs->cb_main[rs->cull_mode_index] = sr->rs_cull_mode; + dsa->stencil_ref_mask = sr->zb_stencilrefmask; + r300->stencil_ref.ref_value[0] = sr->ref_value_front; + + r300->rs_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; +} + +static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_arrays(pipe, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_arrays(pipe, mode, start, count); + r300_stencilref_end(r300); + } +} + +static void r300_stencilref_draw_range_elements( + struct pipe_context *pipe, struct pipe_resource *indexBuffer, + unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, + unsigned mode, unsigned start, unsigned count) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_stencilref_context *sr = r300->stencilref_fallback; + + if (!r300_stencilref_needed(r300)) { + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + } else { + r300_stencilref_begin(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_switch_side(r300); + sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, + minIndex, maxIndex, mode, start, count); + r300_stencilref_end(r300); + } +} + +void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) +{ + r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); + + /* Save original draw functions. */ + r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; + r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; + + /* Override the draw functions. */ + r300->context.draw_arrays = r300_stencilref_draw_arrays; + r300->context.draw_range_elements = r300_stencilref_draw_range_elements; +} diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c new file mode 100644 index 00000000000..0ea11e5bfc2 --- /dev/null +++ b/src/gallium/drivers/r300/r300_render_translate.c @@ -0,0 +1,322 @@ +/* + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/** + * The functions below translate vertex and index buffers to the layout + * compatible with the hardware, so that all vertex and index fetches are + * DWORD-aligned and all used vertex and index formats are supported. + * For indices, an optional index offset is added to each index. + */ + +#include "r300_context.h" +#include "translate/translate.h" + +void r300_begin_vertex_translate(struct r300_context *r300) +{ + struct pipe_context *pipe = &r300->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r300_vertex_element_state *ve = r300->velems; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &r300->vertex_buffer[ve->velem[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->velem[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->velem[i].vertex_buffer_index; + te->input_format = ve->velem[i].src_format; + te->input_offset = vb->buffer_offset + ve->velem[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->velem[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(r300->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < r300->vertex_buffer_count; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = r300->vertex_buffer_max_index + 1; + + out_buffer = pipe_buffer_create(&r300->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < r300->vertex_buffer_count; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + r300->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + r300->tran.saved_velems = r300->velems; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->velem[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->velem[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = r300->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->velem[i], + sizeof(struct pipe_vertex_element)); + } + } + + r300->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +void r300_end_vertex_translate(struct r300_context *r300) +{ + struct pipe_context *pipe = &r300->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer, + NULL); +} + +static void r300_shorten_ubyte_elts(struct r300_context* r300, + struct pipe_resource** elts, + int index_bias, + unsigned start, + unsigned count) +{ + struct pipe_context* context = &r300->context; + struct pipe_screen* screen = r300->context.screen; + struct pipe_resource* new_elts; + unsigned char *in_map; + unsigned short *out_map; + struct pipe_transfer *src_transfer, *dst_transfer; + unsigned i; + + new_elts = pipe_buffer_create(screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); + out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); + + in_map += start; + + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, src_transfer); + pipe_buffer_unmap(context, new_elts, dst_transfer); + + *elts = new_elts; +} + +static void r300_rebuild_ushort_elts(struct r300_context *r300, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_context *context = &r300->context; + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned short *in_map; + unsigned short *out_map; + unsigned i; + + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned short)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} + +static void r300_rebuild_uint_elts(struct r300_context *r300, + struct pipe_resource **elts, + int index_bias, + unsigned start, unsigned count) +{ + struct pipe_context *context = &r300->context; + struct pipe_transfer *in_transfer = NULL; + struct pipe_transfer *out_transfer = NULL; + struct pipe_resource *new_elts; + unsigned int *in_map; + unsigned int *out_map; + unsigned i; + + new_elts = pipe_buffer_create(context->screen, + PIPE_BIND_INDEX_BUFFER, + 2 * count); + + in_map = pipe_buffer_map(context, *elts, + PIPE_TRANSFER_READ, &in_transfer); + out_map = pipe_buffer_map(context, new_elts, + PIPE_TRANSFER_WRITE, &out_transfer); + + in_map += start; + for (i = 0; i < count; i++) { + *out_map = (unsigned int)(*in_map + index_bias); + in_map++; + out_map++; + } + + pipe_buffer_unmap(context, *elts, in_transfer); + pipe_buffer_unmap(context, new_elts, out_transfer); + + *elts = new_elts; +} + +void r300_translate_index_buffer(struct r300_context *r300, + struct pipe_resource **index_buffer, + unsigned *index_size, unsigned index_offset, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0 || index_offset) { + r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + + case 4: + if (index_offset) { + r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count); + *start = 0; + } + break; + } +} diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index fd522b84e1a..5bba55906c3 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -270,15 +270,24 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_R16G16B16_FLOAT || format == PIPE_FORMAT_R16G16B16A16_FLOAT; - if (target >= PIPE_MAX_TEXTURE_TYPES) { - fprintf(stderr, "r300: Implementation error: Received bogus texture " - "target %d in %s\n", target, __FUNCTION__); - return FALSE; + switch (sample_count) { + case 0: + case 1: + break; + case 2: + case 3: + case 4: + case 6: + if (usage != PIPE_BIND_RENDER_TARGET || + !util_format_is_rgba8_variant( + util_format_description(format))) { + return FALSE; + } + break; + default: + return FALSE; } - if (sample_count > 1) - return FALSE; - /* Check sampler format support. */ if ((usage & PIPE_BIND_SAMPLER_VIEW) && /* Z24 cannot be sampled from on non-r5xx. */ @@ -320,6 +329,12 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, retval |= PIPE_BIND_VERTEX_BUFFER; } + /* Transfers are always supported. */ + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + return retval == usage; } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 29492024fe3..29cd5dbe267 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -65,7 +65,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { /* Logging. */ #define DBG_FP (1 << 1) #define DBG_VP (1 << 2) -#define DBG_CS (1 << 3) +/* The bit (1 << 3) is unused. */ #define DBG_DRAW (1 << 4) #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) @@ -76,6 +76,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) #define DBG_NO_IMMD (1 << 18) +#define DBG_FAKE_OCC (1 << 19) /* Statistics. */ #define DBG_STATS (1 << 24) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 44179f19ed4..7959e6a2f9e 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -93,25 +93,28 @@ int r300_upload_user_buffers(struct r300_context *r300) enum pipe_error ret = PIPE_OK; int i, nr; - nr = r300->vertex_buffer_count; + nr = r300->velems->count; for (i = 0; i < nr; i++) { - if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) { - struct pipe_resource *upload_buffer = NULL; - unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/ - unsigned size = r300->vertex_buffer[i].buffer->width0; - unsigned upload_offset; - ret = u_upload_buffer(r300->upload_vb, - offset, size, - r300->vertex_buffer[i].buffer, - &upload_offset, &upload_buffer); - if (ret) - return ret; - - pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL); - r300->vertex_buffer[i].buffer = upload_buffer; - r300->vertex_buffer[i].buffer_offset = upload_offset; - } + struct pipe_vertex_buffer *vb = + &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index]; + + if (r300_buffer_is_user_buffer(vb->buffer)) { + struct pipe_resource *upload_buffer = NULL; + unsigned offset = 0; /*vb->buffer_offset * 4;*/ + unsigned size = vb->buffer->width0; + unsigned upload_offset; + ret = u_upload_buffer(r300->upload_vb, + offset, size, + vb->buffer, + &upload_offset, &upload_buffer); + if (ret) + return ret; + + pipe_resource_reference(&vb->buffer, NULL); + vb->buffer = upload_buffer; + vb->buffer_offset = upload_offset; + } } return ret; } @@ -261,10 +264,6 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->b.vtbl = &r300_buffer_vtbl; pipe_reference_init(&rbuf->b.b.reference, 1); rbuf->b.b.screen = screen; - - if (rbuf->b.b.bind & R300_BIND_OQBO) - alignment = 4096; - rbuf->domain = R300_DOMAIN_GTT; rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h index 87b42b94122..ff355858704 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.h +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -116,25 +116,4 @@ static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, return rws->add_buffer(rws, tex->buffer, rd, wr); } -static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, - struct r300_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) -{ - if (!buf->buf) - return; - - rws->write_cs_reloc(rws, buf->buf, rd, wd, flags); -} - -static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, - struct r300_texture *texture, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd, - uint32_t flags) -{ - rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); -} - #endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index cfec8ac2b7e..15de533778f 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -31,12 +31,12 @@ #include "pipe/p_config.h" +#include "r300_cb.h" #include "r300_context.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_screen.h" #include "r300_screen_buffer.h" -#include "r300_state.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_texture.h" @@ -183,6 +183,12 @@ static void* r300_create_blend_state(struct pipe_context* pipe, { struct r300_screen* r300screen = r300_screen(pipe->screen); struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */ + uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */ + uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */ + CB_LOCALS; if (state->rt[0].blend_enable) { @@ -196,7 +202,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha, * this is just the crappy D3D naming */ - blend->blend_control = R300_ALPHA_BLEND_ENABLE | + blend_control = R300_ALPHA_BLEND_ENABLE | r300_translate_blend_function(eqRGB) | ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); @@ -220,7 +226,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { /* Enable reading from the colorbuffer. */ - blend->blend_control |= R300_READ_ENABLE; + blend_control |= R300_READ_ENABLE; if (r300screen->caps.is_r500) { /* Optimization: Depending on incoming pixels, we can @@ -233,7 +239,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (dstA == PIPE_BLENDFACTOR_SRC_COLOR || dstA == PIPE_BLENDFACTOR_SRC_ALPHA || dstA == PIPE_BLENDFACTOR_ZERO)) { - blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + blend_control |= R500_SRC_ALPHA_0_NO_READ; } /* Disable reading if SRC_ALPHA == 1. */ @@ -242,7 +248,7 @@ static void* r300_create_blend_state(struct pipe_context* pipe, (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstA == PIPE_BLENDFACTOR_ZERO)) { - blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + blend_control |= R500_SRC_ALPHA_1_NO_READ; } } } @@ -272,31 +278,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe, * pixels. */ if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; } else if (blend_discard_if_src_color_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; } else if (blend_discard_if_src_color_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, dstRGB, dstA)) { - blend->blend_control |= + blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; } } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { - blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE; - blend->alpha_blend_control = + blend_control |= R300_SEPARATE_ALPHA_ENABLE; + alpha_blend_control = r300_translate_blend_function(eqA) | (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) | (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT); @@ -305,21 +311,21 @@ static void* r300_create_blend_state(struct pipe_context* pipe, /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ if (state->logicop_enable) { - blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | + rop = R300_RB3D_ROPCNTL_ROP_ENABLE | (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; } /* Color channel masks for all MRTs. */ - blend->color_channel_mask = bgra_cmask(state->rt[0].colormask); + color_channel_mask = bgra_cmask(state->rt[0].colormask); if (r300screen->caps.is_r500 && state->independent_blend_enable) { if (state->rt[1].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; + color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4; } if (state->rt[2].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; + color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8; } if (state->rt[3].blend_enable) { - blend->color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; + color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12; } } @@ -330,11 +336,31 @@ static void* r300_create_blend_state(struct pipe_context* pipe, * This could be revisited if we ever get quality or conformance hints. * if (state->dither) { - blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | + dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; } */ + /* Build a command buffer. */ + BEGIN_CB(blend->cb, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(blend_control); + OUT_CB(alpha_blend_control); + OUT_CB(color_channel_mask); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + + /* The same as above, but with no colorbuffer reads and writes. */ + BEGIN_CB(blend->cb_no_readwrite, 8); + OUT_CB_REG(R300_RB3D_ROPCNTL, rop); + OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R300_RB3D_DITHER_CTL, dither); + END_CB; + return (void*)blend; } @@ -368,20 +394,26 @@ static void r300_set_blend_color(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); struct r300_blend_color_state* state = (struct r300_blend_color_state*)r300->blend_color_state.state; - union util_color uc; + CB_LOCALS; - util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - state->blend_color = uc.ui; + if (r300->screen->caps.is_r500) { + /* XXX if FP16 blending is enabled, we should use the FP16 format */ + BEGIN_CB(state->cb, 3); + OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); + OUT_CB(float_to_fixed10(color->color[0]) | + (float_to_fixed10(color->color[3]) << 16)); + OUT_CB(float_to_fixed10(color->color[2]) | + (float_to_fixed10(color->color[1]) << 16)); + END_CB; + } else { + union util_color uc; + util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - /* XXX if FP16 blending is enabled, we should use the FP16 format */ - state->blend_color_red_alpha = - float_to_fixed10(color->color[0]) | - (float_to_fixed10(color->color[3]) << 16); - state->blend_color_green_blue = - float_to_fixed10(color->color[2]) | - (float_to_fixed10(color->color[1]) << 16); + BEGIN_CB(state->cb, 2); + OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui); + END_CB; + } - r300->blend_color_state.size = r300->screen->caps.is_r500 ? 3 : 2; r300->blend_color_state.dirty = TRUE; } @@ -389,18 +421,27 @@ static void r300_set_clip_state(struct pipe_context* pipe, const struct pipe_clip_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_clip_state *clip = + (struct r300_clip_state*)r300->clip_state.state; + CB_LOCALS; - r300->clip = *state; + clip->clip = *state; if (r300->screen->caps.has_tcl) { - memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); - r300->clip_state.size = 29; + BEGIN_CB(clip->cb, 29); + OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG, + (r300->screen->caps.is_r500 ? + R500_PVS_UCP_START : R300_PVS_UCP_START)); + OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4); + OUT_CB_TABLE(state->ucp, 6 * 4); + OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) | + R300_PS_UCP_MODE_CLIP_AS_TRIFAN); + END_CB; r300->clip_state.dirty = TRUE; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); - r300->clip_state.size = 2; } } @@ -422,6 +463,9 @@ static void* { struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps; struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); + CB_LOCALS; + + dsa->dsa = *state; /* Depth test setup. */ if (state->depth.enabled) { @@ -494,9 +538,43 @@ static void* dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT; } + BEGIN_CB(&dsa->cb_begin, 8); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(dsa->z_buffer_control); + OUT_CB(dsa->z_stencil_control); + OUT_CB(dsa->stencil_ref_mask); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); + END_CB; + + BEGIN_CB(dsa->cb_no_readwrite, 8); + OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); + OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); + OUT_CB(0); + OUT_CB(0); + OUT_CB(0); + OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); + END_CB; + return (void*)dsa; } +static void r300_dsa_inject_stencilref(struct r300_context *r300) +{ + struct r300_dsa_state *dsa = + (struct r300_dsa_state*)r300->dsa_state.state; + + if (!dsa) + return; + + dsa->stencil_ref_mask = + (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) | + r300->stencil_ref.ref_value[0]; + dsa->stencil_ref_bf = + (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) | + r300->stencil_ref.ref_value[1]; +} + /* Bind DSA state. */ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) @@ -508,6 +586,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, } UPDATE_STATE(state, r300->dsa_state); + + r300_dsa_inject_stencilref(r300); } /* Free DSA state. */ @@ -523,6 +603,8 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); r300->stencil_ref = *sr; + + r300_dsa_inject_stencilref(r300); r300->dsa_state.dirty = TRUE; } @@ -577,21 +659,34 @@ static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, tex->last_level, util_format_short_name(tex->format)); } +static void copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + for (i = 0; i < src->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + for (; i < dst->nr_cbufs; i++) { + pipe_surface_reference(&dst->cbufs[i], NULL); + } + pipe_surface_reference(&dst->zsbuf, src->zsbuf); + + dst->nr_cbufs = src->nr_cbufs; + dst->width = src->width; + dst->height = src->height; +} + static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height, i; uint32_t zbuffer_bpp = 0; - if (state->nr_cbufs > 4) { - fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, " - "refusing to bind framebuffer state!\n", __FUNCTION__); - return; - } - if (r300->screen->caps.is_r500) { max_width = max_height = 4096; } else if (r300->screen->caps.is_r400) { @@ -610,7 +705,11 @@ static void draw_flush(r300->draw); } + r300->gpu_flush.dirty = TRUE; + r300->aa_state.dirty = TRUE; r300->fb_state.dirty = TRUE; + r300->hyperz_state.dirty = TRUE; + r300->fb_state_pipelined.dirty = TRUE; /* If nr_cbufs is changed from zero to non-zero or vice versa... */ if (!!old_state->nr_cbufs != !!state->nr_cbufs) { @@ -624,10 +723,12 @@ static void /* The tiling flags are dependent on the surface miplevel, unfortunately. */ r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); - memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + copy_framebuffer_state(r300->fb_state.state, state); - r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + - (state->zsbuf ? 10 : 0) + 9; + r300->fb_state.size = + 2 + + (8 * state->nr_cbufs) + + (state->zsbuf ? (r300->screen->caps.has_hiz ? 18 : 14) : 0); /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -646,6 +747,30 @@ static void } } + /* Set up AA config. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + if (state->nr_cbufs && state->cbufs[0]->texture->nr_samples > 1) { + aa->aa_config = R300_GB_AA_CONFIG_AA_ENABLE; + + switch (state->cbufs[0]->texture->nr_samples) { + case 2: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; + break; + case 3: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; + break; + case 4: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; + break; + case 6: + aa->aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; + break; + } + } else { + aa->aa_config = 0; + } + } + if (DBG_ON(r300, DBG_FB)) { fprintf(stderr, "r300: set_framebuffer_state:\n"); for (i = 0; i < state->nr_cbufs; i++) { @@ -679,13 +804,12 @@ void r300_mark_fs_code_dirty(struct r300_context *r300) r300->fs.dirty = TRUE; r300->fs_rc_constant_state.dirty = TRUE; r300->fs_constants.dirty = TRUE; + r300->fs.size = fs->shader->cb_code_size; if (r300->screen->caps.is_r500) { - r300->fs.size = r500_get_fs_atom_size(r300); r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7; r300->fs_constants.size = fs->shader->externals_count * 4 + 3; } else { - r300->fs.size = r300_get_fs_atom_size(r300); r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5; r300->fs_constants.size = fs->shader->externals_count * 4 + 1; } @@ -719,6 +843,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) tmp = ptr; ptr = ptr->next; rc_constants_destroy(&tmp->code.constants); + FREE(tmp->cb_code); FREE(tmp); } FREE((void*)fs->state.tokens); @@ -744,6 +869,27 @@ static void* r300_create_rs_state(struct pipe_context* pipe, struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); int i; float psiz; + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ + uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ + uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ + uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ + uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */ + uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */ + uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */ + uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */ + uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */ + + /* Specifies top of Raster pipe specific enable controls, + * i.e. texture coordinates stuffing for points, lines, triangles */ + uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */ + + /* Point sprites texture coordinates, 0: lower left, 1: upper right */ + float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */ + float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */ + float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */ + float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */ + CB_LOCALS; /* Copy rasterizer state. */ rs->rs = *state; @@ -753,18 +899,18 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */ #ifdef PIPE_ARCH_LITTLE_ENDIAN - rs->vap_control_status = R300_VC_NO_SWAP; + vap_control_status = R300_VC_NO_SWAP; #else - rs->vap_control_status = R300_VC_32BIT_SWAP; + vap_control_status = R300_VC_32BIT_SWAP; #endif /* If no TCL engine is present, turn off the HW TCL. */ if (!r300_screen(pipe->screen)->caps.has_tcl) { - rs->vap_control_status |= R300_VAP_TCL_BYPASS; + vap_control_status |= R300_VAP_TCL_BYPASS; } /* Point size width and height. */ - rs->point_size = + point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); @@ -774,68 +920,67 @@ static void* r300_create_rs_state(struct pipe_context* pipe, * Clamp to [0, max FB size] */ psiz = pipe->screen->get_paramf(pipe->screen, PIPE_CAP_MAX_POINT_WIDTH); - rs->point_minmax = + point_minmax = pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT; } else { /* We cannot disable the point-size vertex output, * so clamp it. */ psiz = state->point_size; - rs->point_minmax = + point_minmax = (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) | (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT); } /* Line control. */ - rs->line_control = pack_float_16_6x(state->line_width) | + line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; /* Enable polygon mode */ + polygon_mode = 0; if (state->fill_front != PIPE_POLYGON_MODE_FILL || state->fill_back != PIPE_POLYGON_MODE_FILL) { - rs->polygon_mode = R300_GA_POLY_MODE_DUAL; + polygon_mode = R300_GA_POLY_MODE_DUAL; } /* Front face */ if (state->front_ccw) - rs->cull_mode = R300_FRONT_FACE_CCW; + cull_mode = R300_FRONT_FACE_CCW; else - rs->cull_mode = R300_FRONT_FACE_CW; + cull_mode = R300_FRONT_FACE_CW; /* Polygon offset */ + polygon_offset_enable = 0; if (util_get_offset(state, state->fill_front)) { - rs->polygon_offset_enable |= R300_FRONT_ENABLE; + polygon_offset_enable |= R300_FRONT_ENABLE; } if (util_get_offset(state, state->fill_back)) { - rs->polygon_offset_enable |= R300_BACK_ENABLE; + polygon_offset_enable |= R300_BACK_ENABLE; } + rs->polygon_offset_enable = polygon_offset_enable != 0; + /* Polygon mode */ - if (rs->polygon_mode) { - rs->polygon_mode |= + if (polygon_mode) { + polygon_mode |= r300_translate_polygon_mode_front(state->fill_front); - rs->polygon_mode |= + polygon_mode |= r300_translate_polygon_mode_back(state->fill_back); } if (state->cull_face & PIPE_FACE_FRONT) { - rs->cull_mode |= R300_CULL_FRONT; + cull_mode |= R300_CULL_FRONT; } if (state->cull_face & PIPE_FACE_BACK) { - rs->cull_mode |= R300_CULL_BACK; - } - - if (rs->polygon_offset_enable) { - rs->depth_offset = state->offset_units; - rs->depth_scale = state->offset_scale; + cull_mode |= R300_CULL_BACK; } if (state->line_stipple_enable) { - rs->line_stipple_config = + line_stipple_config = R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | (fui((float)state->line_stipple_factor) & R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); /* XXX this might need to be scaled up */ - rs->line_stipple_value = state->line_stipple_pattern; + line_stipple_value = state->line_stipple_pattern; } if (state->flatshade) { @@ -844,32 +989,80 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->color_control = R300_SHADE_MODEL_SMOOTH; } - rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; /* Point sprites */ + stuffing_enable = 0; if (state->sprite_coord_enable) { - rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE; + stuffing_enable = R300_GB_POINT_STUFF_ENABLE; for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) - rs->stuffing_enable |= + stuffing_enable |= R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2)); } - rs->point_texcoord_left = 0.0f; - rs->point_texcoord_right = 1.0f; + point_texcoord_left = 0.0f; + point_texcoord_right = 1.0f; switch (state->sprite_coord_mode) { case PIPE_SPRITE_COORD_UPPER_LEFT: - rs->point_texcoord_top = 0.0f; - rs->point_texcoord_bottom = 1.0f; + point_texcoord_top = 0.0f; + point_texcoord_bottom = 1.0f; break; case PIPE_SPRITE_COORD_LOWER_LEFT: - rs->point_texcoord_top = 1.0f; - rs->point_texcoord_bottom = 0.0f; + point_texcoord_top = 1.0f; + point_texcoord_bottom = 0.0f; break; } } + /* Build the main command buffer. */ + BEGIN_CB(rs->cb_main, 25); + OUT_CB_REG(R300_VAP_CNTL_STATUS, vap_control_status); + OUT_CB_REG(R300_GA_POINT_SIZE, point_size); + OUT_CB_REG_SEQ(R300_GA_POINT_MINMAX, 2); + OUT_CB(point_minmax); + OUT_CB(line_control); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); + OUT_CB(polygon_offset_enable); + rs->cull_mode_index = 9; + OUT_CB(cull_mode); + OUT_CB_REG(R300_GA_LINE_STIPPLE_CONFIG, line_stipple_config); + OUT_CB_REG(R300_GA_LINE_STIPPLE_VALUE, line_stipple_value); + OUT_CB_REG(R300_GA_POLY_MODE, polygon_mode); + OUT_CB_REG(R300_SC_CLIP_RULE, clip_rule); + OUT_CB_REG(R300_GB_ENABLE, stuffing_enable); + OUT_CB_REG_SEQ(R300_GA_POINT_S0, 4); + OUT_CB_32F(point_texcoord_left); + OUT_CB_32F(point_texcoord_bottom); + OUT_CB_32F(point_texcoord_right); + OUT_CB_32F(point_texcoord_top); + END_CB; + + /* Build the two command buffers for polygon offset setup. */ + if (polygon_offset_enable) { + float scale = state->offset_scale * 12; + float offset = state->offset_units * 4; + + BEGIN_CB(rs->cb_poly_offset_zb16, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; + + offset = state->offset_units * 2; + + BEGIN_CB(rs->cb_poly_offset_zb24, 5); + OUT_CB_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + OUT_CB_32F(scale); + OUT_CB_32F(offset); + END_CB; + } + return (void*)rs; } @@ -899,7 +1092,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } UPDATE_STATE(state, r300->rs_state); - r300->rs_state.size = 27 + (r300->polygon_offset_enabled ? 5 : 0); + r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0); if (last_sprite_coord_enable != r300->sprite_coord_enable || last_two_sided_color != r300->two_sided_color) { @@ -925,10 +1118,34 @@ static void* sampler->state = *state; + /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG + * or MIN filter is NEAREST. Since texwrap produces same results + * for CLAMP and CLAMP_TO_EDGE, we use them instead. */ + if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST || + sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) { + /* Wrap S. */ + if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + + /* Wrap T. */ + if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + + /* Wrap R. */ + if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP) + sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP) + sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE; + } + sampler->filter0 |= - (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | - (r300_translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | - (r300_translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); + (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) | + (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) | + (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT); sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, @@ -995,6 +1212,31 @@ static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) FREE(state); } +static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num) +{ + /* This looks like a hack, but I believe it's suppose to work like + * that. To illustrate how this works, let's assume you have 5 textures. + * From docs, 5 and the successive numbers are: + * + * FOURTH_1 = 5 + * FOURTH_2 = 6 + * FOURTH_3 = 7 + * EIGHTH_0 = 8 + * EIGHTH_1 = 9 + * + * First 3 textures will get 3/4 of size of the cache, divived evenly + * between them. The last 1/4 of the cache must be divided between + * the last 2 textures, each will therefore get 1/8 of the cache. + * Why not just to use "5 + texture_index" ? + * + * This simple trick works for all "num" <= 16. + */ + if (num <= 1) + return R300_TX_CACHE(R300_TX_CACHE_WHOLE); + else + return R300_TX_CACHE(num + index); +} + static void r300_set_fragment_sampler_views(struct pipe_context* pipe, unsigned count, struct pipe_sampler_view** views) @@ -1003,7 +1245,7 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; struct r300_texture *texture; - unsigned i; + unsigned i, real_num_views = 0, view_index = 0; unsigned tex_units = r300->screen->caps.num_tex_units; boolean dirty_tex = FALSE; @@ -1011,6 +1253,12 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, return; } + /* Calculate the real number of views. */ + for (i = 0; i < count; i++) { + if (views[i]) + real_num_views++; + } + for (i = 0; i < count; i++) { if (&state->sampler_views[i]->base != views[i]) { pipe_sampler_view_reference( @@ -1030,6 +1278,10 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe, if (texture->uses_pitch) { r300->fs_rc_constant_state.dirty = TRUE; } + + state->sampler_views[i]->texcache_region = + r300_assign_texture_cache_region(view_index, real_num_views); + view_index++; } } @@ -1167,15 +1419,15 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, if (r300->screen->caps.has_tcl) { /* HW TCL. */ - /* Check if the stride is aligned to the size of DWORD. */ + r300->incompatible_vb_layout = FALSE; + + /* Check if the strides and offsets are aligned to the size of DWORD. */ for (i = 0; i < count; i++) { if (buffers[i].buffer) { - if (buffers[i].stride % 4 != 0) { - // XXX Shouldn't we align the buffer? - fprintf(stderr, "r300: set_vertex_buffers: " - "Unaligned buffer stride %i isn't supported.\n", - buffers[i].stride); - abort(); + if (buffers[i].stride % 4 != 0 || + buffers[i].buffer_offset % 4 != 0) { + r300->incompatible_vb_layout = TRUE; + break; } } } @@ -1248,7 +1500,7 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) * so PSC should just route stuff based on the vertex elements, * and not on attrib information. */ for (i = 0; i < velems->count; i++) { - format = velems->velem[i].src_format; + format = velems->hw_format[i]; type = r300_translate_vertex_data_type(format); if (type == R300_INVALID_FORMAT) { @@ -1280,12 +1532,15 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems) vstream->count = (i >> 1) + 1; } +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + static void* r300_create_vertex_elements_state(struct pipe_context* pipe, unsigned count, const struct pipe_vertex_element* attribs) { struct r300_vertex_element_state *velems; - unsigned i, size; + unsigned i; enum pipe_format *format; assert(count <= PIPE_MAX_ATTRIBS); @@ -1295,86 +1550,68 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); if (r300_screen(pipe->screen)->caps.has_tcl) { - r300_vertex_psc(velems); - - /* Check if the format is aligned to the size of DWORD. - * We only care about the blocksizes of the formats since - * swizzles are already set up. */ + /* Set the best hw format in case the original format is not + * supported by hw. */ for (i = 0; i < count; i++) { - format = &velems->velem[i].src_format; + velems->hw_format[i] = velems->velem[i].src_format; + format = &velems->hw_format[i]; - /* Replace some formats with their aligned counterparts, - * this is OK because we check for aligned strides too. */ + /* This is basically the list of unsupported formats. + * For now we don't care about the alignment, that's going to + * be sorted out after the PSC setup. */ switch (*format) { - /* Align to RGBA8. */ - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - *format = PIPE_FORMAT_R8G8B8A8_UNORM; - continue; - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8B8_SNORM: - *format = PIPE_FORMAT_R8G8B8A8_SNORM; - continue; - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8B8_USCALED: - *format = PIPE_FORMAT_R8G8B8A8_USCALED; - continue; - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8B8_SSCALED: - *format = PIPE_FORMAT_R8G8B8A8_SSCALED; - continue; - - /* Align to RG16. */ - case PIPE_FORMAT_R16_UNORM: - *format = PIPE_FORMAT_R16G16_UNORM; - continue; - case PIPE_FORMAT_R16_SNORM: - *format = PIPE_FORMAT_R16G16_SNORM; - continue; - case PIPE_FORMAT_R16_USCALED: - *format = PIPE_FORMAT_R16G16_USCALED; - continue; - case PIPE_FORMAT_R16_SSCALED: - *format = PIPE_FORMAT_R16G16_SSCALED; - continue; - case PIPE_FORMAT_R16_FLOAT: - *format = PIPE_FORMAT_R16G16_FLOAT; - continue; - - /* Align to RGBA16. */ - case PIPE_FORMAT_R16G16B16_UNORM: - *format = PIPE_FORMAT_R16G16B16A16_UNORM; - continue; - case PIPE_FORMAT_R16G16B16_SNORM: - *format = PIPE_FORMAT_R16G16B16A16_SNORM; - continue; - case PIPE_FORMAT_R16G16B16_USCALED: - *format = PIPE_FORMAT_R16G16B16A16_USCALED; - continue; - case PIPE_FORMAT_R16G16B16_SSCALED: - *format = PIPE_FORMAT_R16G16B16A16_SSCALED; - continue; - case PIPE_FORMAT_R16G16B16_FLOAT: - *format = PIPE_FORMAT_R16G16B16A16_FLOAT; - continue; + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_UNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_USCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_SNORM, R32_FLOAT); + FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_SSCALED, R32_FLOAT); + FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT); + + FORMAT_REPLACE(R32_FIXED, R32_FLOAT); + FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT); + FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT); + FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT); default:; } - size = util_format_get_blocksize(*format); + velems->incompatible_layout = + velems->incompatible_layout || + velems->velem[i].src_format != velems->hw_format[i] || + velems->velem[i].src_offset % 4 != 0; + } - if (size % 4 != 0) { - /* XXX Shouldn't we align the format? */ - fprintf(stderr, "r300_create_vertex_elements_state: " - "Unaligned format %s:%i isn't supported\n", - util_format_short_name(*format), size); - assert(0); - abort(); - } + /* Now setup PSC. + * The unused components will be replaced by (..., 0, 1). */ + r300_vertex_psc(velems); + + /* Align the formats to the size of DWORD. + * We only care about the blocksizes of the formats since + * swizzles are already set up. + * Also compute the vertex size. */ + for (i = 0; i < count; i++) { + /* This is OK because we check for aligned strides too. */ + velems->hw_format_size[i] = + align(util_format_get_blocksize(velems->hw_format[i]), 4); + velems->vertex_size_dwords += velems->hw_format_size[i] / 4; } } } @@ -1412,7 +1649,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe, const struct pipe_shader_state* shader) { struct r300_context* r300 = r300_context(pipe); - struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); /* Copy state directly into shader. */ @@ -1490,7 +1726,7 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, struct r300_context* r300 = r300_context(pipe); struct r300_constant_buffer *cbuf; struct pipe_transfer *tr; - void *mapped; + float *mapped; int max_size = 0, max_size_bytes = 0, clamped_size = 0; switch (shader) { @@ -1529,10 +1765,20 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, fprintf(stderr, "r300: Max size of the constant buffer is " "%i*4 floats.\n", max_size); } - clamped_size = MIN2(buf->width0, max_size_bytes); - memcpy(cbuf->constants, mapped, clamped_size); + clamped_size = MIN2(buf->width0, max_size_bytes); cbuf->count = clamped_size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) { + unsigned i,j; + + /* Convert constants to float24. */ + for (i = 0; i < cbuf->count; i++) + for (j = 0; j < 4; j++) + cbuf->constants[i][j] = pack_float24(mapped[i*4+j]); + } else { + memcpy(cbuf->constants, mapped, clamped_size); + } } if (shader == PIPE_SHADER_VERTEX) { diff --git a/src/gallium/drivers/r300/r300_state.h b/src/gallium/drivers/r300/r300_state.h deleted file mode 100644 index 1d557506cf3..00000000000 --- a/src/gallium/drivers/r300/r300_state.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2008 Marek Olšák <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_STATE_H -#define R300_STATE_H - -struct r300_context; - -void r300_mark_fs_code_dirty(struct r300_context *r300); - -#endif /* R300_STATE_H */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index cc75fad3bb9..3aa8deb63c8 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -31,7 +31,6 @@ #include "r300_hyperz.h" #include "r300_screen.h" #include "r300_shader_semantics.h" -#include "r300_state.h" #include "r300_state_derived.h" #include "r300_state_inlines.h" #include "r300_texture.h" @@ -537,6 +536,10 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) UTIL_FORMAT_SWIZZLE_X }; + /* The KIL opcode fix, see below. */ + if (!count && !r300->screen->caps.is_r500) + count = 1; + state->tx_enable = 0; state->count = 0; size = 2; @@ -555,6 +558,9 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) texstate->filter1 = sampler->filter1; texstate->border_color = sampler->border_color; + /* Assign a texture cache region. */ + texstate->format.format1 |= view->texcache_region; + /* If compare mode is disabled, the sampler view swizzles * are stored in the format. * Otherwise, swizzles must be applied after the compare mode @@ -613,6 +619,36 @@ static void r300_merge_textures_and_samplers(struct r300_context* r300) size += 16; state->count = i+1; + } else { + /* For the KIL opcode to work on r3xx-r4xx, the texture unit + * assigned to this opcode (it's always the first one) must be + * enabled. Otherwise the opcode doesn't work. + * + * In order to not depend on the fragment shader, we just make + * the first unit enabled all the time. */ + if (i == 0 && !r300->screen->caps.is_r500) { + pipe_sampler_view_reference( + (struct pipe_sampler_view**)&state->sampler_views[i], + &r300->texkill_sampler->base); + + state->tx_enable |= 1 << i; + + texstate = &state->regs[i]; + + /* Just set some valid state. */ + texstate->format = r300->texkill_sampler->format; + texstate->filter0 = + r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST, + PIPE_TEX_FILTER_NEAREST, + PIPE_TEX_FILTER_NEAREST, + FALSE); + texstate->filter1 = 0; + texstate->border_color = 0; + + texstate->filter0 |= i << 28; + size += 16; + state->count = i+1; + } } } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c deleted file mode 100644 index 34d3a169d57..00000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright 2009 Joakim Sindholt <[email protected]> - * Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#include "r300_context.h" -#include "r300_cs.h" -#include "r300_reg.h" -#include "r300_screen.h" -#include "r300_state_invariant.h" - -/* Calculate and emit invariant state. This is data that the 3D engine - * will probably want at the beginning of every CS, but it's not currently - * handled by any CSO setup, and in addition it doesn't really change much. - * - * Note that eventually this should be empty, but it's useful for development - * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state) -{ - CS_LOCALS(r300); - - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - /* Subpixel multisampling for AA. */ - BEGIN_CS(4); - OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); - OUT_CS_REG(R300_GB_MSPOS1, 0x6666666); - END_CS; - } - - BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0)); - - /*** Graphics Backend (GB) ***/ - /* Source of fog depth */ - OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); - - /*** Fog (FG) ***/ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - - /*** VAP ***/ - /* Sign/normalize control */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); - /* TCL-only stuff */ - if (r300->screen->caps.has_tcl) { - /* Amount of time to wait for vertex fetches in PVS */ - OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); - } - - END_CS; - - /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) + - (r300->screen->caps.is_rv350 ? 4 : 0)); - - if (r300->screen->caps.has_tcl) { - /*Flushing PVS is required before the VAP_GB registers can be changed*/ - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0); - OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - } - /* XXX line tex stuffing */ - OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1); - OUT_CS_32F(0.0); - OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1); - OUT_CS_32F(1.0); - OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 | - (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT)); - /* XXX this big chunk should be refactored into rs_state */ - OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000); - OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000); - OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001); - OUT_CS_REG(R300_GA_OFFSET, 0x00000000); - OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412); - OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000); - OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000); - OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF); - OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000); - OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); - OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - - if (r300->screen->caps.is_rv350) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); - } - - OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); - OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000); - END_CS; -} diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h deleted file mode 100644 index 83d031c7fe9..00000000000 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <[email protected]> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef R300_STATE_INVARIANT_H -#define R300_STATE_INVARIANT_H - -struct r300_context; - -void r300_emit_invariant_state(struct r300_context* r300, - unsigned size, void* state); - -#endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 5a013e2a25a..6206570fca4 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -21,30 +21,28 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "pipe/p_screen.h" - -#include "util/u_format.h" -#include "util/u_format_s3tc.h" -#include "util/u_math.h" -#include "util/u_memory.h" +/* Always include headers in the reverse order!! ~ M. */ +#include "r300_texture.h" #include "r300_context.h" #include "r300_reg.h" -#include "r300_texture.h" #include "r300_transfer.h" #include "r300_screen.h" #include "r300_winsys.h" -#define TILE_WIDTH 0 -#define TILE_HEIGHT 1 +#include "util/u_format.h" +#include "util/u_format_s3tc.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "pipe/p_screen.h" -static const unsigned microblock_table[5][3][2] = { - /*linear tiled square-tiled */ - {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ - {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ - {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ - {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ - {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +/* XXX NO! just no! */ +#include "state_tracker/drm_driver.h" + +enum r300_dim { + DIM_WIDTH = 0, + DIM_HEIGHT = 1 }; unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format, @@ -596,21 +594,21 @@ static void r300_texture_setup_fb_state(struct r300_screen* screen, /* Set framebuffer state. */ if (util_format_is_depth_or_stencil(tex->b.b.format)) { for (i = 0; i <= tex->b.b.last_level; i++) { - tex->fb_state.depthpitch[i] = + tex->fb_state.pitch[i] = tex->hwpitch[i] | R300_DEPTHMACROTILE(tex->mip_macrotile[i]) | R300_DEPTHMICROTILE(tex->microtile); } - tex->fb_state.zb_format = r300_translate_zsformat(tex->b.b.format); + tex->fb_state.format = r300_translate_zsformat(tex->b.b.format); } else { for (i = 0; i <= tex->b.b.last_level; i++) { - tex->fb_state.colorpitch[i] = + tex->fb_state.pitch[i] = tex->hwpitch[i] | r300_translate_colorformat(tex->b.b.format) | R300_COLOR_TILE(tex->mip_macrotile[i]) | R300_COLOR_MICROTILE(tex->microtile); } - tex->fb_state.us_out_fmt = r300_translate_out_fmt(tex->b.b.format); + tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format); } } @@ -620,8 +618,10 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, { struct r300_screen *r300screen = r300_screen(screen); - SCREEN_DBG(r300screen, DBG_TEX, "r300: texture_reinterpret_format: %s -> %s\n", - util_format_short_name(tex->format), util_format_short_name(new_format)); + SCREEN_DBG(r300screen, DBG_TEX, + "r300: texture_reinterpret_format: %s -> %s\n", + util_format_short_name(tex->format), + util_format_short_name(new_format)); tex->format = new_format; @@ -648,36 +648,65 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } } -/** - * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile - * of the given texture. - */ -static unsigned r300_texture_get_tile_size(struct r300_texture* tex, - int dim, boolean macrotile) +/* Returns the number of pixels that the texture should be aligned to + * in the given dimension. */ +static unsigned r300_get_pixel_alignment(struct r300_texture *tex, + enum r300_buffer_tiling macrotile, + enum r300_dim dim) { - unsigned pixsize, tile_size; - - pixsize = util_format_get_blocksize(tex->b.b.format); - tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim]; - - if (macrotile) { - tile_size *= 8; + static const unsigned table[2][5][3][2] = + { + { + /* Macro: linear linear linear + Micro: linear tiled square-tiled */ + {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */ + {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + }, + { + /* Macro: tiled tiled tiled + Micro: linear tiled square-tiled */ + {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */ + {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */ + {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */ + {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */ + {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */ + } + }; + static const unsigned aa_block[2] = {4, 8}; + unsigned res = 0; + unsigned pixsize = util_format_get_blocksize(tex->b.b.format); + + assert(macrotile <= R300_BUFFER_TILED); + assert(tex->microtile <= R300_BUFFER_SQUARETILED); + assert(pixsize <= 16); + assert(dim <= DIM_HEIGHT); + + if (tex->b.b.nr_samples > 1) { + /* Multisampled textures have their own alignment scheme. */ + if (pixsize == 4) + res = aa_block[dim]; + } else { + /* Standard alignment. */ + res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim]; } - assert(tile_size); - return tile_size; + assert(res); + return res; } /* Return true if macrotiling should be enabled on the miplevel. */ static boolean r300_texture_macro_switch(struct r300_texture *tex, unsigned level, boolean rv350_mode, - int dim) + enum r300_dim dim) { unsigned tile, texdim; - tile = r300_texture_get_tile_size(tex, dim, TRUE); - if (dim == TILE_WIDTH) { + tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim); + if (dim == DIM_WIDTH) { texdim = u_minify(tex->b.b.width0, level); } else { texdim = u_minify(tex->b.b.height0, level); @@ -713,8 +742,8 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, width = u_minify(tex->b.b.width0, level); if (util_format_is_plain(tex->b.b.format)) { - tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH, - tex->mip_macrotile[level]); + tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_WIDTH); width = align(width, tile_width); stride = util_format_get_stride(tex->b.b.format, width); @@ -743,8 +772,8 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, height = u_minify(tex->b.b.height0, level); if (util_format_is_plain(tex->b.b.format)) { - tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT, - tex->mip_macrotile[level]); + tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level], + DIM_HEIGHT); height = align(height, tile_height); /* This is needed for the kernel checker, unfortunately. */ @@ -784,21 +813,26 @@ static void r300_setup_miptree(struct r300_screen* screen, unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.is_rv350; - SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", - util_format_short_name(base->format)); + SCREEN_DBG(screen, DBG_TEXALLOC, + "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ tex->mip_macrotile[i] = (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode, TILE_WIDTH) && - r300_texture_macro_switch(tex, i, rv350_mode, TILE_HEIGHT)) ? + r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ? R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, tex, i); nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; + if (base->nr_samples) { + layer_size *= base->nr_samples; + } + if (base->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; else @@ -864,8 +898,8 @@ static void r300_setup_tiling(struct pipe_screen *screen, } /* Set macrotiling. */ - if (r300_texture_macro_switch(tex, 0, rv350_mode, TILE_WIDTH) && - r300_texture_macro_switch(tex, 0, rv350_mode, TILE_HEIGHT)) { + if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) { tex->macrotile = R300_BUFFER_TILED; } } @@ -899,17 +933,14 @@ static boolean r300_texture_get_handle(struct pipe_screen* screen, { struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; struct r300_texture* tex = (struct r300_texture*)texture; - unsigned stride; if (!tex) { return FALSE; } - stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - - rws->buffer_get_handle(rws, tex->buffer, stride, whandle); + whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0); - return TRUE; + return rws->buffer_get_handle(rws, tex->buffer, whandle); } struct u_resource_vtbl r300_texture_vtbl = @@ -966,11 +997,12 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, SCREEN_DBG(rscreen, DBG_TEX, "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " - "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n", + "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n", tex->macrotile ? "YES" : " NO", tex->microtile ? "YES" : " NO", tex->hwpitch[0], base->width0, base->height0, base->depth0, base->last_level, + tex->size, util_format_short_name(base->format)); tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT : @@ -979,16 +1011,16 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain, tex->size); + if (!tex->buffer) { + FREE(tex); + return NULL; + } + rws->buffer_set_tiling(rws, tex->buffer, tex->pitch[0] * util_format_get_blocksize(tex->b.b.format), tex->microtile, tex->macrotile); - if (!tex->buffer) { - FREE(tex); - return NULL; - } - return (struct pipe_resource*)tex; } @@ -1002,26 +1034,27 @@ struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen, unsigned flags) { struct r300_texture* tex = r300_texture(texture); - struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface); - unsigned offset; - - offset = r300_texture_get_offset(tex, level, zslice, face); + struct r300_surface* surface = CALLOC_STRUCT(r300_surface); if (surface) { - pipe_reference_init(&surface->reference, 1); - pipe_resource_reference(&surface->texture, texture); - surface->format = texture->format; - surface->width = u_minify(texture->width0, level); - surface->height = u_minify(texture->height0, level); - surface->offset = offset; - surface->usage = flags; - surface->zslice = zslice; - surface->texture = texture; - surface->face = face; - surface->level = level; + pipe_reference_init(&surface->base.reference, 1); + pipe_resource_reference(&surface->base.texture, texture); + surface->base.format = texture->format; + surface->base.width = u_minify(texture->width0, level); + surface->base.height = u_minify(texture->height0, level); + surface->base.usage = flags; + surface->base.zslice = zslice; + surface->base.face = face; + surface->base.level = level; + + surface->buffer = tex->buffer; + surface->domain = tex->domain; + surface->offset = r300_texture_get_offset(tex, level, zslice, face); + surface->pitch = tex->fb_state.pitch[level]; + surface->format = tex->fb_state.format; } - return surface; + return &surface->base; } /* Not required to implement u_resource_vtbl, consider moving to another file: @@ -1041,7 +1074,6 @@ r300_texture_from_handle(struct pipe_screen* screen, struct r300_screen* rscreen = r300_screen(screen); struct r300_winsys_buffer *buffer; struct r300_texture* tex; - unsigned stride; boolean override_zb_flags; /* Support only 2D textures without mipmaps */ @@ -1051,7 +1083,8 @@ r300_texture_from_handle(struct pipe_screen* screen, return NULL; } - buffer = rws->buffer_from_handle(rws, screen, whandle, &stride); + /* XXX make the winsys return the stride_override, see i915_resource_texture.c:830 */ + buffer = rws->buffer_from_handle(rws, whandle->handle); if (!buffer) { return NULL; } @@ -1067,7 +1100,7 @@ r300_texture_from_handle(struct pipe_screen* screen, tex->b.b.screen = screen; tex->domain = R300_DOMAIN_VRAM; - tex->stride_override = stride; + tex->stride_override = whandle->stride; /* one ref already taken */ tex->buffer = buffer; @@ -1079,7 +1112,7 @@ r300_texture_from_handle(struct pipe_screen* screen, "Pitch: % 4i, Dim: %ix%i, Format: %s\n", tex->macrotile ? "YES" : " NO", tex->microtile ? "YES" : " NO", - stride / util_format_get_blocksize(base->format), + whandle->stride / util_format_get_blocksize(base->format), base->width0, base->height0, util_format_short_name(base->format)); diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index ff640c56eed..99e7694254e 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -23,8 +23,11 @@ #ifndef R300_TEXTURE_H #define R300_TEXTURE_H -#include "util/u_format.h" +#include "pipe/p_format.h" +struct pipe_screen; +struct pipe_resource; +struct winsys_handle; struct r300_texture; struct r300_screen; diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 89f39af9761..5394e04f727 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -105,12 +105,12 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */ /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */ case TGSI_OPCODE_TXL: return RC_OPCODE_TXL; - /* case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; */ + case TGSI_OPCODE_BRK: return RC_OPCODE_BRK; case TGSI_OPCODE_IF: return RC_OPCODE_IF; - /* case TGSI_OPCODE_LOOP: return RC_OPCODE_LOOP; */ + case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP; case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE; case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF; - /* case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; */ + case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP; /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL; diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 0d88d745c07..02421a58b83 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -57,22 +57,11 @@ static void r300_copy_from_tiled_texture(struct pipe_context *ctx, subdst.face = 0; subdst.level = 0; - /* XXX if we don't flush before copying the texture and mapping it, - * we get wrong pixels, i.e. it's like latest draw calls didn't happen, - * including this blit. Tests: e.g. piglit/provoking-vertex - * - * Since the flush immediately before mapping is implicit (the buffer is - * always referenced in resource_copy_region), every read transfer costs - * 2 flushes. That sucks. */ - ctx->flush(ctx, 0, NULL); - ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst, 0, 0, 0, tex, transfer->sr, transfer->box.x, transfer->box.y, transfer->box.z, transfer->box.width, transfer->box.height); - - /* Flushing after the copy is implicit, issued by winsys. */ } /* Copy a detiled texture to a tiled one. */ @@ -92,7 +81,6 @@ static void r300_copy_into_tiled_texture(struct pipe_context *ctx, 0, 0, 0, transfer->box.width, transfer->box.height); - /* XXX this flush fixes a few piglit tests (e.g. glean/pixelFormats). */ ctx->flush(ctx, 0, NULL); } @@ -107,7 +95,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, struct r300_screen *r300screen = r300_screen(ctx->screen); struct r300_transfer *trans; struct pipe_resource base; - boolean referenced_cs, referenced_hw; + boolean referenced_cs, referenced_hw, blittable; referenced_cs = r300screen->rws->is_buffer_referenced( r300screen->rws, tex->buffer, R300_REF_CS); @@ -118,6 +106,10 @@ r300_texture_get_transfer(struct pipe_context *ctx, r300screen->rws, tex->buffer, R300_REF_HW); } + blittable = ctx->screen->is_format_supported( + ctx->screen, texture->format, texture->target, 0, + PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0); + trans = CALLOC_STRUCT(r300_transfer); if (trans) { /* Initialize the transfer object. */ @@ -130,7 +122,7 @@ r300_texture_get_transfer(struct pipe_context *ctx, * for this transfer. * Also make write transfers pipelined. */ if (tex->microtile || tex->macrotile || - (referenced_hw & !(usage & PIPE_TRANSFER_READ))) { + ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) { base.target = PIPE_TEXTURE_2D; base.format = texture->format; base.width0 = box->width; @@ -158,6 +150,30 @@ r300_texture_get_transfer(struct pipe_context *ctx, ctx->screen->resource_create(ctx->screen, &base)); + if (!trans->detiled_texture) { + /* Oh crap, the thing can't create the texture. + * Let's flush and try again. */ + ctx->flush(ctx, 0, NULL); + + trans->detiled_texture = r300_texture( + ctx->screen->resource_create(ctx->screen, + &base)); + + if (!trans->detiled_texture) { + /* For linear textures, it's safe to fallback to + * an unpipelined transfer. */ + if (!tex->microtile && !tex->macrotile) { + goto unpipelined; + } + + /* Otherwise, go to hell. */ + fprintf(stderr, + "r300: Failed to create a transfer object, praise.\n"); + FREE(trans); + return NULL; + } + } + assert(!trans->detiled_texture->microtile && !trans->detiled_texture->macrotile); @@ -179,16 +195,20 @@ r300_texture_get_transfer(struct pipe_context *ctx, /* Always referenced in the blit. */ ctx->flush(ctx, 0, NULL); } - } else { - trans->transfer.stride = + return &trans->transfer; + } + + unpipelined: + /* Unpipelined transfer. */ + trans->transfer.stride = r300_texture_get_stride(r300screen, tex, sr.level); - trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); + trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face); - if (referenced_cs && (usage & PIPE_TRANSFER_READ)) - ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); - } + if (referenced_cs && (usage & PIPE_TRANSFER_READ)) + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + return &trans->transfer; } - return &trans->transfer; + return NULL; } void r300_texture_transfer_destroy(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 8a8888d4813..3b5e9eec602 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -47,13 +47,6 @@ enum r300_reference_domain { /* bitfield */ R300_REF_HW = 2 }; -struct r300_cs_info { - /* In DWORDs. */ - unsigned used; - unsigned free; - unsigned capacity; -}; - struct r300_winsys_screen { void (*destroy)(struct r300_winsys_screen *ws); @@ -109,16 +102,13 @@ struct r300_winsys_screen { * Returns TRUE if a flush is required. */ boolean (*validate)(struct r300_winsys_screen* winsys); - /* Return current CS info. */ - void (*get_cs_info)(struct r300_winsys_screen *winsys, - struct r300_cs_info *info); + /* Return the number of free dwords in CS. */ + unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys); - /* Start a command emit. */ - void (*begin_cs)(struct r300_winsys_screen* winsys, - int size, - const char* file, - const char* function, - int line); + /* Return the pointer to the first free dword in CS and assume a pipe + * driver wants to fill "count" dwords. */ + uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys, + unsigned count); /* Write a dword to the command buffer. */ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); @@ -134,12 +124,6 @@ struct r300_winsys_screen { enum r300_buffer_domain wd, uint32_t flags); - /* Finish a command emit. */ - void (*end_cs)(struct r300_winsys_screen* winsys, - const char* file, - const char* function, - int line); - /* Flush the CS. */ void (*flush_cs)(struct r300_winsys_screen* winsys); @@ -164,12 +148,10 @@ struct r300_winsys_screen { enum r300_value_id vid); struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, - struct pipe_screen *screen, - struct winsys_handle *whandle, - unsigned *stride); + unsigned handle); + boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, struct r300_winsys_buffer *buffer, - unsigned stride, struct winsys_handle *whandle); boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 80c3069cf5b..1dcb19babc9 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -49,6 +49,9 @@ static void r600_blitter_save_states(struct r600_context *rctx) rctx->vertex_elements); util_blitter_save_viewport(rctx->blitter, &rctx->viewport); + /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */ + util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, + rctx->vertex_buffer); } static void r600_clear(struct pipe_context *ctx, unsigned buffers, diff --git a/src/gallium/drivers/r600/r600_helper.c b/src/gallium/drivers/r600/r600_helper.c index 9370a5355ee..e3175b627aa 100644 --- a/src/gallium/drivers/r600/r600_helper.c +++ b/src/gallium/drivers/r600/r600_helper.c @@ -26,7 +26,6 @@ #include <stdio.h> #include <errno.h> #include <util/u_inlines.h> -#include <util/u_format.h> #include "r600_screen.h" #include "r600d.h" diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 24746e78771..9b02ae680e7 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -50,7 +50,7 @@ static void r600_end_query(struct pipe_context *pipe, struct pipe_query *query) static boolean r600_get_query_result(struct pipe_context *pipe, struct pipe_query *query, - boolean wait, uint64_t *result) + boolean wait, void *result) { return TRUE; } diff --git a/src/gallium/drivers/rbug/rbug_context.c b/src/gallium/drivers/rbug/rbug_context.c index 164d811ab48..00b167e256c 100644 --- a/src/gallium/drivers/rbug/rbug_context.c +++ b/src/gallium/drivers/rbug/rbug_context.c @@ -241,7 +241,7 @@ static boolean rbug_get_query_result(struct pipe_context *_pipe, struct pipe_query *query, boolean wait, - uint64_t *result) + void *result) { struct rbug_context *rb_pipe = rbug_context(_pipe); struct pipe_context *pipe = rb_pipe->pipe; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 83f3e4a19b6..35d426aa3ea 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -25,6 +25,7 @@ C_SOURCES = \ sp_state_derived.c \ sp_state_fs.c \ sp_state_sampler.c \ + sp_state_so.c \ sp_state_rasterizer.c \ sp_state_surface.c \ sp_state_vertex.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index b80c6dea93a..be5917a6886 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -27,6 +27,7 @@ softpipe = env.ConvenienceLibrary( 'sp_state_fs.c', 'sp_state_rasterizer.c', 'sp_state_sampler.c', + 'sp_state_so.c', 'sp_state_surface.c', 'sp_state_vertex.c', 'sp_surface.c', diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2f10b46e989..12ef98aac75 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -114,6 +114,11 @@ softpipe_destroy( struct pipe_context *pipe ) pipe_sampler_view_reference(&softpipe->vertex_sampler_views[i], NULL); } + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + sp_destroy_tex_tile_cache(softpipe->geometry_tex_cache[i]); + pipe_sampler_view_reference(&softpipe->geometry_sampler_views[i], NULL); + } + for (i = 0; i < PIPE_SHADER_TYPES; i++) { uint j; @@ -174,7 +179,12 @@ softpipe_is_resource_referenced( struct pipe_context *pipe, softpipe->vertex_tex_cache[i]->texture == texture) return PIPE_REFERENCED_FOR_READ; } - + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + if (softpipe->geometry_tex_cache[i] && + softpipe->geometry_tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } + return PIPE_UNREFERENCED; } @@ -225,6 +235,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; softpipe->pipe.bind_fragment_sampler_states = softpipe_bind_sampler_states; softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states; + softpipe->pipe.bind_geometry_sampler_states = softpipe_bind_geometry_sampler_states; softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; @@ -251,6 +262,10 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state; softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state; + softpipe->pipe.create_stream_output_state = softpipe_create_stream_output_state; + softpipe->pipe.bind_stream_output_state = softpipe_bind_stream_output_state; + softpipe->pipe.delete_stream_output_state = softpipe_delete_stream_output_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref; softpipe->pipe.set_clip_state = softpipe_set_clip_state; @@ -261,10 +276,11 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; softpipe->pipe.set_fragment_sampler_views = softpipe_set_sampler_views; softpipe->pipe.set_vertex_sampler_views = softpipe_set_vertex_sampler_views; + softpipe->pipe.set_geometry_sampler_views = softpipe_set_geometry_sampler_views; softpipe->pipe.create_sampler_view = softpipe_create_sampler_view; softpipe->pipe.sampler_view_destroy = softpipe_sampler_view_destroy; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - + softpipe->pipe.set_stream_output_buffers = softpipe_set_stream_output_buffers; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; softpipe->pipe.draw_arrays = softpipe_draw_arrays; @@ -272,6 +288,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.draw_arrays_instanced = softpipe_draw_arrays_instanced; softpipe->pipe.draw_elements_instanced = softpipe_draw_elements_instanced; + softpipe->pipe.draw_stream_output = softpipe_draw_stream_output; softpipe->pipe.clear = softpipe_clear; softpipe->pipe.flush = softpipe_flush; @@ -296,6 +313,9 @@ softpipe_create_context( struct pipe_screen *screen, for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); } + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + softpipe->geometry_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); + } softpipe->fs_machine = tgsi_exec_machine_create(); @@ -314,10 +334,17 @@ softpipe_create_context( struct pipe_screen *screen, goto fail; draw_texture_samplers(softpipe->draw, + PIPE_SHADER_VERTEX, PIPE_MAX_VERTEX_SAMPLERS, (struct tgsi_sampler **) softpipe->tgsi.vert_samplers_list); + draw_texture_samplers(softpipe->draw, + PIPE_SHADER_GEOMETRY, + PIPE_MAX_GEOMETRY_SAMPLERS, + (struct tgsi_sampler **) + softpipe->tgsi.geom_samplers_list); + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index b3d3fe620fd..53115a827d0 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -50,6 +50,7 @@ struct softpipe_tex_tile_cache; struct sp_fragment_shader; struct sp_vertex_shader; struct sp_velems_state; +struct sp_so_state; struct softpipe_context { @@ -59,12 +60,14 @@ struct softpipe_context { struct pipe_blend_state *blend; struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_state *geometry_samplers[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_depth_stencil_alpha_state *depth_stencil; struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; struct sp_velems_state *velems; + struct sp_so_state *so; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -76,13 +79,23 @@ struct softpipe_context { struct pipe_scissor_state scissor; struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS]; + struct pipe_sampler_view *geometry_sampler_views[PIPE_MAX_GEOMETRY_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct { + struct softpipe_resource *buffer[PIPE_MAX_SO_BUFFERS]; + int offset[PIPE_MAX_SO_BUFFERS]; + int so_count[PIPE_MAX_SO_BUFFERS]; + int num_buffers; + } so_target; + struct pipe_query_data_so_statistics so_stats; unsigned num_samplers; unsigned num_sampler_views; unsigned num_vertex_samplers; unsigned num_vertex_sampler_views; + unsigned num_geometry_samplers; + unsigned num_geometry_sampler_views; unsigned num_vertex_buffers; unsigned dirty; /**< Mask of SP_NEW_x flags */ @@ -139,6 +152,7 @@ struct softpipe_context { /** TGSI exec things */ struct { + struct sp_sampler_varient *geom_samplers_list[PIPE_MAX_GEOMETRY_SAMPLERS]; struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS]; struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS]; } tgsi; @@ -160,6 +174,7 @@ struct softpipe_context { unsigned tex_timestamp; struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS]; + struct softpipe_tex_tile_cache *geometry_tex_cache[PIPE_MAX_GEOMETRY_SAMPLERS]; unsigned use_sse : 1; unsigned dump_fs : 1; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index b30036e2303..79daa68f3b3 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -84,6 +84,57 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, 1); } +void +softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + const unsigned start = 0; + const unsigned count = sp->so_target.so_count[0]; + void *buf = sp->so_target.buffer[0]->data; + int offset = sp->so_target.offset[0]; + + if (!softpipe_check_render_cond(sp) || + sp->so_target.num_buffers != 1) + return; + + sp->reduced_api_prim = u_reduced_prim(mode); + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + softpipe_map_transfers(sp); + + /* Map so buffers */ + if (offset < 0) /* we were appending so start from beginning */ + offset = 0; + buf = (void*)((int32_t*)buf + offset); + draw_set_mapped_vertex_buffer(draw, 0, buf); + + draw_set_mapped_element_buffer_range(draw, + 0, 0, + start, + start + count - 1, + NULL); + + /* draw! */ + draw_arrays_instanced(draw, mode, start, count, 0, 1); + + /* unmap vertex/index buffers - will cause draw module to flush */ + draw_set_mapped_vertex_buffer(draw, 0, NULL); + + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); + + /* Note: leave drawing surfaces mapped */ + sp->dirty_render_cache = TRUE; +} + void softpipe_draw_range_elements(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 5024fc8a819..4a53ef048f3 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -56,6 +56,9 @@ softpipe_flush( struct pipe_context *pipe, for (i = 0; i < softpipe->num_vertex_sampler_views; i++) { sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]); } + for (i = 0; i < softpipe->num_geometry_sampler_views; i++) { + sp_flush_tex_tile_cache(softpipe->geometry_tex_cache[i]); + } } if (flags & PIPE_FLUSH_SWAPBUFFERS) { diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index db0d1755103..c60249dbfbc 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -542,6 +542,22 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) } } +static void +sp_vbuf_so_info(struct vbuf_render *vbr, uint primitives, uint vertices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + unsigned i; + + for (i = 0; i < softpipe->so_target.num_buffers; ++i) { + softpipe->so_target.so_count[i] += vertices; + } + + softpipe->so_stats.num_primitives_written = primitives; + softpipe->so_stats.primitives_storage_needed = + vertices * 4 /*sizeof(float|int32)*/ * 4 /*x,y,z,w*/; +} + static void sp_vbuf_destroy(struct vbuf_render *vbr) @@ -575,6 +591,7 @@ sp_create_vbuf_backend(struct softpipe_context *sp) cvbr->base.draw_elements = sp_vbuf_draw_elements; cvbr->base.draw_arrays = sp_vbuf_draw_arrays; cvbr->base.release_vertices = sp_vbuf_release_vertices; + cvbr->base.set_stream_output_info = sp_vbuf_so_info; cvbr->base.destroy = sp_vbuf_destroy; cvbr->softpipe = sp; diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 907e94b59b9..d240bcbf3bb 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -109,7 +109,7 @@ shade_quads(struct quad_stage *qs, { struct softpipe_context *softpipe = qs->softpipe; struct tgsi_exec_machine *machine = softpipe->fs_machine; - unsigned i, pass = 0; + unsigned i, nr_quads = 0; for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { machine->Consts[i] = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT][i]; @@ -123,11 +123,11 @@ shade_quads(struct quad_stage *qs, if (/*do_coverage*/ 0) coverage_quad( qs, quads[i] ); - quads[pass++] = quads[i]; + quads[nr_quads++] = quads[i]; } - if (pass) - qs->next->run(qs->next, quads, pass); + if (nr_quads) + qs->next->run(qs->next, quads, nr_quads); } diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index b959af63aff..4ae69c1c2bd 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -41,6 +41,7 @@ struct softpipe_query { unsigned type; uint64_t start; uint64_t end; + struct pipe_query_data_so_statistics so; }; @@ -55,7 +56,11 @@ softpipe_create_query(struct pipe_context *pipe, { struct softpipe_query* sq; - assert(type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_TIME_ELAPSED); + assert(type == PIPE_QUERY_OCCLUSION_COUNTER || + type == PIPE_QUERY_TIME_ELAPSED || + type == PIPE_QUERY_SO_STATISTICS || + type == PIPE_QUERY_GPU_FINISHED || + type == PIPE_QUERY_TIMESTAMP_DISJOINT); sq = CALLOC_STRUCT( softpipe_query ); sq->type = type; @@ -75,7 +80,7 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) { struct softpipe_context *softpipe = softpipe_context( pipe ); struct softpipe_query *sq = softpipe_query(q); - + switch (sq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: sq->start = softpipe->occlusion_count; @@ -83,6 +88,13 @@ softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) case PIPE_QUERY_TIME_ELAPSED: sq->start = 1000*os_time_get(); break; + case PIPE_QUERY_SO_STATISTICS: + sq->so.num_primitives_written = 0; + sq->so.primitives_storage_needed = 0; + break; + case PIPE_QUERY_GPU_FINISHED: + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: default: assert(0); break; @@ -106,6 +118,15 @@ softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) case PIPE_QUERY_TIME_ELAPSED: sq->end = 1000*os_time_get(); break; + case PIPE_QUERY_SO_STATISTICS: + sq->so.num_primitives_written = + softpipe->so_stats.num_primitives_written; + sq->so.primitives_storage_needed = + softpipe->so_stats.primitives_storage_needed; + break; + case PIPE_QUERY_GPU_FINISHED: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + break; default: assert(0); break; @@ -118,10 +139,32 @@ static boolean softpipe_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result ) + void *vresult) { struct softpipe_query *sq = softpipe_query(q); - *result = sq->end - sq->start; + uint64_t *result = (uint64_t*)vresult; + + switch (sq->type) { + case PIPE_QUERY_SO_STATISTICS: + memcpy(vresult, &sq->so, + sizeof(struct pipe_query_data_so_statistics)); + break; + case PIPE_QUERY_GPU_FINISHED: + *result = TRUE; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: { + struct pipe_query_data_timestamp_disjoint td; + /*os_get_time is in microseconds*/ + td.frequency = 1000000; + td.disjoint = FALSE; + memcpy(vresult, &sq->so, + sizeof(struct pipe_query_data_timestamp_disjoint)); + } + break; + default: + *result = sq->end - sq->start; + break; + } return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 8abe7f50e17..fc57d3eb611 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -115,6 +115,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 1; + case PIPE_CAP_STREAM_OUTPUT: + return 1; case PIPE_CAP_MAX_VS_INSTRUCTIONS: case PIPE_CAP_MAX_FS_INSTRUCTIONS: diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 5b0faabeaef..7d6b86dce04 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -51,6 +51,8 @@ #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 #define SP_NEW_GS 0x8000 +#define SP_NEW_SO 0x10000 +#define SP_NEW_SO_BUFFERS 0x20000 struct tgsi_sampler; @@ -98,6 +100,7 @@ struct sp_vertex_shader { struct sp_geometry_shader { struct pipe_shader_state shader; struct draw_geometry_shader *draw_data; + int max_sampler; }; struct sp_velems_state { @@ -105,6 +108,10 @@ struct sp_velems_state { struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; }; +struct sp_so_state { + struct pipe_stream_output_state base; +}; + void * softpipe_create_blend_state(struct pipe_context *, @@ -122,6 +129,10 @@ void softpipe_bind_vertex_sampler_states(struct pipe_context *, unsigned num_samplers, void **samplers); +void +softpipe_bind_geometry_sampler_states(struct pipe_context *, + unsigned num_samplers, + void **samplers); void softpipe_delete_sampler_state(struct pipe_context *, void *); void * @@ -189,6 +200,11 @@ softpipe_set_vertex_sampler_views(struct pipe_context *, unsigned num, struct pipe_sampler_view **); +void +softpipe_set_geometry_sampler_views(struct pipe_context *, + unsigned num, + struct pipe_sampler_view **); + struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, @@ -244,6 +260,8 @@ softpipe_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount); +void softpipe_draw_stream_output(struct pipe_context *pipe, unsigned mode); + void softpipe_map_transfers(struct softpipe_context *sp); @@ -263,5 +281,20 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe); struct vertex_info * softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); +void * +softpipe_create_stream_output_state( + struct pipe_context *pipe, + const struct pipe_stream_output_state *templ); +void +softpipe_bind_stream_output_state(struct pipe_context *pipe, + void *so); +void +softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so); + +void +softpipe_set_stream_output_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers); #endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 4c6d4909f5b..3ba4d934fd2 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -225,6 +225,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe ) } } } + + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + struct softpipe_tex_tile_cache *tc = softpipe->geometry_tex_cache[i]; + + if (tc->texture) { + struct softpipe_resource *spt = softpipe_resource(tc->texture); + + if (spt->timestamp != tc->timestamp) { + sp_tex_tile_cache_validate_texture(tc); + tc->timestamp = spt->timestamp; + } + } + } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 2fff80c4385..3fbf1f25781 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -35,6 +35,7 @@ #include "util/u_inlines.h" #include "draw/draw_context.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_scan.h" @@ -223,6 +224,8 @@ softpipe_create_gs_state(struct pipe_context *pipe, if (state->draw_data == NULL) goto fail; + state->max_sampler = state->draw_data->info.file_max[TGSI_FILE_SAMPLER]; + return state; fail: diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 2692f06c927..79d9516ad9c 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -121,6 +121,33 @@ softpipe_bind_vertex_sampler_states(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_SAMPLER; } +void +softpipe_bind_geometry_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num_samplers <= PIPE_MAX_GEOMETRY_SAMPLERS); + + /* Check for no-op */ + if (num_samplers == softpipe->num_geometry_samplers && + !memcmp(softpipe->geometry_samplers, samplers, num_samplers * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num_samplers; ++i) + softpipe->geometry_samplers[i] = samplers[i]; + for (i = num_samplers; i < PIPE_MAX_GEOMETRY_SAMPLERS; ++i) + softpipe->geometry_samplers[i] = NULL; + + softpipe->num_geometry_samplers = num_samplers; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + struct pipe_sampler_view * softpipe_create_sampler_view(struct pipe_context *pipe, @@ -210,6 +237,36 @@ softpipe_set_vertex_sampler_views(struct pipe_context *pipe, softpipe->dirty |= SP_NEW_TEXTURE; } +void +softpipe_set_geometry_sampler_views(struct pipe_context *pipe, + unsigned num, + struct pipe_sampler_view **views) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num <= PIPE_MAX_GEOMETRY_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_geometry_sampler_views && + !memcmp(softpipe->geometry_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) { + return; + } + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_GEOMETRY_SAMPLERS; i++) { + struct pipe_sampler_view *view = i < num ? views[i] : NULL; + + pipe_sampler_view_reference(&softpipe->geometry_sampler_views[i], view); + sp_tex_tile_cache_set_sampler_view(softpipe->geometry_tex_cache[i], view); + } + + softpipe->num_geometry_sampler_views = num; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + /** * Find/create an sp_sampler_varient object for sampling the given texture, @@ -293,6 +350,30 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe) } } + if (softpipe->gs) { + for (i = 0; i <= softpipe->gs->max_sampler; i++) { + if (softpipe->geometry_samplers[i]) { + struct pipe_resource *texture = NULL; + + if (softpipe->geometry_sampler_views[i]) { + texture = softpipe->geometry_sampler_views[i]->texture; + } + + softpipe->tgsi.geom_samplers_list[i] = + get_sampler_varient( + i, + sp_sampler(softpipe->geometry_samplers[i]), + texture, + TGSI_PROCESSOR_GEOMETRY ); + + sp_sampler_varient_bind_texture( + softpipe->tgsi.geom_samplers_list[i], + softpipe->geometry_tex_cache[i], + texture ); + } + } + } + for (i = 0; i <= softpipe->fs->info.file_max[TGSI_FILE_SAMPLER]; i++) { if (softpipe->sampler[i]) { struct pipe_resource *texture = NULL; diff --git a/src/gallium/drivers/softpipe/sp_state_so.c b/src/gallium/drivers/softpipe/sp_state_so.c new file mode 100644 index 00000000000..cfe23f9e846 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_so.c @@ -0,0 +1,124 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" + + +void * +softpipe_create_stream_output_state(struct pipe_context *pipe, + const struct pipe_stream_output_state *templ) +{ + struct sp_so_state *so; + so = (struct sp_so_state *) CALLOC_STRUCT(sp_so_state); + + if (so) { + so->base.num_outputs = templ->num_outputs; + so->base.stride = templ->stride; + memcpy(so->base.output_buffer, + templ->output_buffer, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_index, + templ->register_index, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_mask, + templ->register_mask, + sizeof(ubyte) * templ->num_outputs); + } + return so; +} + +void +softpipe_bind_stream_output_state(struct pipe_context *pipe, + void *so) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_so_state *sp_so = (struct sp_so_state *) so; + + softpipe->so = sp_so; + + softpipe->dirty |= SP_NEW_SO; + + if (sp_so) + draw_set_so_state(softpipe->draw, &sp_so->base); +} + +void +softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) +{ + FREE( so ); +} + +void +softpipe_set_stream_output_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + int i; + void *map_buffers[PIPE_MAX_SO_BUFFERS]; + + assert(num_buffers <= PIPE_MAX_SO_BUFFERS); + if (num_buffers > PIPE_MAX_SO_BUFFERS) + num_buffers = PIPE_MAX_SO_BUFFERS; + + softpipe->dirty |= SP_NEW_SO_BUFFERS; + + for (i = 0; i < num_buffers; ++i) { + void *mapped; + struct softpipe_resource *res = softpipe_resource(buffers[i]); + + if (!res) { + /* the whole call is invalid, bail out */ + softpipe->so_target.num_buffers = 0; + draw_set_mapped_so_buffers(softpipe->draw, 0, 0); + return; + } + + softpipe->so_target.buffer[i] = res; + softpipe->so_target.offset[i] = offsets[i]; + softpipe->so_target.so_count[i] = 0; + + mapped = res->data; + if (offsets[i] >= 0) + map_buffers[i] = ((char*)mapped) + offsets[i]; + else { + /* this is a buffer append */ + assert(!"appending not implemented"); + map_buffers[i] = mapped; + } + } + softpipe->so_target.num_buffers = num_buffers; + + draw_set_mapped_so_buffers(softpipe->draw, map_buffers, num_buffers); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 99433f7d1d0..55b27e60100 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -33,4 +33,6 @@ void sp_init_surface_functions(struct softpipe_context *sp) { sp->pipe.resource_copy_region = util_resource_copy_region; + sp->pipe.clear_render_target = util_clear_render_target; + sp->pipe.clear_depth_stencil = util_clear_depth_stencil; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index ff83c66d8b2..cf7ab81405c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -71,7 +71,7 @@ lerp(float a, float v0, float v1) /** - * Do 2D/biliner interpolation of float values. + * Do 2D/bilinear interpolation of float values. * v00, v10, v01 and v11 are typically four texture samples in a square/box. * a and b are the horizontal and vertical interpolants. * It's important that this function is inlined when compiled with diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 96fb4b8e536..579f8034c7c 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -63,7 +63,7 @@ svga_query( struct pipe_query *q ) static boolean svga_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result); + void *result); static struct pipe_query *svga_create_query( struct pipe_context *pipe, unsigned query_type ) @@ -207,13 +207,14 @@ static void svga_end_query(struct pipe_context *pipe, static boolean svga_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result) + void *vresult) { struct svga_context *svga = svga_context( pipe ); struct svga_screen *svgascreen = svga_screen( pipe->screen ); struct svga_winsys_screen *sws = svgascreen->sws; struct svga_query *sq = svga_query( q ); SVGA3dQueryState state; + uint64_t *result = (uint64_t*)vresult; SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__); diff --git a/src/gallium/drivers/sw/sw.c b/src/gallium/drivers/sw/sw.c index 9f156df45f5..6b873ecc1b0 100644 --- a/src/gallium/drivers/sw/sw.c +++ b/src/gallium/drivers/sw/sw.c @@ -2,7 +2,6 @@ #include "util/u_debug.h" #include "target-helpers/wrap_screen.h" #include "sw_public.h" -#include "state_tracker/sw_winsys.h" /* Helper function to choose and instantiate one of the software rasterizers: diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 9ca916fe7b4..55dd6cf8837 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -248,7 +248,7 @@ static INLINE boolean trace_context_get_query_result(struct pipe_context *_pipe, struct pipe_query *query, boolean wait, - uint64_t *presult) + void *presult) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; @@ -260,7 +260,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); _result = pipe->get_query_result(pipe, query, wait, presult); - result = *presult; + result = *((uint64_t*)presult); trace_dump_arg(uint, result); trace_dump_ret(bool, _result); diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index b81702a4fac..c6ea198dbb4 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -146,6 +146,11 @@ #define PIPE_OS_UNIX #endif +#if defined(__GNU__) +#define PIPE_OS_HURD +#define PIPE_OS_UNIX +#endif + #if defined(__sun) #define PIPE_OS_SOLARIS #define PIPE_OS_UNIX diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 3e082bef2ff..7ec3d63a3fd 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -101,6 +101,11 @@ struct pipe_context { unsigned mode, unsigned start, unsigned count); + + /** + * Draw the stream output buffer at index 0 + */ + void (*draw_stream_output)( struct pipe_context *pipe, unsigned mode ); /*@}*/ /** @@ -130,10 +135,10 @@ struct pipe_context { * \param wait if true, this query will block until the result is ready * \return TRUE if results are ready, FALSE otherwise */ - boolean (*get_query_result)(struct pipe_context *pipe, + boolean (*get_query_result)(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64_t *result); + void *result); /*@}*/ /** @@ -153,6 +158,9 @@ struct pipe_context { void (*bind_vertex_sampler_states)(struct pipe_context *, unsigned num_samplers, void **samplers); + void (*bind_geometry_sampler_states)(struct pipe_context *, + unsigned num_samplers, + void **samplers); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -186,6 +194,11 @@ struct pipe_context { void (*bind_vertex_elements_state)(struct pipe_context *, void *); void (*delete_vertex_elements_state)(struct pipe_context *, void *); + void * (*create_stream_output_state)(struct pipe_context *, + const struct pipe_stream_output_state *); + void (*bind_stream_output_state)(struct pipe_context *, void *); + void (*delete_stream_output_state)(struct pipe_context*, void*); + /*@}*/ /** @@ -228,10 +241,21 @@ struct pipe_context { unsigned num_views, struct pipe_sampler_view **); + void (*set_geometry_sampler_views)(struct pipe_context *, + unsigned num_views, + struct pipe_sampler_view **); + void (*set_vertex_buffers)( struct pipe_context *, unsigned num_buffers, const struct pipe_vertex_buffer * ); + void (*set_stream_output_buffers)(struct pipe_context *, + struct pipe_resource **buffers, + int *offsets, /*array of offsets + from the start of each + of the buffers */ + int num_buffers); + /*@}*/ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index b00677425ce..3b87d998ceb 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -288,6 +288,7 @@ enum pipe_transfer_usage { #define PIPE_BIND_DISPLAY_TARGET (1 << 8) /* flush_front_buffer */ #define PIPE_BIND_TRANSFER_WRITE (1 << 9) /* get_transfer */ #define PIPE_BIND_TRANSFER_READ (1 << 10) /* get_transfer */ +#define PIPE_BIND_STREAM_OUTPUT (1 << 11) /* set_stream_output_buffers */ #define PIPE_BIND_CUSTOM (1 << 16) /* state-tracker/winsys usages */ /* The first two flags above were previously part of the amorphous @@ -322,6 +323,7 @@ enum pipe_transfer_usage { #define PIPE_USAGE_STATIC 2 /* same as immutable?? */ #define PIPE_USAGE_IMMUTABLE 3 /* no change after first upload */ #define PIPE_USAGE_STREAM 4 /* upload, draw, upload, draw */ +#define PIPE_USAGE_STAGING 5 /* supports data transfers from the GPU to the CPU */ /* These are intended to be used in calls to is_format_supported, but @@ -379,7 +381,10 @@ enum pipe_transfer_usage { #define PIPE_QUERY_PRIMITIVES_GENERATED 1 #define PIPE_QUERY_PRIMITIVES_EMITTED 2 #define PIPE_QUERY_TIME_ELAPSED 3 -#define PIPE_QUERY_TYPES 4 +#define PIPE_QUERY_SO_STATISTICS 5 +#define PIPE_QUERY_GPU_FINISHED 6 +#define PIPE_QUERY_TIMESTAMP_DISJOINT 7 +#define PIPE_QUERY_TYPES 8 /** @@ -445,6 +450,7 @@ enum pipe_cap { PIPE_CAP_TGSI_CONT_SUPPORTED, PIPE_CAP_BLEND_EQUATION_SEPARATE, PIPE_CAP_SM3, /*< Shader Model, supported */ + PIPE_CAP_STREAM_OUTPUT, PIPE_CAP_MAX_PREDICATE_REGISTERS, /** Maximum texture image units accessible from vertex and fragment shaders * combined */ @@ -495,6 +501,19 @@ enum pipe_cap { #define PIPE_REFERENCED_FOR_READ (1 << 0) #define PIPE_REFERENCED_FOR_WRITE (1 << 1) +/** + * Composite query types + */ +struct pipe_query_data_so_statistics +{ + uint64_t num_primitives_written; + uint64_t primitives_storage_needed; +}; +struct pipe_query_data_timestamp_disjoint +{ + uint64_t frequency; + boolean disjoint; +}; #ifdef __cplusplus } diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 0a70237f260..9df20ea8581 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -65,16 +65,18 @@ struct tgsi_token }; enum tgsi_file_type { - TGSI_FILE_NULL =0, - TGSI_FILE_CONSTANT =1, - TGSI_FILE_INPUT =2, - TGSI_FILE_OUTPUT =3, - TGSI_FILE_TEMPORARY =4, - TGSI_FILE_SAMPLER =5, - TGSI_FILE_ADDRESS =6, - TGSI_FILE_IMMEDIATE =7, - TGSI_FILE_PREDICATE =8, - TGSI_FILE_SYSTEM_VALUE =9, + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_PREDICATE =8, + TGSI_FILE_SYSTEM_VALUE =9, + TGSI_FILE_IMMEDIATE_ARRAY =10, + TGSI_FILE_TEMPORARY_ARRAY =11, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; @@ -159,9 +161,9 @@ struct tgsi_declaration_semantic struct tgsi_immediate { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */ - unsigned NrTokens : 8; /**< UINT */ + unsigned NrTokens : 14; /**< UINT */ unsigned DataType : 4; /**< one of TGSI_IMM_x */ - unsigned Padding : 16; + unsigned Padding : 10; }; union tgsi_immediate_data @@ -173,7 +175,7 @@ union tgsi_immediate_data #define TGSI_PROPERTY_GS_INPUT_PRIM 0 #define TGSI_PROPERTY_GS_OUTPUT_PRIM 1 -#define TGSI_PROPERTY_GS_MAX_VERTICES 2 +#define TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES 2 #define TGSI_PROPERTY_FS_COORD_ORIGIN 3 #define TGSI_PROPERTY_FS_COORD_PIXEL_CENTER 4 #define TGSI_PROPERTY_COUNT 5 diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 5255b2003f8..6231f06ec71 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -60,9 +60,11 @@ extern "C" { #define PIPE_MAX_CONSTANT_BUFFERS 32 #define PIPE_MAX_SAMPLERS 16 #define PIPE_MAX_VERTEX_SAMPLERS 16 +#define PIPE_MAX_GEOMETRY_SAMPLERS 16 #define PIPE_MAX_SHADER_INPUTS 16 #define PIPE_MAX_SHADER_OUTPUTS 16 #define PIPE_MAX_TEXTURE_LEVELS 16 +#define PIPE_MAX_SO_BUFFERS 4 struct pipe_reference @@ -345,6 +347,21 @@ struct pipe_resource unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */ }; +struct pipe_stream_output_state +{ + /**< number of the output buffer to insert each element into */ + int output_buffer[PIPE_MAX_SHADER_OUTPUTS]; + /**< which register to grab each output from */ + int register_index[PIPE_MAX_SHADER_OUTPUTS]; + /**< TGSI_WRITEMASK signifying which components to output */ + ubyte register_mask[PIPE_MAX_SHADER_OUTPUTS]; + /**< number of outputs */ + int num_outputs; + + /**< stride for an entire vertex, only used if all output_buffers + * are 0 */ + unsigned stride; +}; /** * Extra indexing info for (cube) texture resources. diff --git a/src/gallium/include/state_tracker/graw.h b/src/gallium/include/state_tracker/graw.h index e5b298e03d9..59b0e337c92 100644 --- a/src/gallium/include/state_tracker/graw.h +++ b/src/gallium/include/state_tracker/graw.h @@ -34,6 +34,9 @@ PUBLIC struct pipe_screen *graw_create_window_and_screen( int x, PUBLIC void graw_set_display_func( void (*func)( void ) ); PUBLIC void graw_main_loop( void ); +PUBLIC void *graw_parse_geometry_shader( struct pipe_context *pipe, + const char *text ); + PUBLIC void *graw_parse_vertex_shader( struct pipe_context *pipe, const char *text ); diff --git a/src/gallium/state_trackers/egl/Makefile b/src/gallium/state_trackers/egl/Makefile index 8933890323a..fec178ffb30 100644 --- a/src/gallium/state_trackers/egl/Makefile +++ b/src/gallium/state_trackers/egl/Makefile @@ -31,15 +31,20 @@ kms_SOURCES = $(wildcard kms/*.c) kms_OBJECTS = $(kms_SOURCES:.c=.o) -ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) -ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) -ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) +fbdev_INCLUDES = -I$(TOP)/src/gallium/winsys/sw -I$(TOP)/src/gallium/drivers +fbdev_SOURCES = $(wildcard fbdev/*.c) +fbdev_OBJECTS = $(fbdev_SOURCES:.c=.o) + + +ALL_INCLUDES = $(common_INCLUDES) $(x11_INCLUDES) $(kms_INCLUDES) $(fbdev_INCLUDES) +ALL_SOURCES = $(common_SOURCES) $(x11_SOURCES) $(kms_SOURCES) $(fbdev_SOURCES) +ALL_OBJECTS = $(common_OBJECTS) $(x11_OBJECTS) $(kms_OBJECTS) $(fbdev_OBJECTS) ##### TARGETS ##### -EGL_DISPLAYS_MODS = $(foreach dpy, $(EGL_DISPLAYS), libegl$(dpy).a) +EGL_PLATFORMS_MODS = $(foreach plat, $(EGL_PLATFORMS), libegl$(plat).a) -default: depend $(EGL_DISPLAYS_MODS) +default: depend $(EGL_PLATFORMS_MODS) libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile @@ -48,6 +53,9 @@ libeglx11.a: $(x11_OBJECTS) $(common_OBJECTS) Makefile libeglkms.a: $(kms_OBJECTS) $(common_OBJECTS) Makefile $(MKLIB) -o eglkms -static $(kms_OBJECTS) $(common_OBJECTS) +libeglfbdev.a: $(fbdev_OBJECTS) $(common_OBJECTS) Makefile + $(MKLIB) -o eglfbdev -static $(fbdev_OBJECTS) $(common_OBJECTS) + depend: rm -f depend touch depend @@ -55,7 +63,7 @@ depend: clean: rm -f $(ALL_OBJECTS) - rm -f $(EGL_DISPLAYS_MODS) + rm -f $(EGL_PLATFORMS_MODS) rm -f depend depend.bak # Dummy target @@ -73,4 +81,7 @@ $(x11_OBJECTS): %.o: %.c $(kms_OBJECTS): %.o: %.c $(CC) -c $(common_INCLUDES) $(kms_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ +$(fbdev_OBJECTS): %.o: %.c + $(CC) -c $(common_INCLUDES) $(fbdev_INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@ + sinclude depend diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 361cc7960bd..8c7d2cb33e7 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -74,10 +74,10 @@ egl_g3d_get_probe(_EGLDriver *drv, _EGLDisplay *dpy) struct native_probe *nprobe; nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); - if (!nprobe || nprobe->display != dpy->NativeDisplay) { + if (!nprobe || nprobe->display != dpy->PlatformDisplay) { if (nprobe) nprobe->destroy(nprobe); - nprobe = native_create_probe(dpy->NativeDisplay); + nprobe = native_create_probe(dpy->PlatformDisplay); _eglSetProbeCache(gdrv->probe_key, (void *) nprobe); } @@ -96,7 +96,7 @@ egl_g3d_destroy_probe(_EGLDriver *drv, _EGLDisplay *dpy) struct native_probe *nprobe; nprobe = (struct native_probe *) _eglGetProbeCache(gdrv->probe_key); - if (nprobe && (!dpy || nprobe->display == dpy->NativeDisplay)) { + if (nprobe && (!dpy || nprobe->display == dpy->PlatformDisplay)) { nprobe->destroy(nprobe); _eglSetProbeCache(gdrv->probe_key, NULL); } @@ -479,7 +479,7 @@ egl_g3d_initialize(_EGLDriver *drv, _EGLDisplay *dpy, } dpy->DriverData = gdpy; - gdpy->native = native_create_display(dpy->NativeDisplay, + gdpy->native = native_create_display(dpy->PlatformDisplay, &egl_g3d_native_event_handler); if (!gdpy->native) { _eglError(EGL_NOT_INITIALIZED, "eglInitialize(no usable display)"); diff --git a/src/gallium/state_trackers/egl/common/egl_g3d_st.c b/src/gallium/state_trackers/egl/common/egl_g3d_st.c index e2217b3b2ba..683b74f62b2 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d_st.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d_st.c @@ -30,7 +30,6 @@ #include "util/u_string.h" #include "util/u_inlines.h" #include "util/u_pointer.h" -#include "util/u_format.h" #include "util/u_dl.h" #include "egldriver.h" #include "eglimage.h" diff --git a/src/gallium/state_trackers/egl/common/native.h b/src/gallium/state_trackers/egl/common/native.h index 3f60348c489..494becb61f2 100644 --- a/src/gallium/state_trackers/egl/common/native.h +++ b/src/gallium/state_trackers/egl/common/native.h @@ -211,7 +211,6 @@ const char * native_get_name(void); struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *handler); +native_create_display(void *dpy, struct native_event_handler *handler); #endif /* _NATIVE_H_ */ diff --git a/src/gallium/state_trackers/egl/common/native_probe.h b/src/gallium/state_trackers/egl/common/native_probe.h index aeed9f85dd5..539c4aa70d2 100644 --- a/src/gallium/state_trackers/egl/common/native_probe.h +++ b/src/gallium/state_trackers/egl/common/native_probe.h @@ -43,7 +43,7 @@ enum native_probe_result { */ struct native_probe { int magic; - EGLNativeDisplayType display; + void *display; void *data; void (*destroy)(struct native_probe *nprobe); @@ -57,7 +57,7 @@ struct native_probe { * same display. */ struct native_probe * -native_create_probe(EGLNativeDisplayType dpy); +native_create_probe(void *dpy); /** * Probe the probe object. diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c new file mode 100644 index 00000000000..399c1251ef6 --- /dev/null +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -0,0 +1,469 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/fb.h> + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_pointer.h" + +#include "common/native.h" +#include "common/native_helper.h" +#include "fbdev/fbdev_sw_winsys.h" + +struct fbdev_display { + struct native_display base; + + int fd; + struct native_event_handler *event_handler; + + struct fb_fix_screeninfo finfo; + struct fb_var_screeninfo vinfo; + + struct native_config config; + struct native_connector connector; + struct native_mode mode; + + struct fbdev_surface *current_surface; +}; + +struct fbdev_surface { + struct native_surface base; + + struct fbdev_display *fbdpy; + struct resource_surface *rsurf; + int width, height; + + unsigned int sequence_number; + + boolean is_current; +}; + +static INLINE struct fbdev_display * +fbdev_display(const struct native_display *ndpy) +{ + return (struct fbdev_display *) ndpy; +} + +static INLINE struct fbdev_surface * +fbdev_surface(const struct native_surface *nsurf) +{ + return (struct fbdev_surface *) nsurf; +} + +static boolean +fbdev_surface_validate(struct native_surface *nsurf, uint attachment_mask, + unsigned int *seq_num, struct pipe_resource **textures, + int *width, int *height) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (!resource_surface_add_resources(fbsurf->rsurf, attachment_mask)) + return FALSE; + if (textures) + resource_surface_get_resources(fbsurf->rsurf, textures, attachment_mask); + + if (seq_num) + *seq_num = fbsurf->sequence_number; + if (width) + *width = fbsurf->width; + if (height) + *height = fbsurf->height; + + return TRUE; +} + +static boolean +fbdev_surface_flush_frontbuffer(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (!fbsurf->is_current) + return TRUE; + + return resource_surface_present(fbsurf->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NULL); +} + +static boolean +fbdev_surface_swap_buffers(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + struct fbdev_display *fbdpy = fbsurf->fbdpy; + boolean ret = TRUE; + + if (fbsurf->is_current) { + ret = resource_surface_present(fbsurf->rsurf, + NATIVE_ATTACHMENT_BACK_LEFT, NULL); + } + + resource_surface_swap_buffers(fbsurf->rsurf, + NATIVE_ATTACHMENT_FRONT_LEFT, NATIVE_ATTACHMENT_BACK_LEFT, TRUE); + /* the front/back textures are swapped */ + fbsurf->sequence_number++; + fbdpy->event_handler->invalid_surface(&fbdpy->base, + &fbsurf->base, fbsurf->sequence_number); + + return ret; +} + +static void +fbdev_surface_wait(struct native_surface *nsurf) +{ + /* no-op */ +} + +static void +fbdev_surface_destroy(struct native_surface *nsurf) +{ + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + resource_surface_destroy(fbsurf->rsurf); + FREE(fbsurf); +} + +static struct native_surface * +fbdev_display_create_scanout_surface(struct native_display *ndpy, + const struct native_config *nconf, + uint width, uint height) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct fbdev_surface *fbsurf; + + fbsurf = CALLOC_STRUCT(fbdev_surface); + if (!fbsurf) + return NULL; + + fbsurf->fbdpy = fbdpy; + fbsurf->width = width; + fbsurf->height = height; + + fbsurf->rsurf = resource_surface_create(fbdpy->base.screen, + nconf->color_format, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT); + if (!fbsurf->rsurf) { + FREE(fbsurf); + return NULL; + } + + resource_surface_set_size(fbsurf->rsurf, fbsurf->width, fbsurf->height); + + fbsurf->base.destroy = fbdev_surface_destroy; + fbsurf->base.swap_buffers = fbdev_surface_swap_buffers; + fbsurf->base.flush_frontbuffer = fbdev_surface_flush_frontbuffer; + fbsurf->base.validate = fbdev_surface_validate; + fbsurf->base.wait = fbdev_surface_wait; + + return &fbsurf->base; +} + +static boolean +fbdev_display_program(struct native_display *ndpy, int crtc_idx, + struct native_surface *nsurf, uint x, uint y, + const struct native_connector **nconns, int num_nconns, + const struct native_mode *nmode) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct fbdev_surface *fbsurf = fbdev_surface(nsurf); + + if (x || y) + return FALSE; + + if (fbdpy->current_surface) { + if (fbdpy->current_surface == fbsurf) + return TRUE; + fbdpy->current_surface->is_current = FALSE; + } + + if (fbsurf) + fbsurf->is_current = TRUE; + fbdpy->current_surface = fbsurf; + + return TRUE; +} + +static const struct native_mode ** +fbdev_display_get_modes(struct native_display *ndpy, + const struct native_connector *nconn, + int *num_modes) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_mode **modes; + + modes = MALLOC(sizeof(*modes)); + if (modes) { + modes[0] = &fbdpy->mode; + if (num_modes) + *num_modes = 1; + } + + return modes; +} + +static const struct native_connector ** +fbdev_display_get_connectors(struct native_display *ndpy, int *num_connectors, + int *num_crtc) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_connector **connectors; + + connectors = MALLOC(sizeof(*connectors)); + if (connectors) { + connectors[0] = &fbdpy->connector; + if (num_connectors) + *num_connectors = 1; + } + + return connectors; +} + +static struct native_display_modeset fbdev_display_modeset = { + .get_connectors = fbdev_display_get_connectors, + .get_modes = fbdev_display_get_modes, + .create_scanout_surface = fbdev_display_create_scanout_surface, + .program = fbdev_display_program +}; + +static const struct native_config ** +fbdev_display_get_configs(struct native_display *ndpy, int *num_configs) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + const struct native_config **configs; + + configs = MALLOC(sizeof(*configs)); + if (configs) { + configs[0] = &fbdpy->config; + if (num_configs) + *num_configs = 1; + } + + return configs; +} + +static int +fbdev_display_get_param(struct native_display *ndpy, + enum native_param_type param) +{ + int val; + + switch (param) { + default: + val = 0; + break; + } + + return val; +} + +static void +fbdev_display_destroy(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + + fbdpy->base.screen->destroy(fbdpy->base.screen); + close(fbdpy->fd); + FREE(fbdpy); +} + +static boolean +fbdev_display_init_modes(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct native_mode *nmode = &fbdpy->mode; + + nmode->desc = "Current Mode"; + nmode->width = fbdpy->vinfo.xres; + nmode->height = fbdpy->vinfo.yres; + nmode->refresh_rate = 60 * 1000; /* dummy */ + + return TRUE; +} + +static boolean +fbdev_display_init_connectors(struct native_display *ndpy) +{ + return TRUE; +} + +static enum pipe_format +vinfo_to_format(const struct fb_var_screeninfo *vinfo) +{ + enum pipe_format format = PIPE_FORMAT_NONE; + + switch (vinfo->bits_per_pixel) { + case 32: + if (vinfo->red.length == 8 && + vinfo->green.length == 8 && + vinfo->blue.length == 8) { + format = (vinfo->transp.length == 8) ? + PIPE_FORMAT_B8G8R8A8_UNORM : PIPE_FORMAT_B8G8R8X8_UNORM; + } + break; + case 16: + if (vinfo->red.length == 5 && + vinfo->green.length == 6 && + vinfo->blue.length == 5 && + vinfo->transp.length == 0) + format = PIPE_FORMAT_B5G6R5_UNORM; + break; + default: + break; + } + + return format; +} + +static boolean +fbdev_display_init_configs(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct native_config *nconf = &fbdpy->config; + + nconf->color_format = vinfo_to_format(&fbdpy->vinfo); + if (nconf->color_format == PIPE_FORMAT_NONE) + return FALSE; + + nconf->buffer_mask = + (1 << NATIVE_ATTACHMENT_FRONT_LEFT) | + (1 << NATIVE_ATTACHMENT_BACK_LEFT); + + nconf->scanout_bit = TRUE; + + return TRUE; +} + +static boolean +fbdev_display_init(struct native_display *ndpy) +{ + struct fbdev_display *fbdpy = fbdev_display(ndpy); + struct sw_winsys *ws; + + if (ioctl(fbdpy->fd, FBIOGET_FSCREENINFO, &fbdpy->finfo)) + return FALSE; + + if (ioctl(fbdpy->fd, FBIOGET_VSCREENINFO, &fbdpy->vinfo)) + return FALSE; + + if (fbdpy->finfo.visual != FB_VISUAL_TRUECOLOR || + fbdpy->finfo.type != FB_TYPE_PACKED_PIXELS) + return FALSE; + + if (!fbdev_display_init_configs(&fbdpy->base) || + !fbdev_display_init_connectors(&fbdpy->base) || + !fbdev_display_init_modes(&fbdpy->base)) + return FALSE; + + ws = fbdev_create_sw_winsys(fbdpy->fd, fbdpy->config.color_format); + if (ws) + fbdpy->base.screen = native_create_sw_screen(ws); + + if (fbdpy->base.screen) { + if (!fbdpy->base.screen->is_format_supported(fbdpy->base.screen, + fbdpy->config.color_format, PIPE_TEXTURE_2D, 0, + PIPE_BIND_RENDER_TARGET, 0)) { + fbdpy->base.screen->destroy(fbdpy->base.screen); + fbdpy->base.screen = NULL; + } + } + + return (fbdpy->base.screen != NULL); +} + +static struct native_display * +fbdev_display_create(int fd, struct native_event_handler *event_handler) +{ + struct fbdev_display *fbdpy; + + fbdpy = CALLOC_STRUCT(fbdev_display); + if (!fbdpy) + return NULL; + + fbdpy->fd = fd; + fbdpy->event_handler = event_handler; + + if (!fbdev_display_init(&fbdpy->base)) { + FREE(fbdpy); + return NULL; + } + + fbdpy->base.destroy = fbdev_display_destroy; + fbdpy->base.get_param = fbdev_display_get_param; + fbdpy->base.get_configs = fbdev_display_get_configs; + + fbdpy->base.modeset = &fbdev_display_modeset; + + return &fbdpy->base; +} + +struct native_probe * +native_create_probe(void *dpy) +{ + return NULL; +} + +enum native_probe_result +native_get_probe_result(struct native_probe *nprobe) +{ + return NATIVE_PROBE_UNKNOWN; +} + +const char * +native_get_name(void) +{ + return "FBDEV"; +} + +struct native_display * +native_create_display(void *dpy, struct native_event_handler *event_handler) +{ + struct native_display *ndpy; + int fd; + + /* well, this makes fd 0 being ignored */ + if (!dpy) { + fd = open("/dev/fb0", O_RDWR); + } + else { + fd = dup((int) pointer_to_intptr(dpy)); + } + if (fd < 0) + return NULL; + + ndpy = fbdev_display_create(fd, event_handler); + if (!ndpy) + close(fd); + + return ndpy; +} diff --git a/src/gallium/state_trackers/egl/gdi/native_gdi.c b/src/gallium/state_trackers/egl/gdi/native_gdi.c index 1791d198d50..56f190de002 100644 --- a/src/gallium/state_trackers/egl/gdi/native_gdi.c +++ b/src/gallium/state_trackers/egl/gdi/native_gdi.c @@ -367,7 +367,7 @@ gdi_create_display(HDC hDC, struct pipe_screen *screen, } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { return NULL; } @@ -385,8 +385,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct sw_winsys *winsys; struct pipe_screen *screen; diff --git a/src/gallium/state_trackers/egl/kms/native_kms.c b/src/gallium/state_trackers/egl/kms/native_kms.c index 8f667f1ec13..09a08f32b3c 100644 --- a/src/gallium/state_trackers/egl/kms/native_kms.c +++ b/src/gallium/state_trackers/egl/kms/native_kms.c @@ -769,7 +769,7 @@ kms_create_display(int fd, struct native_event_handler *event_handler) } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { return NULL; } @@ -792,8 +792,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct native_display *ndpy = NULL; int fd; diff --git a/src/gallium/state_trackers/egl/x11/native_dri2.c b/src/gallium/state_trackers/egl/x11/native_dri2.c index 0ce7c0be64b..7464354fe61 100644 --- a/src/gallium/state_trackers/egl/x11/native_dri2.c +++ b/src/gallium/state_trackers/egl/x11/native_dri2.c @@ -738,7 +738,7 @@ dri2_display_hash_table_compare(void *key1, void *key2) } struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler) { struct dri2_display *dri2dpy; diff --git a/src/gallium/state_trackers/egl/x11/native_x11.c b/src/gallium/state_trackers/egl/x11/native_x11.c index 5d71778410b..6f1e5998732 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.c +++ b/src/gallium/state_trackers/egl/x11/native_x11.c @@ -45,7 +45,7 @@ x11_probe_destroy(struct native_probe *nprobe) } struct native_probe * -native_create_probe(EGLNativeDisplayType dpy) +native_create_probe(void *dpy) { struct native_probe *nprobe; struct x11_screen *xscr; @@ -117,8 +117,7 @@ native_get_name(void) } struct native_display * -native_create_display(EGLNativeDisplayType dpy, - struct native_event_handler *event_handler) +native_create_display(void *dpy, struct native_event_handler *event_handler) { struct native_display *ndpy = NULL; boolean force_sw; @@ -128,14 +127,14 @@ native_create_display(EGLNativeDisplayType dpy, force_sw = TRUE; if (!force_sw) { - ndpy = x11_create_dri2_display(dpy, event_handler); + ndpy = x11_create_dri2_display((Display *) dpy, event_handler); } if (!ndpy) { EGLint level = (force_sw) ? _EGL_INFO : _EGL_WARNING; _eglLog(level, "use software fallback"); - ndpy = x11_create_ximage_display(dpy, event_handler); + ndpy = x11_create_ximage_display((Display *) dpy, event_handler); } return ndpy; diff --git a/src/gallium/state_trackers/egl/x11/native_x11.h b/src/gallium/state_trackers/egl/x11/native_x11.h index e16da935c0b..a1c32c3455a 100644 --- a/src/gallium/state_trackers/egl/x11/native_x11.h +++ b/src/gallium/state_trackers/egl/x11/native_x11.h @@ -29,11 +29,11 @@ #include "common/native.h" struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, +x11_create_ximage_display(Display *dpy, struct native_event_handler *event_handler); struct native_display * -x11_create_dri2_display(EGLNativeDisplayType dpy, +x11_create_dri2_display(Display *dpy, struct native_event_handler *event_handler); #endif /* _NATIVE_X11_H_ */ diff --git a/src/gallium/state_trackers/egl/x11/native_ximage.c b/src/gallium/state_trackers/egl/x11/native_ximage.c index 45679fc9b4e..ee10a04cfb2 100644 --- a/src/gallium/state_trackers/egl/x11/native_ximage.c +++ b/src/gallium/state_trackers/egl/x11/native_ximage.c @@ -441,7 +441,7 @@ ximage_display_destroy(struct native_display *ndpy) } struct native_display * -x11_create_ximage_display(EGLNativeDisplayType dpy, +x11_create_ximage_display(Display *dpy, struct native_event_handler *event_handler) { struct ximage_display *xdpy; diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c index 9357cd5763f..c69d75e22e8 100644 --- a/src/gallium/state_trackers/xorg/xorg_driver.c +++ b/src/gallium/state_trackers/xorg/xorg_driver.c @@ -81,7 +81,8 @@ typedef enum OPTION_2D_ACCEL, OPTION_DEBUG_FALLBACK, OPTION_THROTTLE_SWAP, - OPTION_THROTTLE_DIRTY + OPTION_THROTTLE_DIRTY, + OPTION_3D_ACCEL } drv_option_enums; static const OptionInfoRec drv_options[] = { @@ -90,6 +91,7 @@ static const OptionInfoRec drv_options[] = { {OPTION_DEBUG_FALLBACK, "DebugFallback", OPTV_BOOLEAN, {0}, FALSE}, {OPTION_THROTTLE_SWAP, "SwapThrottling", OPTV_BOOLEAN, {0}, FALSE}, {OPTION_THROTTLE_DIRTY, "DirtyThrottling", OPTV_BOOLEAN, {0}, FALSE}, + {OPTION_3D_ACCEL, "3DAccel", OPTV_BOOLEAN, {0}, FALSE}, {-1, NULL, OPTV_NONE, {0}, FALSE} }; @@ -305,7 +307,8 @@ drv_init_resource_management(ScrnInfoPtr pScrn) if (ms->screen || ms->kms) return TRUE; - ms->screen = driver_descriptor.create_screen(ms->fd); + if (!ms->no3D) + ms->screen = driver_descriptor.create_screen(ms->fd); if (ms->screen) return TRUE; @@ -629,12 +632,21 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) CustomizerPtr cust = ms->cust; MessageType from_st; MessageType from_dt; + MessageType from_3D; + Bool use3D; if (!drv_init_drm(pScrn)) { FatalError("Could not init DRM"); return FALSE; } + use3D = cust ? !cust->no_3d : TRUE; + from_3D = xf86GetOptValBool(ms->Options, OPTION_3D_ACCEL, + &use3D) ? + X_CONFIG : X_PROBED; + + ms->no3D = !use3D; + if (!drv_init_resource_management(pScrn)) { FatalError("Could not init resource management (!pipe_screen && !libkms)"); return FALSE; @@ -747,7 +759,7 @@ drv_screen_init(int scrnIndex, ScreenPtr pScreen, int argc, char **argv) xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Fallback debugging is %s\n", ms->debug_fallback ? "enabled" : "disabled"); #ifdef DRI2 - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is %s\n", + xf86DrvMsg(pScrn->scrnIndex, from_3D, "3D Acceleration is %s\n", ms->screen ? "enabled" : "disabled"); #else xf86DrvMsg(pScrn->scrnIndex, X_INFO, "3D Acceleration is disabled\n"); diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h index cc2d379af15..a5178ef43a4 100644 --- a/src/gallium/state_trackers/xorg/xorg_tracker.h +++ b/src/gallium/state_trackers/xorg/xorg_tracker.h @@ -75,6 +75,7 @@ typedef struct _CustomizerRec { Bool dirty_throttling; Bool swap_throttling; + Bool no_3d; Bool (*winsys_screen_init)(struct _CustomizerRec *cust, int fd); Bool (*winsys_screen_close)(struct _CustomizerRec *cust); Bool (*winsys_enter_vt)(struct _CustomizerRec *cust); @@ -103,6 +104,7 @@ typedef struct _modesettingRec Bool swapThrottling; Bool dirtyThrottling; CloseScreenProcPtr CloseScreen; + Bool no3D; /* Broken-out options. */ OptionInfoPtr Options; diff --git a/src/gallium/targets/Makefile.egl b/src/gallium/targets/Makefile.egl index 4fa13e85ce4..315856014b7 100644 --- a/src/gallium/targets/Makefile.egl +++ b/src/gallium/targets/Makefile.egl @@ -24,15 +24,26 @@ x11_ST = $(TOP)/src/gallium/state_trackers/egl/libeglx11.a \ x11_LIBS = $(common_LIBS) -lX11 -lXext -lXfixes +kms_ST = $(TOP)/src/gallium/state_trackers/egl/libeglkms.a +kms_LIBS = $(common_LIBS) + +fbdev_ST = \ + $(TOP)/src/gallium/state_trackers/egl/libeglfbdev.a \ + $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/identity/libidentity.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/drivers/rbug/librbug.a +fbdev_LIBS = $(common_LIBS) + ifeq ($(MESA_LLVM),1) x11_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a x11_LIBS += $(LLVM_LIBS) +fbdev_ST += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a +fbdev_LIBS += $(LLVM_LIBS) LDFLAGS += $(LLVM_LDFLAGS) endif -kms_ST = $(TOP)/src/gallium/state_trackers/egl/libeglkms.a -kms_LIBS = $(common_LIBS) - ### Include directories INCLUDES = \ -I$(TOP)/include \ @@ -51,13 +62,19 @@ INCLUDES = \ ##### TARGETS ##### -EGL_DISPLAY_DRIVERS = $(foreach dpy, $(EGL_DISPLAYS), egl_$(dpy)_$(EGL_DRIVER_NAME).so) +ifeq ($(EGL_DRIVER_NAME),swrast) +EGL_PLATFORMS := $(filter-out kms, $(EGL_PLATFORMS)) +else +EGL_PLATFORMS := $(filter-out fbdev, $(EGL_PLATFORMS)) +endif -EGL_DISPLAY_LIBS = $(foreach drv, $(EGL_DISPLAY_DRIVERS), $(TOP)/$(LIB_DIR)/egl/$(drv)) +EGL_PLATFORM_DRIVERS = $(foreach plat, $(EGL_PLATFORMS), egl_$(plat)_$(EGL_DRIVER_NAME).so) -default: $(EGL_DISPLAY_LIBS) +EGL_PLATFORM_LIBS = $(foreach drv, $(EGL_PLATFORM_DRIVERS), $(TOP)/$(LIB_DIR)/egl/$(drv)) -$(EGL_DISPLAY_LIBS): $(TOP)/$(LIB_DIR)/egl/%.so: %.so +default: $(EGL_PLATFORM_LIBS) + +$(EGL_PLATFORM_LIBS): $(TOP)/$(LIB_DIR)/egl/%.so: %.so @$(INSTALL) -d $(TOP)/$(LIB_DIR)/egl $(INSTALL) $< $(TOP)/$(LIB_DIR)/egl @@ -75,13 +92,16 @@ egl_x11_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(x11_ST) $(EGL_DRIVER_PIPE egl_kms_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(kms_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile $(call mklib-egl,kms) +egl_fbdev_$(EGL_DRIVER_NAME).so: $(EGL_DRIVER_OBJECTS) $(fbdev_ST) $(EGL_DRIVER_PIPES) $(GALLIUM_AUXILIARIES) Makefile + $(call mklib-egl,fbdev) + clean: -rm -f $(EGL_DRIVER_OBJECTS) - -rm -f $(EGL_DISPLAY_DRIVERS) + -rm -f $(EGL_PLATFORM_DRIVERS) -install: $(EGL_DISPLAY_LIBS) +install: $(EGL_PLATFORM_LIBS) $(INSTALL) -d $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR) - for lib in $(EGL_DISPLAY_LIBS); do \ + for lib in $(EGL_PLATFORM_LIBS); do \ $(MINSTALL) -m 755 "$$lib" $(DESTDIR)$(EGL_DRIVER_INSTALL_DIR); \ done diff --git a/src/gallium/targets/dri-radeong/Makefile b/src/gallium/targets/dri-radeong/Makefile index 28647540b0a..a0ee9f1d18a 100644 --- a/src/gallium/targets/dri-radeong/Makefile +++ b/src/gallium/targets/dri-radeong/Makefile @@ -7,6 +7,7 @@ PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/drm/libdridrm.a \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/egl-radeon/Makefile b/src/gallium/targets/egl-radeon/Makefile index 2adc027b8f0..79426836db4 100644 --- a/src/gallium/targets/egl-radeon/Makefile +++ b/src/gallium/targets/egl-radeon/Makefile @@ -10,6 +10,7 @@ EGL_DRIVER_DEFINES = \ EGL_DRIVER_PIPES = \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/r300/libr300.a diff --git a/src/gallium/targets/graw-xlib/graw_util.c b/src/gallium/targets/graw-xlib/graw_util.c index 147532cdee1..47aca4464db 100644 --- a/src/gallium/targets/graw-xlib/graw_util.c +++ b/src/gallium/targets/graw-xlib/graw_util.c @@ -8,6 +8,19 @@ /* Helper functions. These are the same for all graw implementations. */ +void *graw_parse_geometry_shader(struct pipe_context *pipe, + const char *text) +{ + struct tgsi_token tokens[1024]; + struct pipe_shader_state state; + + if (!tgsi_text_translate(text, tokens, Elements(tokens))) + return NULL; + + state.tokens = tokens; + return pipe->create_gs_state(pipe, &state); +} + void *graw_parse_vertex_shader(struct pipe_context *pipe, const char *text) { diff --git a/src/gallium/targets/xorg-radeon/Makefile b/src/gallium/targets/xorg-radeon/Makefile index ba7caea155c..caf2ac98b94 100644 --- a/src/gallium/targets/xorg-radeon/Makefile +++ b/src/gallium/targets/xorg-radeon/Makefile @@ -14,10 +14,11 @@ DRIVER_LINKS = \ $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \ $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \ $(TOP)/src/gallium/drivers/r300/libr300.a \ + $(TOP)/src/gallium/drivers/galahad/libgalahad.a \ $(TOP)/src/gallium/drivers/trace/libtrace.a \ $(TOP)/src/gallium/drivers/rbug/librbug.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ $(GALLIUM_AUXILIARIES) \ - $(shell pkg-config --libs libdrm libdrm_intel) + $(shell pkg-config --libs libdrm libdrm_radeon) include ../Makefile.xorg diff --git a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c index e0edf32adb4..64934d53f68 100644 --- a/src/gallium/targets/xorg-vmwgfx/vmw_screen.c +++ b/src/gallium/targets/xorg-vmwgfx/vmw_screen.c @@ -203,6 +203,7 @@ vmw_screen_pre_init(ScrnInfoPtr pScrn, int flags) cust->winsys_screen_close = vmw_screen_close; cust->winsys_enter_vt = vmw_screen_enter_vt; cust->winsys_leave_vt = vmw_screen_leave_vt; + cust->no_3d = TRUE; vmw->pScrn = pScrn; pScrn->driverPrivate = cust; diff --git a/src/gallium/tests/graw/SConscript b/src/gallium/tests/graw/SConscript index 77bf59f54ff..a40d66d4a1d 100644 --- a/src/gallium/tests/graw/SConscript +++ b/src/gallium/tests/graw/SConscript @@ -9,14 +9,16 @@ except NameError: env = env.Clone() env.Prepend(LIBPATH = [graw.dir]) -env.Prepend(LIBS = ['graw']) +env.Prepend(LIBS = ['graw'] + gallium) progs = [ 'clear', 'tri', 'quad-tex', - 'fp-test', - 'vp-test', + 'fs-test', + 'vs-test', + 'gs-test', + 'tri-gs', ] for prog in progs: diff --git a/src/gallium/tests/graw/clear.c b/src/gallium/tests/graw/clear.c index 28c986eee64..ce52a93aa1b 100644 --- a/src/gallium/tests/graw/clear.c +++ b/src/gallium/tests/graw/clear.c @@ -9,7 +9,6 @@ #include "pipe/p_defines.h" #include "util/u_debug.h" /* debug_dump_surface_bmp() */ -#include "os/os_time.h" /* os_time_sleep() */ enum pipe_format formats[] = { PIPE_FORMAT_R8G8B8A8_UNORM, diff --git a/src/gallium/tests/graw/fragment-shader b/src/gallium/tests/graw/fragment-shader new file mode 120000 index 00000000000..c7dd0b7c6b2 --- /dev/null +++ b/src/gallium/tests/graw/fragment-shader @@ -0,0 +1 @@ +../python/tests/regress/fragment-shader
\ No newline at end of file diff --git a/src/gallium/tests/graw/fp-test.c b/src/gallium/tests/graw/fs-test.c index d1c29b90b86..dea087357d6 100644 --- a/src/gallium/tests/graw/fp-test.c +++ b/src/gallium/tests/graw/fs-test.c @@ -42,7 +42,8 @@ static const int HEIGHT = 250; static struct pipe_screen *screen = NULL; static struct pipe_context *ctx = NULL; static struct pipe_resource *rttex = NULL; -static struct pipe_resource *constbuf = NULL; +static struct pipe_resource *constbuf1 = NULL; +static struct pipe_resource *constbuf2 = NULL; static struct pipe_surface *surf = NULL; static struct pipe_sampler_view *sv = NULL; static void *sampler = NULL; @@ -73,7 +74,7 @@ static struct vertex vertices[] = { -1, 0, 0, 1 } }, }; -static float constants[] = +static float constants1[] = { 0.4, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, @@ -81,7 +82,25 @@ static float constants[] = 3, 0, 0, 0, 0, .5, 0, 0, - 0, 0, 1, 0, + 1, 0, 0, 1, + 0, 0, 0, 1, + + 1, 0, 0, 0.5, + 0, 1, 0, 0.5, + 0, 0, 1, 0, + 0, 0, 0, 1, +}; + + +static float constants2[] = +{ 1, 0, 0, 1, + 0, 1, 0, 1, + 0, 0, 1, 1, + 0, 0, 0, 0, + + 1, 1, 0, 1, + 1, .5, 0, 1, + 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0.5, @@ -97,34 +116,58 @@ static void init_fs_constbuf( void ) templat.target = PIPE_BUFFER; templat.format = PIPE_FORMAT_R8_UNORM; - templat.width0 = sizeof(constants); + templat.width0 = sizeof(constants1); templat.height0 = 1; templat.depth0 = 1; templat.last_level = 0; templat.nr_samples = 1; templat.bind = PIPE_BIND_CONSTANT_BUFFER; - constbuf = screen->resource_create(screen, - &templat); - if (constbuf == NULL) + constbuf1 = screen->resource_create(screen, + &templat); + if (constbuf1 == NULL) exit(4); + constbuf2 = screen->resource_create(screen, + &templat); + if (constbuf2 == NULL) + exit(4); - u_box_2d(0,0,sizeof(constants),1, &box); - ctx->transfer_inline_write(ctx, - constbuf, - u_subresource(0,0), - PIPE_TRANSFER_WRITE, - &box, - constants, - sizeof constants, - sizeof constants); + { + u_box_2d(0,0,sizeof(constants1),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf1, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants1, + sizeof constants1, + sizeof constants1); - ctx->set_constant_buffer(ctx, - PIPE_SHADER_FRAGMENT, 0, - constbuf); + ctx->set_constant_buffer(ctx, + PIPE_SHADER_FRAGMENT, 0, + constbuf1); + } + { + u_box_2d(0,0,sizeof(constants2),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf2, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants2, + sizeof constants2, + sizeof constants2); + + + ctx->set_constant_buffer(ctx, + PIPE_SHADER_FRAGMENT, 1, + constbuf2); + } } @@ -223,6 +266,7 @@ static void set_fragment_shader( const char *filename ) handle = graw_parse_fragment_shader(ctx, buf); ctx->bind_fs_state(ctx, handle); + fclose(f); } diff --git a/src/gallium/tests/graw/geometry-shader/add-mix.txt b/src/gallium/tests/graw/geometry-shader/add-mix.txt new file mode 100644 index 00000000000..63e689a5158 --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/add-mix.txt @@ -0,0 +1,23 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT + +MOV OUT[0], IN[0][0] +ADD OUT[1], IN[0][1], IN[1][1] +EMIT + +MOV OUT[0], IN[1][0] +ADD OUT[1], IN[1][1], IN[2][1] +EMIT + +MOV OUT[0], IN[2][0] +ADD OUT[1], IN[2][1], IN[0][1] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/add.txt b/src/gallium/tests/graw/geometry-shader/add.txt new file mode 100644 index 00000000000..d8c7c41e9bd --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/add.txt @@ -0,0 +1,23 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE LINE_STRIP +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT + +MOV OUT[0], IN[0][0] +ADD OUT[1], IN[0][1], IN[0][1] +EMIT + +MOV OUT[0], IN[1][0] +ADD OUT[1], IN[1][1], IN[1][1] +EMIT + +MOV OUT[0], IN[2][0] +ADD OUT[1], IN[2][1], IN[2][1] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/line.txt b/src/gallium/tests/graw/geometry-shader/line.txt new file mode 100644 index 00000000000..8be3f7fd985 --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/line.txt @@ -0,0 +1,28 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE LINE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 4 +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT + +MOV OUT[0], IN[0][0] +MOV OUT[1], IN[0][1] +EMIT + +MOV OUT[0], IN[1][0] +MOV OUT[1], IN[0][1] +EMIT + +MOV OUT[0], IN[2][0] +MOV OUT[1], IN[2][1] +EMIT + +MOV OUT[0], IN[0][0] +MOV OUT[1], IN[0][1] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt new file mode 100644 index 00000000000..058acfbcb5a --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/mov-cb-2d.txt @@ -0,0 +1,24 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT +DCL CONST[1][0..6] + +MOV OUT[0], IN[0][0] +MOV OUT[1], CONST[1][0] +EMIT + +MOV OUT[0], IN[1][0] +MOV OUT[1], CONST[1][1] +EMIT + +MOV OUT[0], IN[2][0] +MOV OUT[1], CONST[1][4] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/mov.txt b/src/gallium/tests/graw/geometry-shader/mov.txt new file mode 100644 index 00000000000..97150a5da45 --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/mov.txt @@ -0,0 +1,23 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT + +MOV OUT[0], IN[0][0] +MOV OUT[1], IN[0][1] +EMIT + +MOV OUT[0], IN[1][0] +MOV OUT[1], IN[1][1] +EMIT + +MOV OUT[0], IN[2][0] +MOV OUT[1], IN[2][1] +EMIT + +ENDPRIM + +END diff --git a/src/gallium/tests/graw/geometry-shader/multi-line.txt b/src/gallium/tests/graw/geometry-shader/multi-line.txt new file mode 100644 index 00000000000..0762d19cdd9 --- /dev/null +++ b/src/gallium/tests/graw/geometry-shader/multi-line.txt @@ -0,0 +1,42 @@ +GEOM +PROPERTY GS_INPUT_PRIMITIVE TRIANGLES +PROPERTY GS_OUTPUT_PRIMITIVE LINE_STRIP +PROPERTY GS_MAX_OUTPUT_VERTICES 8 +DCL IN[][0], POSITION, CONSTANT +DCL IN[][1], COLOR, CONSTANT +DCL OUT[0], POSITION, CONSTANT +DCL OUT[1], COLOR, CONSTANT +DCL TEMP[0] + +MOV TEMP[0], IN[0][0] +ADD TEMP[0].y, IN[0][0], IN[1][0] + +MOV OUT[0], TEMP[0] +MOV OUT[1], IN[0][1] +EMIT +MOV OUT[0], IN[2][0] +MOV OUT[1], IN[0][1] +EMIT +MOV OUT[0], IN[0][0] +MOV OUT[1], IN[2][1] +EMIT +MOV OUT[0], TEMP[0] +MOV OUT[1], IN[0][1] +EMIT +ENDPRIM + +MOV OUT[0], TEMP[0] +MOV OUT[1], IN[0][1] +EMIT +MOV OUT[0], IN[2][0] +MOV OUT[1], IN[0][1] +EMIT +MOV OUT[0], IN[1][0] +MOV OUT[1], IN[2][1] +EMIT +MOV OUT[0], TEMP[0] +MOV OUT[1], IN[0][1] +EMIT +ENDPRIM + +END diff --git a/src/gallium/tests/graw/gs-test.c b/src/gallium/tests/graw/gs-test.c new file mode 100644 index 00000000000..3087d446fca --- /dev/null +++ b/src/gallium/tests/graw/gs-test.c @@ -0,0 +1,623 @@ +/* Display a cleared blue window. This demo has no dependencies on + * any utility code, just the graw interface and gallium. + */ + +#include "state_tracker/graw.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include <stdio.h> /* for fread(), etc */ + +#include "util/u_debug.h" /* debug_dump_surface_bmp() */ +#include "util/u_inlines.h" +#include "util/u_memory.h" /* Offset() */ +#include "util/u_box.h" + +static const char *filename = NULL; +unsigned show_fps = 0; +unsigned draw_strip = 0; + + +static void usage(char *name) +{ + fprintf(stderr, "usage: %s [ options ] shader_filename\n", name); +#ifndef WIN32 + fprintf(stderr, "\n" ); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -fps show frames per second\n"); + fprintf(stderr, " -strip renders a triangle strip\n"); +#endif +} + + +enum pipe_format formats[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_NONE +}; + +static const int WIDTH = 250; +static const int HEIGHT = 250; + +static struct pipe_screen *screen = NULL; +static struct pipe_context *ctx = NULL; +static struct pipe_resource *rttex = NULL; +static struct pipe_resource *constbuf1 = NULL; +static struct pipe_resource *constbuf2 = NULL; +static struct pipe_surface *surf = NULL; +static struct pipe_sampler_view *sv = NULL; +static void *sampler = NULL; +static void *window = NULL; +static struct pipe_resource *samptex = NULL; + +struct vertex { + float position[4]; + float color[4]; + float texcoord[4]; + float generic[4]; +}; + +/* Vertex data matches progs/fp/fp-tri.c, but flipped in Y dimension + * so that the final images are the same. + */ +static struct vertex vertices[] = +{ + { { 0.9, 0.9, 0.0, 1.0 }, + { 0, 0, 1, 1 }, + { 1, 1, 0, 1 }, + { 1, 0, 1, 0 } + }, + + { { 0.9, -0.9, 0.0, 1.0 }, + { 1, 0, 0, 1 }, + { 1, -1, 0, 1 }, + { 0, 1, 0, 1 } + }, + + { {-0.9, 0.0, 0.0, 1.0 }, + { 0, 1, 0, 1 }, + { -1, 0, 0, 1 }, + { 0, 0, 1, 1 } + }, +}; + +static struct vertex vertices_strip[] = +{ + { { 0.9, 0.9, 0.0, 1.0 }, + { 0, 0, 1, 1 }, + { 1, 1, 0, 1 }, + { 1, 0, 0, 1 } + }, + + { { 0.9, -0.9, 0.0, 1.0 }, + { 1, 0, 0, 1 }, + { 1, -1, 0, 1 }, + { 0, 1, 0, 1 } + }, + + { {-0.9, 0.9, 0.0, 1.0 }, + { 0, 1, 0, 1 }, + { -1, 1, 0, 1 }, + { 0, 0, 1, 1 } + }, + + { {-0.9, -0.9, 0.0, 1.0 }, + { 1, 1, 0, 1 }, + { -1, -1, 0, 1 }, + { 1, 1, 0, 1 } + }, +}; + +static float constants1[] = +{ 0.4, 0, 0, 1, + 1, 1, 1, 1, + 2, 2, 2, 2, + 4, 8, 16, 32, + + 3, 0, 0, 0, + 0, .5, 0, 0, + 0, 0, 1, 0, + 0, 0, 0, 1, + + 1, 0, 0, 0.5, + 0, 1, 0, 0.5, + 0, 0, 1, 0, + 0, 0, 0, 1, +}; + + +static float constants2[] = +{ 1, 0, 0, 1, + 0, 1, 0, 1, + 0, 0, 1, 1, + 0, 0, 0, 1, + + 1, 1, 0, 1, + 1, .5, 0, 1, + 0, 1, 1, 1, + 1, 0, 1, 1, + + 1, 0, 0, 0.5, + 0, 1, 0, 0.5, + 0, 0, 1, 0, + 0, 0, 0, 1, +}; + + +static void init_fs_constbuf( void ) +{ + struct pipe_resource templat; + struct pipe_box box; + + templat.target = PIPE_BUFFER; + templat.format = PIPE_FORMAT_R8_UNORM; + templat.width0 = sizeof(constants1); + templat.height0 = 1; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = PIPE_BIND_CONSTANT_BUFFER; + + constbuf1 = screen->resource_create(screen, &templat); + if (constbuf1 == NULL) + exit(4); + constbuf2 = screen->resource_create(screen, &templat); + if (constbuf2 == NULL) + exit(4); + + { + u_box_2d(0,0,sizeof(constants1),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf1, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants1, + sizeof constants1, + sizeof constants1); + + + ctx->set_constant_buffer(ctx, + PIPE_SHADER_GEOMETRY, 0, + constbuf1); + } + { + u_box_2d(0,0,sizeof(constants2),1, &box); + + ctx->transfer_inline_write(ctx, + constbuf2, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + constants2, + sizeof constants2, + sizeof constants2); + + + ctx->set_constant_buffer(ctx, + PIPE_SHADER_GEOMETRY, 1, + constbuf2); + } +} + + +static void set_viewport( float x, float y, + float width, float height, + float near, float far) +{ + float z = far; + float half_width = (float)width / 2.0f; + float half_height = (float)height / 2.0f; + float half_depth = ((float)far - (float)near) / 2.0f; + struct pipe_viewport_state vp; + + vp.scale[0] = half_width; + vp.scale[1] = half_height; + vp.scale[2] = half_depth; + vp.scale[3] = 1.0f; + + vp.translate[0] = half_width + x; + vp.translate[1] = half_height + y; + vp.translate[2] = half_depth + z; + vp.translate[3] = 0.0f; + + ctx->set_viewport_state( ctx, &vp ); +} + +static void set_vertices( void ) +{ + struct pipe_vertex_element ve[4]; + struct pipe_vertex_buffer vbuf; + void *handle; + + memset(ve, 0, sizeof ve); + + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[2].src_offset = Offset(struct vertex, texcoord); + ve[2].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[3].src_offset = Offset(struct vertex, generic); + ve[3].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + handle = ctx->create_vertex_elements_state(ctx, 4, ve); + ctx->bind_vertex_elements_state(ctx, handle); + + vbuf.stride = sizeof( struct vertex ); + vbuf.buffer_offset = 0; + if (draw_strip) { + vbuf.max_index = sizeof(vertices_strip) / vbuf.stride; + vbuf.buffer = screen->user_buffer_create(screen, + vertices_strip, + sizeof(vertices_strip), + PIPE_BIND_VERTEX_BUFFER); + } else { + vbuf.max_index = sizeof(vertices) / vbuf.stride; + vbuf.buffer = screen->user_buffer_create(screen, + vertices, + sizeof(vertices), + PIPE_BIND_VERTEX_BUFFER); + } + + ctx->set_vertex_buffers(ctx, 1, &vbuf); +} + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL IN[2]\n" + "DCL IN[3]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], COLOR[0]\n" + "DCL OUT[2], GENERIC[0]\n" + "DCL OUT[3], GENERIC[1]\n" + " MOV OUT[0], IN[0]\n" + " MOV OUT[1], IN[1]\n" + " MOV OUT[2], IN[2]\n" + " MOV OUT[3], IN[3]\n" + " END\n"; + + handle = graw_parse_vertex_shader(ctx, text); + ctx->bind_vs_state(ctx, handle); +} + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + "FRAG\n" + "DCL IN[0], COLOR, LINEAR\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END\n"; + + handle = graw_parse_fragment_shader(ctx, text); + ctx->bind_fs_state(ctx, handle); +} + + +static void set_geometry_shader( void ) +{ + FILE *f; + char buf[50000]; + void *handle; + int sz; + + if ((f = fopen(filename, "r")) == NULL) { + fprintf(stderr, "Couldn't open %s\n", filename); + exit(1); + } + + sz = fread(buf, 1, sizeof(buf), f); + if (!feof(f)) { + printf("file too long\n"); + exit(1); + } + printf("%.*s\n", sz, buf); + buf[sz] = 0; + + handle = graw_parse_geometry_shader(ctx, buf); + ctx->bind_gs_state(ctx, handle); + fclose(f); +} + + +static void draw( void ) +{ + float clear_color[4] = {.1,.3,.5,0}; + + ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + if (draw_strip) + ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + else + ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + +#if 0 + /* At the moment, libgraw leaks out/makes available some of the + * symbols from gallium/auxiliary, including these debug helpers. + * Will eventually want to bless some of these paths, and lock the + * others down so they aren't accessible from test programs. + * + * This currently just happens to work on debug builds - a release + * build will probably fail to link here: + */ + debug_dump_surface_bmp(ctx, "result.bmp", surf); +#endif + + screen->flush_frontbuffer(screen, surf, window); +} + +#define SIZE 16 + +static void init_tex( void ) +{ + struct pipe_sampler_view sv_template; + struct pipe_sampler_state sampler_desc; + struct pipe_resource templat; + struct pipe_box box; + ubyte tex2d[SIZE][SIZE][4]; + int s, t; + +#if (SIZE != 2) + for (s = 0; s < SIZE; s++) { + for (t = 0; t < SIZE; t++) { + if (0) { + int x = (s ^ t) & 1; + tex2d[t][s][0] = (x) ? 0 : 63; + tex2d[t][s][1] = (x) ? 0 : 128; + tex2d[t][s][2] = 0; + tex2d[t][s][3] = 0xff; + } + else { + int x = ((s ^ t) >> 2) & 1; + tex2d[t][s][0] = s*255/(SIZE-1); + tex2d[t][s][1] = t*255/(SIZE-1); + tex2d[t][s][2] = (x) ? 0 : 128; + tex2d[t][s][3] = 0xff; + } + } + } +#else + tex2d[0][0][0] = 0; + tex2d[0][0][1] = 255; + tex2d[0][0][2] = 255; + tex2d[0][0][3] = 0; + + tex2d[0][1][0] = 0; + tex2d[0][1][1] = 0; + tex2d[0][1][2] = 255; + tex2d[0][1][3] = 255; + + tex2d[1][0][0] = 255; + tex2d[1][0][1] = 255; + tex2d[1][0][2] = 0; + tex2d[1][0][3] = 255; + + tex2d[1][1][0] = 255; + tex2d[1][1][1] = 0; + tex2d[1][1][2] = 0; + tex2d[1][1][3] = 255; +#endif + + templat.target = PIPE_TEXTURE_2D; + templat.format = PIPE_FORMAT_B8G8R8A8_UNORM; + templat.width0 = SIZE; + templat.height0 = SIZE; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = PIPE_BIND_SAMPLER_VIEW; + + + samptex = screen->resource_create(screen, + &templat); + if (samptex == NULL) + exit(4); + + u_box_2d(0,0,SIZE,SIZE, &box); + + ctx->transfer_inline_write(ctx, + samptex, + u_subresource(0,0), + PIPE_TRANSFER_WRITE, + &box, + tex2d, + sizeof tex2d[0], + sizeof tex2d); + + /* Possibly read back & compare against original data: + */ + if (0) + { + struct pipe_transfer *t; + uint32_t *ptr; + t = pipe_get_transfer(ctx, samptex, + 0, 0, 0, /* face, level, zslice */ + PIPE_TRANSFER_READ, + 0, 0, SIZE, SIZE); /* x, y, width, height */ + + ptr = ctx->transfer_map(ctx, t); + + if (memcmp(ptr, tex2d, sizeof tex2d) != 0) { + assert(0); + exit(9); + } + + ctx->transfer_unmap(ctx, t); + + ctx->transfer_destroy(ctx, t); + } + + memset(&sv_template, 0, sizeof sv_template); + sv_template.format = samptex->format; + sv_template.texture = samptex; + sv_template.first_level = 0; + sv_template.last_level = 0; + sv_template.swizzle_r = 0; + sv_template.swizzle_g = 1; + sv_template.swizzle_b = 2; + sv_template.swizzle_a = 3; + sv = ctx->create_sampler_view(ctx, samptex, &sv_template); + if (sv == NULL) + exit(5); + + ctx->set_fragment_sampler_views(ctx, 1, &sv); + + + memset(&sampler_desc, 0, sizeof sampler_desc); + sampler_desc.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler_desc.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler_desc.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler_desc.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler_desc.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler_desc.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler_desc.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler_desc.compare_func = 0; + sampler_desc.normalized_coords = 1; + sampler_desc.max_anisotropy = 0; + + sampler = ctx->create_sampler_state(ctx, &sampler_desc); + if (sampler == NULL) + exit(6); + + ctx->bind_fragment_sampler_states(ctx, 1, &sampler); + +} + +static void init( void ) +{ + struct pipe_framebuffer_state fb; + struct pipe_resource templat; + int i; + + /* It's hard to say whether window or screen should be created + * first. Different environments would prefer one or the other. + * + * Also, no easy way of querying supported formats if the screen + * cannot be created first. + */ + for (i = 0; + window == NULL && formats[i] != PIPE_FORMAT_NONE; + i++) { + + screen = graw_create_window_and_screen(0,0,WIDTH,HEIGHT, + formats[i], + &window); + } + + ctx = screen->context_create(screen, NULL); + if (ctx == NULL) + exit(3); + + templat.target = PIPE_TEXTURE_2D; + templat.format = formats[i]; + templat.width0 = WIDTH; + templat.height0 = HEIGHT; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + + rttex = screen->resource_create(screen, + &templat); + if (rttex == NULL) + exit(4); + + surf = screen->get_tex_surface(screen, rttex, 0, 0, 0, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + if (surf == NULL) + exit(5); + + memset(&fb, 0, sizeof fb); + fb.nr_cbufs = 1; + fb.width = WIDTH; + fb.height = HEIGHT; + fb.cbufs[0] = surf; + + ctx->set_framebuffer_state(ctx, &fb); + + { + struct pipe_blend_state blend; + void *handle; + memset(&blend, 0, sizeof blend); + blend.rt[0].colormask = PIPE_MASK_RGBA; + handle = ctx->create_blend_state(ctx, &blend); + ctx->bind_blend_state(ctx, handle); + } + + { + struct pipe_depth_stencil_alpha_state depthstencil; + void *handle; + memset(&depthstencil, 0, sizeof depthstencil); + handle = ctx->create_depth_stencil_alpha_state(ctx, &depthstencil); + ctx->bind_depth_stencil_alpha_state(ctx, handle); + } + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(&rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.gl_rasterization_rules = 1; + handle = ctx->create_rasterizer_state(ctx, &rasterizer); + ctx->bind_rasterizer_state(ctx, handle); + } + + set_viewport(0, 0, WIDTH, HEIGHT, 30, 1000); + + init_tex(); + init_fs_constbuf(); + + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); + set_geometry_shader(); +} + +static void args(int argc, char *argv[]) +{ + int i; + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "-fps") == 0) { + show_fps = 1; + } + else if (strcmp(argv[i], "-strip") == 0) { + draw_strip = 1; + } + else if (i == argc - 1) { + filename = argv[i]; + } + else { + usage(argv[0]); + exit(1); + } + } + + if (!filename) { + usage(argv[0]); + exit(1); + } +} + +int main( int argc, char *argv[] ) +{ + args(argc,argv); + init(); + + graw_set_display_func( draw ); + graw_main_loop(); + return 0; +} diff --git a/src/gallium/tests/graw/tri-gs.c b/src/gallium/tests/graw/tri-gs.c new file mode 100644 index 00000000000..d187505f8d4 --- /dev/null +++ b/src/gallium/tests/graw/tri-gs.c @@ -0,0 +1,267 @@ +/* Display a cleared blue window. This demo has no dependencies on + * any utility code, just the graw interface and gallium. + */ + +#include "state_tracker/graw.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" + +#include "util/u_debug.h" /* debug_dump_surface_bmp() */ +#include "util/u_memory.h" /* Offset() */ + +enum pipe_format formats[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_NONE +}; + +static const int WIDTH = 300; +static const int HEIGHT = 300; + +static struct pipe_screen *screen = NULL; +static struct pipe_context *ctx = NULL; +static struct pipe_surface *surf = NULL; +static void *window = NULL; + +struct vertex { + float position[4]; + float color[4]; +}; + +static struct vertex vertices[4] = +{ + { { 0.0f, -0.9f, 0.0f, 1.0f }, + { 1.0f, 0.0f, 0.0f, 1.0f } + }, + { { -0.9f, 0.9f, 0.0f, 1.0f }, + { 0.0f, 1.0f, 0.0f, 1.0f } + }, + { { 0.9f, 0.9f, 0.0f, 1.0f }, + { 0.0f, 0.0f, 1.0f, 1.0f } + } +}; + + + + +static void set_viewport( float x, float y, + float width, float height, + float near, float far) +{ + float z = far; + float half_width = (float)width / 2.0f; + float half_height = (float)height / 2.0f; + float half_depth = ((float)far - (float)near) / 2.0f; + struct pipe_viewport_state vp; + + vp.scale[0] = half_width; + vp.scale[1] = half_height; + vp.scale[2] = half_depth; + vp.scale[3] = 1.0f; + + vp.translate[0] = half_width + x; + vp.translate[1] = half_height + y; + vp.translate[2] = half_depth + z; + vp.translate[3] = 0.0f; + + ctx->set_viewport_state( ctx, &vp ); +} + +static void set_vertices( void ) +{ + struct pipe_vertex_element ve[2]; + struct pipe_vertex_buffer vbuf; + void *handle; + + memset(ve, 0, sizeof ve); + + ve[0].src_offset = Offset(struct vertex, position); + ve[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[1].src_offset = Offset(struct vertex, color); + ve[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + + handle = ctx->create_vertex_elements_state(ctx, 2, ve); + ctx->bind_vertex_elements_state(ctx, handle); + + + vbuf.stride = sizeof( struct vertex ); + vbuf.max_index = sizeof(vertices) / vbuf.stride; + vbuf.buffer_offset = 0; + vbuf.buffer = screen->user_buffer_create(screen, + vertices, + sizeof(vertices), + PIPE_BIND_VERTEX_BUFFER); + + ctx->set_vertex_buffers(ctx, 1, &vbuf); +} + +static void set_vertex_shader( void ) +{ + void *handle; + const char *text = + "VERT\n" + "DCL IN[0]\n" + "DCL IN[1]\n" + "DCL OUT[0], POSITION\n" + "DCL OUT[1], COLOR\n" + " 0: MOV OUT[1], IN[1]\n" + " 1: MOV OUT[0], IN[0]\n" + " 2: END\n"; + + handle = graw_parse_vertex_shader(ctx, text); + ctx->bind_vs_state(ctx, handle); +} + +static void set_fragment_shader( void ) +{ + void *handle; + const char *text = + "FRAG\n" + "DCL IN[0], COLOR, LINEAR\n" + "DCL OUT[0], COLOR\n" + " 0: MOV OUT[0], IN[0]\n" + " 1: END\n"; + + handle = graw_parse_fragment_shader(ctx, text); + ctx->bind_fs_state(ctx, handle); +} + + +static void set_geometry_shader( void ) +{ + void *handle; + const char *text = + "GEOM\n" + "PROPERTY GS_INPUT_PRIMITIVE TRIANGLES\n" + "PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP\n" + "DCL IN[][0], POSITION, CONSTANT\n" + "DCL IN[][1], COLOR, CONSTANT\n" + "DCL OUT[0], POSITION, CONSTANT\n" + "DCL OUT[1], COLOR, CONSTANT\n" + "0:MOV OUT[0], IN[0][0]\n" + "1:MOV OUT[1], IN[0][1]\n" + "2:EMIT\n" + "3:MOV OUT[0], IN[1][0]\n" + "4:MOV OUT[1], IN[0][1]\n" /* copy color from input vertex 0 */ + "5:EMIT\n" + "6:MOV OUT[0], IN[2][0]\n" + "7:MOV OUT[1], IN[2][1]\n" + "8:EMIT\n" + "9:ENDPRIM\n" + "10:END\n"; + + handle = graw_parse_geometry_shader(ctx, text); + ctx->bind_gs_state(ctx, handle); +} + +static void draw( void ) +{ + float clear_color[4] = {1,0,1,1}; + + ctx->clear(ctx, PIPE_CLEAR_COLOR, clear_color, 0, 0); + ctx->draw_arrays(ctx, PIPE_PRIM_TRIANGLES, 0, 3); + ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL); + + screen->flush_frontbuffer(screen, surf, window); +} + + +static void init( void ) +{ + struct pipe_framebuffer_state fb; + struct pipe_resource *tex, templat; + int i; + + /* It's hard to say whether window or screen should be created + * first. Different environments would prefer one or the other. + * + * Also, no easy way of querying supported formats if the screen + * cannot be created first. + */ + for (i = 0; + window == NULL && formats[i] != PIPE_FORMAT_NONE; + i++) { + + screen = graw_create_window_and_screen(0,0,300,300, + formats[i], + &window); + } + + ctx = screen->context_create(screen, NULL); + if (ctx == NULL) + exit(3); + + templat.target = PIPE_TEXTURE_2D; + templat.format = formats[i]; + templat.width0 = WIDTH; + templat.height0 = HEIGHT; + templat.depth0 = 1; + templat.last_level = 0; + templat.nr_samples = 1; + templat.bind = (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + + tex = screen->resource_create(screen, + &templat); + if (tex == NULL) + exit(4); + + surf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET); + if (surf == NULL) + exit(5); + + memset(&fb, 0, sizeof fb); + fb.nr_cbufs = 1; + fb.width = WIDTH; + fb.height = HEIGHT; + fb.cbufs[0] = surf; + + ctx->set_framebuffer_state(ctx, &fb); + + { + struct pipe_blend_state blend; + void *handle; + memset(&blend, 0, sizeof blend); + blend.rt[0].colormask = PIPE_MASK_RGBA; + handle = ctx->create_blend_state(ctx, &blend); + ctx->bind_blend_state(ctx, handle); + } + + { + struct pipe_depth_stencil_alpha_state depthstencil; + void *handle; + memset(&depthstencil, 0, sizeof depthstencil); + handle = ctx->create_depth_stencil_alpha_state(ctx, &depthstencil); + ctx->bind_depth_stencil_alpha_state(ctx, handle); + } + + { + struct pipe_rasterizer_state rasterizer; + void *handle; + memset(&rasterizer, 0, sizeof rasterizer); + rasterizer.cull_face = PIPE_FACE_NONE; + rasterizer.gl_rasterization_rules = 1; + handle = ctx->create_rasterizer_state(ctx, &rasterizer); + ctx->bind_rasterizer_state(ctx, handle); + } + + set_viewport(0, 0, WIDTH, HEIGHT, 30, 1000); + set_vertices(); + set_vertex_shader(); + set_fragment_shader(); + set_geometry_shader(); +} + + +int main( int argc, char *argv[] ) +{ + init(); + + graw_set_display_func( draw ); + graw_main_loop(); + return 0; +} diff --git a/src/gallium/tests/graw/tri.c b/src/gallium/tests/graw/tri.c index f6e1f5b155a..80377f526d6 100644 --- a/src/gallium/tests/graw/tri.c +++ b/src/gallium/tests/graw/tri.c @@ -5,7 +5,6 @@ #include "state_tracker/graw.h" #include "pipe/p_screen.h" #include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" diff --git a/src/gallium/tests/graw/vertex-shader b/src/gallium/tests/graw/vertex-shader new file mode 120000 index 00000000000..7b216e8ca08 --- /dev/null +++ b/src/gallium/tests/graw/vertex-shader @@ -0,0 +1 @@ +../python/tests/regress/vertex-shader
\ No newline at end of file diff --git a/src/gallium/tests/graw/vp-test.c b/src/gallium/tests/graw/vs-test.c index e68f9f439b4..7f93db42c0d 100644 --- a/src/gallium/tests/graw/vp-test.c +++ b/src/gallium/tests/graw/vs-test.c @@ -202,6 +202,7 @@ static void set_vertex_shader( void ) handle = graw_parse_vertex_shader(ctx, buf); ctx->bind_vs_state(ctx, handle); + fclose(f); } static void set_fragment_shader( void ) diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh index f70a5146f4e..bbc3a10f9b5 100644 --- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-2d.sh @@ -2,8 +2,8 @@ FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR -DCL CONST[1][1..2] +DCL CONST[1][6] -MAD OUT[0], IN[0], CONST[1][2], CONST[1][1] +MOV OUT[0], CONST[1][6] END diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh new file mode 100644 index 00000000000..6b034915769 --- /dev/null +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad-immx.sh @@ -0,0 +1,10 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR +DCL IMMX[0..1] {{ 0.5, 0.4, 0.6, 1.0 }, + { 0.5, 0.4, 0.6, 0.0 }} + +MAD OUT[0], IN[0], IMMX[0], IMMX[1] + +END diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh new file mode 100644 index 00000000000..81bcad2d65c --- /dev/null +++ b/src/gallium/tests/python/tests/regress/fragment-shader/frag-tempx.sh @@ -0,0 +1,14 @@ +FRAG + +DCL IN[0], COLOR, LINEAR +DCL OUT[0], COLOR + +DCL TEMPX[0][0..1] + +IMM FLT32 { -0.5, -0.4, -0.6, 0.0 } + +ADD TEMPX[0][0], IN[0], IMM[0] +ADD TEMPX[0][1], IN[0], IMM[0] +ABS OUT[0], TEMPX[0][1] + +END diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 469f1446d05..3d87a994c15 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -45,18 +45,21 @@ boolean r600_buffer_get_handle(struct radeon *rw, struct winsys_handle *whandle) { struct drm_gem_flink flink; - struct r600_buffer* rbuffer; - int r; + struct r600_buffer* rbuffer = (struct r600_buffer*)buf; - rbuffer = (struct r600_buffer*)buf; - if (!rbuffer->flink) { - flink.handle = rbuffer->bo->handle; - r = ioctl(rw->fd, DRM_IOCTL_GEM_FLINK, &flink); - if (r) { - return FALSE; + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + if (!rbuffer->flink) { + flink.handle = rbuffer->bo->handle; + + if (ioctl(rw->fd, DRM_IOCTL_GEM_FLINK, &flink)) { + return FALSE; + } + + rbuffer->flink = flink.name; } - rbuffer->flink = flink.name; + whandle->handle = rbuffer->flink; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = rbuffer->bo->handle; } - whandle->handle = rbuffer->flink; return TRUE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index ee1b9ede257..a4b6cff33d1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -267,16 +267,14 @@ static struct radeon_drm_buffer *get_drm_buffer(struct pb_buffer *_buf) boolean radeon_drm_bufmgr_get_handle(struct pb_buffer *_buf, struct winsys_handle *whandle) { - int retval, fd; struct drm_gem_flink flink; struct radeon_drm_buffer *buf = get_drm_buffer(_buf); + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { if (!buf->flinked) { - fd = buf->mgr->rws->fd; flink.handle = buf->bo->handle; - retval = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink); - if (retval) { + if (ioctl(buf->mgr->rws->fd, DRM_IOCTL_GEM_FLINK, &flink)) { return FALSE; } diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index a9b36578ef8..cb26447606d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -136,29 +136,21 @@ static boolean radeon_r300_winsys_is_buffer_referenced(struct r300_winsys_screen } static struct r300_winsys_buffer *radeon_r300_winsys_buffer_from_handle(struct r300_winsys_screen *rws, - struct pipe_screen *screen, - struct winsys_handle *whandle, - unsigned *stride) + unsigned handle) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); struct pb_buffer *_buf; - _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, whandle->handle); - *stride = whandle->stride; + _buf = radeon_drm_bufmgr_create_buffer_from_handle(ws->kman, handle); return radeon_libdrm_winsys_buffer(_buf); } static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *rws, struct r300_winsys_buffer *buffer, - unsigned stride, - struct winsys_handle *whandle) + struct winsys_handle *whandle) { struct pb_buffer *_buf = radeon_pb_buffer(buffer); - boolean ret; - ret = radeon_drm_bufmgr_get_handle(_buf, whandle); - if (ret) - whandle->stride = stride; - return ret; + return radeon_drm_bufmgr_get_handle(_buf, whandle); } static void radeon_set_flush_cb(struct r300_winsys_screen *rws, @@ -192,25 +184,23 @@ static boolean radeon_validate(struct r300_winsys_screen *rws) return TRUE; } -static void radeon_get_cs_info(struct r300_winsys_screen *rws, - struct r300_cs_info *info) +static unsigned radeon_get_cs_free_dwords(struct r300_winsys_screen *rws) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); struct radeon_cs *cs = ws->cs; - info->capacity = cs->ndw; - info->used = cs->cdw; - info->free = cs->ndw - cs->cdw; + return cs->ndw - cs->cdw; } -static void radeon_begin_cs(struct r300_winsys_screen *rws, - int size, - const char* file, - const char* function, - int line) +static uint32_t *radeon_get_cs_pointer(struct r300_winsys_screen *rws, + unsigned count) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_begin(ws->cs, size, file, function, line); + struct radeon_cs *cs = ws->cs; + uint32_t *ptr = cs->packets + cs->cdw; + + cs->cdw += count; + return ptr; } static void radeon_write_cs_dword(struct r300_winsys_screen *rws, @@ -243,15 +233,6 @@ static void radeon_reset_bos(struct r300_winsys_screen *rws) radeon_cs_space_reset_bos(ws->cs); } -static void radeon_end_cs(struct r300_winsys_screen *rws, - const char* file, - const char* function, - int line) -{ - struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); - radeon_cs_end(ws->cs, file, function, line); -} - static void radeon_flush_cs(struct r300_winsys_screen *rws) { struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); @@ -345,12 +326,11 @@ radeon_setup_winsys(int fd, struct radeon_libdrm_winsys* ws) ws->base.add_buffer = radeon_add_buffer; ws->base.validate = radeon_validate; ws->base.destroy = radeon_winsys_destroy; - ws->base.get_cs_info = radeon_get_cs_info; - ws->base.begin_cs = radeon_begin_cs; + ws->base.get_cs_free_dwords = radeon_get_cs_free_dwords; + ws->base.get_cs_pointer = radeon_get_cs_pointer; ws->base.write_cs_dword = radeon_write_cs_dword; ws->base.write_cs_table = radeon_write_cs_table; ws->base.write_cs_reloc = radeon_write_cs_reloc; - ws->base.end_cs = radeon_end_cs; ws->base.flush_cs = radeon_flush_cs; ws->base.reset_bos = radeon_reset_bos; ws->base.set_flush_cb = radeon_set_flush_cb; diff --git a/src/gallium/winsys/sw/fbdev/Makefile b/src/gallium/winsys/sw/fbdev/Makefile new file mode 100644 index 00000000000..8832aab1934 --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/Makefile @@ -0,0 +1,13 @@ +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = fbdev + +LIBRARY_INCLUDES = + +LIBRARY_DEFINES = + +C_SOURCES = \ + fbdev_sw_winsys.c + +include ../../../Makefile.template diff --git a/src/gallium/winsys/sw/fbdev/SConscript b/src/gallium/winsys/sw/fbdev/SConscript new file mode 100644 index 00000000000..3b5b4ff1c07 --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/SConscript @@ -0,0 +1,23 @@ +####################################################################### +# SConscript for fbdev winsys + + +Import('*') + +if env['platform'] == 'linux': + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/gallium/include', + '#/src/gallium/auxiliary', + '#/src/gallium/drivers', + ]) + + ws_fbdev = env.ConvenienceLibrary( + target = 'ws_fbdev', + source = [ + 'fbdev_sw_winsys.c', + ] + ) + Export('ws_fbdev') diff --git a/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c new file mode 100644 index 00000000000..f4f4cd7969b --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.c @@ -0,0 +1,224 @@ +/* + * Mesa 3-D graphics library + * Version: 7.9 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <linux/fb.h> + +#include "pipe/p_compiler.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "state_tracker/sw_winsys.h" + +#include "fbdev_sw_winsys.h" + +struct fbdev_sw_displaytarget +{ + enum pipe_format format; + unsigned width; + unsigned height; + unsigned stride; + + void *data; + void *mapped; +}; + +struct fbdev_sw_winsys +{ + struct sw_winsys base; + + int fd; + enum pipe_format format; + + struct fb_fix_screeninfo finfo; + void *fbmem; + unsigned rows; + unsigned stride; +}; + +static INLINE struct fbdev_sw_displaytarget * +fbdev_sw_displaytarget(struct sw_displaytarget *dt) +{ + return (struct fbdev_sw_displaytarget *) dt; +} + +static INLINE struct fbdev_sw_winsys * +fbdev_sw_winsys(struct sw_winsys *ws) +{ + return (struct fbdev_sw_winsys *) ws; +} + +static void +fbdev_displaytarget_display(struct sw_winsys *ws, + struct sw_displaytarget *dt, + void *context_private) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + unsigned rows, len, i; + + rows = MIN2(fbdt->height, fbdev->rows); + len = util_format_get_stride(fbdt->format, fbdt->width); + len = MIN2(len, fbdev->stride); + + for (i = 0; i < rows; i++) { + void *dst = fbdev->fbmem + fbdev->stride * i; + void *src = fbdt->data + fbdt->stride * i; + + memcpy(dst, src, len); + } +} + +static void +fbdev_displaytarget_unmap(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + fbdt->mapped = NULL; +} + +static void * +fbdev_displaytarget_map(struct sw_winsys *ws, + struct sw_displaytarget *dt, + unsigned flags) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + fbdt->mapped = fbdt->data; + return fbdt->mapped; +} + +static void +fbdev_displaytarget_destroy(struct sw_winsys *ws, + struct sw_displaytarget *dt) +{ + struct fbdev_sw_displaytarget *fbdt = fbdev_sw_displaytarget(dt); + + if (fbdt->data) + align_free(fbdt->data); + + FREE(fbdt); +} + +static struct sw_displaytarget * +fbdev_displaytarget_create(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format, + unsigned width, unsigned height, + unsigned alignment, + unsigned *stride) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + struct fbdev_sw_displaytarget *fbdt; + unsigned nblocksy, size, format_stride; + + if (fbdev->format != format) + return NULL; + + fbdt = CALLOC_STRUCT(fbdev_sw_displaytarget); + if (!fbdt) + return NULL; + + fbdt->format = format; + fbdt->width = width; + fbdt->height = height; + + format_stride = util_format_get_stride(format, width); + fbdt->stride = align(format_stride, alignment); + + nblocksy = util_format_get_nblocksy(format, height); + size = fbdt->stride * nblocksy; + + fbdt->data = align_malloc(size, alignment); + if (!fbdt->data) { + FREE(fbdt); + return NULL; + } + + *stride = fbdt->stride; + + return (struct sw_displaytarget *) fbdt; +} + +static boolean +fbdev_is_displaytarget_format_supported(struct sw_winsys *ws, + unsigned tex_usage, + enum pipe_format format) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + return (fbdev->format == format); +} + +static void +fbdev_destroy(struct sw_winsys *ws) +{ + struct fbdev_sw_winsys *fbdev = fbdev_sw_winsys(ws); + + munmap(fbdev->fbmem, fbdev->finfo.smem_len); + FREE(fbdev); +} + +struct sw_winsys * +fbdev_create_sw_winsys(int fd, enum pipe_format format) +{ + struct fbdev_sw_winsys *fbdev; + + fbdev = CALLOC_STRUCT(fbdev_sw_winsys); + if (!fbdev) + return NULL; + + fbdev->fd = fd; + fbdev->format = format; + if (ioctl(fbdev->fd, FBIOGET_FSCREENINFO, &fbdev->finfo)) { + FREE(fbdev); + return NULL; + } + + fbdev->fbmem = mmap(0, fbdev->finfo.smem_len, + PROT_WRITE, MAP_SHARED, fbdev->fd, 0); + if (fbdev->fbmem == MAP_FAILED) { + FREE(fbdev); + return NULL; + } + + fbdev->rows = fbdev->finfo.smem_len / fbdev->finfo.line_length; + fbdev->stride = fbdev->finfo.line_length; + + fbdev->base.destroy = fbdev_destroy; + fbdev->base.is_displaytarget_format_supported = + fbdev_is_displaytarget_format_supported; + + fbdev->base.displaytarget_create = fbdev_displaytarget_create; + fbdev->base.displaytarget_destroy = fbdev_displaytarget_destroy; + fbdev->base.displaytarget_map = fbdev_displaytarget_map; + fbdev->base.displaytarget_unmap = fbdev_displaytarget_unmap; + + fbdev->base.displaytarget_display = fbdev_displaytarget_display; + + return &fbdev->base; +} diff --git a/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h new file mode 100644 index 00000000000..d958ab9db3e --- /dev/null +++ b/src/gallium/winsys/sw/fbdev/fbdev_sw_winsys.h @@ -0,0 +1,38 @@ +/* + * Mesa 3-D graphics library + * Version: 7.8 + * + * Copyright (C) 2010 LunarG Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <[email protected]> + */ + +#ifndef FBDEV_SW_WINSYS +#define FBDEV_SW_WINSYS + +struct sw_winsys; +enum pipe_format; + +struct sw_winsys * +fbdev_create_sw_winsys(int fd, enum pipe_format format); + +#endif /* FBDEV_SW_WINSYS */ |