diff options
author | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
---|---|---|
committer | Michal Krol <[email protected]> | 2010-01-05 11:04:50 +0100 |
commit | 9b21b3c52a8a7d58d08151d1a6bf25c472dec213 (patch) | |
tree | d9083b6af4e2e9b70a7fa6cd31bac45a36e0f6b6 /src/gallium/drivers | |
parent | 543b9566bdaa48fea2df1866fa1310c1cdbcde27 (diff) | |
parent | 1f9aa38f4e2be47229d92be2c1189c2b8d9c7133 (diff) |
Merge branch 'master' into instanced-arrays
Conflicts:
src/gallium/auxiliary/tgsi/tgsi_dump.c
src/gallium/include/pipe/p_shader_tokens.h
Diffstat (limited to 'src/gallium/drivers')
117 files changed, 1654 insertions, 871 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 5cc1d4ddf81..01bea0f8cce 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp) } } - draw_set_mapped_constant_buffer(sp->draw, + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], sp->constants[PIPE_SHADER_VERTEX].buffer->size); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index efc4f78364b..b723e794e71 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index ac5fafec1ad..5b87286d4c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell) const struct draw_context *const draw = cell->draw; struct cell_shader_info info; - info.num_outputs = draw_num_vs_outputs(draw); + info.num_outputs = draw_num_shader_outputs(draw); info.declarations = (uintptr_t) draw->vs.machine.Declarations; info.num_declarations = draw->vs.machine.NumDeclarations; info.instructions = (uintptr_t) draw->vs.machine.Instructions; diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 5ed330aa6ec..d86d8e09a51 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -1681,7 +1681,7 @@ exec_instruction( } break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c index effeba12972..669964770d4 100644 --- a/src/gallium/drivers/i915/i915_buffer.c +++ b/src/gallium/drivers/i915/i915_buffer.c @@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen, { struct i915_buffer *buf = i915_buffer(buffer); assert(!buf->ibuf); + (void) buf; } static void diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 94c8aee30fe..949f0463501 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe, } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, i915->current.constants[PIPE_SHADER_VERTEX], (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 4 * sizeof(float))); diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index e580b6c0f7f..1528afc8599 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -58,10 +58,10 @@ translate_wrap_mode(unsigned wrap) return TEXCOORDMODE_CLAMP_EDGE; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXCOORDMODE_CLAMP_BORDER; -/* + /* case PIPE_TEX_WRAP_MIRRORED_REPEAT: return TEXCOORDMODE_MIRROR; -*/ + */ default: return TEXCOORDMODE_WRAP; } diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 178d4e8781d..03dd5091a61 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* pos */ - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; @@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 ) /* primary color */ if (colors[0]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_COLOR; } /* secondary color */ if (colors[1]) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; } /* fog coord, not fog blend factor */ if (fog) { - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } @@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 58d9e56df27..d67a1a62633 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw, c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; - if (c.key.output_color0) + if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT) c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; - if (c.key.output_color1) + if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT) c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; - if (c.key.output_bfc0) + if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; - if (c.key.output_bfc1) + if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT) c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; - if (c.key.output_edgeflag) + if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT) c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) @@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw) */ /* CACHE_NEW_VS_PROG */ key.nr_attrs = brw->vs.prog_data->nr_outputs; - key.output_edgeflag = brw->vs.prog_data->output_edgeflag; /* PIPE_NEW_VS */ key.output_hpos = vs->output_hpos; @@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw) key.output_color1 = vs->output_color1; key.output_bfc0 = vs->output_bfc0; key.output_bfc1 = vs->output_bfc1; + key.output_edgeflag = vs->output_edgeflag; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 56e78074000..8c006bb95b2 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -120,6 +120,13 @@ #define BRW_MAX_CURBE (32*16) + +/* Need a value to say a particular vertex shader output isn't + * present. Limits us to 63 outputs currently. + */ +#define BRW_OUTPUT_NOT_PRESENT ((1<<6)-1) + + struct brw_context; struct brw_depth_stencil_state { @@ -335,8 +342,6 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLuint output_edgeflag; - GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h index 77d402d35e6..ba5b109c483 100644 --- a/src/gallium/drivers/i965/brw_disasm.h +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -23,6 +23,8 @@ #ifndef BRW_DISASM_H #define BRW_DISASM_H +#include <stdio.h> + struct brw_instruction; int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 4fe7b6acc16..00d8eaccbc4 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p, jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); - assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 211be881789..452e1e89f93 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw, const float *rgba ) { enum pipe_error ret; - unsigned value; + union util_color value; util_pack_color( rgba, bsurface->base.format, &value ); if (bsurface->cpp == 2) - value |= value << 16; + value.ui |= value.ui << 16; - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); if (ret != 0) { brw_context_flush( brw ); - ret = try_clear( brw, bsurface, value ); + ret = try_clear( brw, bsurface, value.ui ); assert( ret == 0 ); } } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6b03094f502..5d4e5025f97 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -3,6 +3,7 @@ #include "pipe/p_state.h" #include "brw_context.h" +#include "brw_debug.h" /** * called from intelDrawBuffer() @@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe, struct brw_context *brw = brw_context(pipe); brw->curr.viewport = *viewport; - brw->curr.ccv.min_depth = 0.0; /* XXX: near */ - brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2]; + brw->curr.ccv.max_depth = viewport->scale[2] * 1.0 + viewport->translate[2]; + + if (0) + debug_printf("%s depth range %f .. %f\n", + __FUNCTION__, + brw->curr.ccv.min_depth, + brw->curr.ccv.max_depth); brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 20f20571f65..bb32d90e331 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe, vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); + vs->output_hpos = BRW_OUTPUT_NOT_PRESENT; + vs->output_color0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_color1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT; + vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT; + vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT; + for (i = 0; i < vs->info.num_outputs; i++) { int index = vs->info.output_semantic_index[i]; switch (vs->info.output_semantic_name[i]) { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 714def5046d..8a16205d2f6 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c ) static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { - struct brw_vertex_shader *vs = c->vp; - - if (vs_output == c->prog_data.output_edgeflag) { - return FALSE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); - } + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); } @@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned vs_output, unsigned *fs_input_slot ) { - struct brw_vertex_shader *vs = c->vp; + const struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; - if (vs_output == c->prog_data.output_edgeflag) { - *fs_input_slot = c->key.fs_signature.nr_inputs; - return TRUE; - } - else { - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; - } + *fs_input_slot = i; + return TRUE; } } diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index de6156795d3..3ca676647c7 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'): env.Tool('udis86') +env.Append(CPPPATH = ['.']) + env.CodeGenerate( target = 'lp_tile_soa.c', script = 'lp_tile_soa.py', @@ -74,7 +76,7 @@ llvmpipe = env.ConvenienceLibrary( env = env.Clone() -env.Prepend(LIBS = [llvmpipe] + auxiliaries) +env.Prepend(LIBS = [llvmpipe] + gallium) tests = [ 'format', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index d14f468ba93..ced7b9c11d7 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld, enum lp_build_blend_swizzle { LP_BUILD_BLEND_SWIZZLE_RGBA = 0, - LP_BUILD_BLEND_SWIZZLE_AAAA = 1, + LP_BUILD_BLEND_SWIZZLE_AAAA = 1 }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c index dcc25fbff86..25c10af29f0 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c @@ -47,7 +47,7 @@ */ enum lp_build_flow_construct_kind { lP_BUILD_FLOW_SCOPE, - LP_BUILD_FLOW_SKIP, + LP_BUILD_FLOW_SKIP }; diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c index 5836e0173f9..10e82f120bb 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c @@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); - // UIToFP can't be expressed in SSE2 + /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); if (normalized) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c index a67c70ff25a..61b033c9fcf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c @@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, { const uint unit = inst->Src[1].Register.Index; LLVMValueRef lodbias; - LLVMValueRef oow; + LLVMValueRef oow = NULL; LLVMValueRef coords[3]; unsigned num_coords; unsigned i; @@ -446,7 +446,12 @@ emit_instruction( { unsigned chan_index; LLVMValueRef src0, src1, src2; - LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + LLVMValueRef tmp0, tmp1, tmp2; + LLVMValueRef tmp3 = NULL; + LLVMValueRef tmp4 = NULL; + LLVMValueRef tmp5 = NULL; + LLVMValueRef tmp6 = NULL; + LLVMValueRef tmp7 = NULL; LLVMValueRef res; LLVMValueRef dst0[NUM_CHANNELS]; @@ -1310,7 +1315,7 @@ emit_instruction( return 0; break; - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return 0; diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 001311e7031..37587d4f792 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe ); unsigned i; + /* check if any of the bound drawing surfaces are this texture */ if(llvmpipe->dirty_render_cache) { for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { if(llvmpipe->framebuffer.cbufs[i] && @@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe, llvmpipe->framebuffer.zsbuf->texture == texture) return PIPE_REFERENCED_FOR_WRITE; } + + /* check if any of the tex_cache textures are this texture */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + if (llvmpipe->tex_cache[i] && + llvmpipe->tex_cache[i]->texture == texture) + return PIPE_REFERENCED_FOR_READ; + } for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { if (llvmpipe->vertex_tex_cache[i] && llvmpipe->vertex_tex_cache[i]->texture == texture) diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c index 2299566c665..a96c2cad9dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c +++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c @@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_arrays(draw, mode, start, count); /* - * unmap vertex/index buffers - will cause draw module to flush + * unmap vertex/index buffers */ for (i = 0; i < lp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); @@ -112,6 +112,12 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); /* Note: leave drawing surfaces mapped */ diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c index 4abff4ecccc..e8e2e2524ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c @@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c index b2e75d3b14e..a94cd05ef20 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_debug_dump.h" +#include "draw/draw_context.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_state.h" @@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->blend == blend) + return; + + draw_flush(llvmpipe->draw); + llvmpipe->blend = blend; llvmpipe->dirty |= LP_NEW_BLEND; @@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe, struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); unsigned i, j; + if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0) + return; + + draw_flush(llvmpipe->draw); + memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color); if(!llvmpipe->jit_context.blend_color) @@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe, { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + if (llvmpipe->depth_stencil == depth_stencil) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->depth_stencil = depth_stencil; if(llvmpipe->depth_stencil) llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value; diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index e703964aaa8..acfd7be5f74 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) /* compute vertex layout now */ const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(llvmpipe->draw); + const uint num = draw_current_shader_outputs(llvmpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(llvmpipe->draw, + src = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw, + llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (llvmpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 22683ff8b42..f2b8c362644 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -673,7 +673,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); - llvmpipe->fs = (struct lp_fragment_shader *) fs; + if (llvmpipe->fs == fs) + return; + + draw_flush(llvmpipe->draw); + + llvmpipe->fs = fs; llvmpipe->dirty |= LP_NEW_FS; } @@ -688,6 +693,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) struct lp_fragment_shader_variant *variant; assert(fs != llvmpipe->fs); + (void) llvmpipe; variant = shader->variants; while(variant) { @@ -723,8 +729,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - if(shader == PIPE_SHADER_VERTEX) - draw_flush(llvmpipe->draw); + draw_flush(llvmpipe->draw); /* note: reference counting */ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer); @@ -734,7 +739,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, } if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(llvmpipe->draw, data, size); + draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX, + data, size); } llvmpipe->dirty |= LP_NEW_CONSTANTS; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index 4561c6b8456..aa3b5a3f91e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe, } void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + if (llvmpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(llvmpipe->draw, setup); + draw_set_rasterizer_state(llvmpipe->draw, rasterizer); - llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + llvmpipe->rasterizer = rasterizer; llvmpipe->dirty |= LP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index ba970cac985..e37ff04f3df 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe, struct llvmpipe_context *lp = llvmpipe_context(pipe); uint i; + draw_flush(lp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c index 8a761648e7e..884e3878e62 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c @@ -70,14 +70,18 @@ fail: void -llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs; - llvmpipe->vs = (const struct lp_vertex_shader *)vs; + if (llvmpipe->vs == vs) + return; - draw_bind_vertex_shader(llvmpipe->draw, - (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL)); + draw_bind_vertex_shader(llvmpipe->draw, + vs ? vs->draw_data : NULL); + + llvmpipe->vs = vs; llvmpipe->dirty |= LP_NEW_VS; } diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c index 968c7a2d4aa..faddfb96779 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_conv.c +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -330,7 +330,7 @@ test_one(unsigned verbose, fprintf(stderr, "conv.bc written\n"); fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n"); firsttime = FALSE; - //abort(); + /* abort(); */ } } diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h index 9fa6c368125..05fded78e16 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h @@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile * lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc, union tex_tile_address addr ); -static INLINE const union tex_tile_address +static INLINE union tex_tile_address tex_tile_address( unsigned x, unsigned y, unsigned z, diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c index 0d01c07fb5e..68520fa4f09 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c @@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, const struct pipe_sampler_state *sampler = samp->sampler; unsigned level0, level1, j, imgFilter; int width, height; - float levelBlend; + float levelBlend = 0.0f; choose_mipmap_levels(tgsi_sampler, s, t, p, lodbias, @@ -1241,7 +1241,7 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; - float levelBlend; + float levelBlend = 0.0f; const uint face = 0; choose_mipmap_levels(tgsi_sampler, s, t, p, diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h index 040b01865dd..19d00b58d37 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h @@ -29,7 +29,7 @@ #define LP_TILE_SOA_H #include "pipe/p_compiler.h" -#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS +#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */ #ifdef __cplusplus diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h index 595481c2cbc..74b472b6531 100644 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ b/src/gallium/drivers/llvmpipe/lp_winsys.h @@ -35,7 +35,7 @@ #define LP_WINSYS_H -#include "pipe/p_compiler.h" // for boolean +#include "pipe/p_compiler.h" /* for boolean */ #include "pipe/p_format.h" diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index e4cf91c005c..0437af3725c 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo, unsigned alignment, unsigned usage, unsigned size) { struct pipe_buffer *pb; - + pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *)); if (!pb) { nouveau_bo_ref(NULL, &bo); @@ -239,5 +239,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) void nouveau_screen_fini(struct nouveau_screen *screen) { + nouveau_channel_free(&screen->channel); } diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 42c77e5e778..4c3e08a43f5 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -23,6 +23,9 @@ #define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) +/* use along with GPU_WRITE for 2D-only writes */ +#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) + extern struct pipe_screen * nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 4b33636b2eb..770733a4a17 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -31,26 +31,26 @@ static boolean nv04_init_hwctx(struct nv04_context *nv04) { // requires a valid handle -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1); // OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1); OUT_RING(0); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(0x40182800); // OUT_RING(1<<20/*no cull*/); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); // OUT_RING(0x24|(1<<6)|(1<<8)); OUT_RING(0x120001a4); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1); OUT_RING(0x332213a1); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1); OUT_RING(0x11001010); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1); OUT_RING(0x0); -// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +// BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1); // OUT_RING(SCREEN_OFFSET); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1); OUT_RING(0xff000000); diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c index 0cce71ad1de..c152b52119a 100644 --- a/src/gallium/drivers/nv04/nv04_fragtex.c +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -4,7 +4,7 @@ #define _(m,tf) \ { \ PIPE_FORMAT_##m, \ - NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ + NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ } struct nv04_texture_format { @@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit) return; } - nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER | nv04_fragtex_format(pt->format) - | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) - | ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) - | ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE - | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + | ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE ; } diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index f6458232ae5..25395edfd71 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -93,7 +93,7 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render, static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -105,7 +105,7 @@ static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buf static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -114,7 +114,7 @@ static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buff static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) { - BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); + BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); OUT_RINGp(buffer + VERTEX_SIZE * v0,8); OUT_RINGp(buffer + VERTEX_SIZE * v1,8); OUT_RINGp(buffer + VERTEX_SIZE * v2,8); @@ -166,11 +166,11 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con if (numvert<3) break; - BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); for(j = 0; j<numvert; j++) OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); - BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 ); for(j = 0; j<numtri/2; j++ ) OUT_RING(striptbl[j]); if (numtri%2) @@ -185,7 +185,7 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const struct nv04_context* nv04 = render->nv04; int i,j; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); for(i = 1; i<nr_indices; i+=14) @@ -195,12 +195,12 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const if (numvert < 3) break; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); for(j=0;j<numvert;j++) OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 ); - BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2); for(j = 0; j<numtri/2; j++) OUT_RING(fantbl[j]); if (numtri%2) diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 170ce3eb7e5..7c5b6e8229a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->fahrenheit); nv04_surface_2d_takedown(&screen->eng2d); + nouveau_screen_fini(&screen->base); + FREE(pscreen); } @@ -163,10 +165,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) fahrenheit_class = 0; sub3d_class = 0; } else if (dev->chipset >= 0x10) { - fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + fahrenheit_class = NV10_TEXTURED_TRIANGLE; sub3d_class = NV10_CONTEXT_SURFACES_3D; } else { - fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + fahrenheit_class=NV04_TEXTURED_TRIANGLE; sub3d_class = NV04_CONTEXT_SURFACES_3D; } diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c index ef3005db5fc..e3dc4c5bf44 100644 --- a/src/gallium/drivers/nv04/nv04_state.c +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) { switch (wrap) { case PIPE_TEX_WRAP_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; break; case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; break; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; break; case PIPE_TEX_WRAP_CLAMP: - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: case PIPE_TEX_WRAP_MIRROR_CLAMP: default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; } - return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; + return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; } static void * @@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe, ss = MALLOC(sizeof(struct nv04_sampler_state)); - ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | - (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); if (cso->max_anisotropy > 1.0) { - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; } switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; break; } @@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, case PIPE_TEX_FILTER_LINEAR: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; break; } break; @@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe, default: switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; break; case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; break; case PIPE_TEX_MIPFILTER_NONE: default: - filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; break; } break; @@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe, */ rs = MALLOC(sizeof(struct nv04_rasterizer_state)); - rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; return (void *)rs; } @@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); hw->control = float_to_ubyte(cso->alpha.ref_value); - hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); - hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; - hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; - hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; - hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; - hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; - hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0; + hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format return (void *)hw; } @@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe, /* struct nv04_context *nv04 = nv04_context(pipe); // XXX - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); OUT_RING (((s->maxx - s->minx) << 16) | s->minx); OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ } diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index eb2c1c57c67..bd98ae091fd 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -58,7 +58,7 @@ static void nv04_emit_control(struct nv04_context* nv04) { uint32_t control = nv04->dsa->control; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(control); } @@ -75,7 +75,7 @@ static void nv04_emit_blend(struct nv04_context* nv04) blend|=(nv04_blend_func(nv04->blend->b_src)<<24); blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1); OUT_RING(blend); } @@ -84,7 +84,7 @@ static void nv04_emit_sampler(struct nv04_context *nv04, int unit) struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; struct pipe_texture *pt = &nv04mt->base; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3); OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); OUT_RING(nv04->sampler[unit]->filter); @@ -163,7 +163,7 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (nv04->dirty & NV04_NEW_CONTROL) { nv04->dirty &= ~NV04_NEW_CONTROL; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1); OUT_RING(nv04->dsa->control); } @@ -218,7 +218,7 @@ nv04_emit_hw_state(struct nv04_context *nv04) if (!(nv04->fp_samplers & (1 << i))) continue; struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; - BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); + BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2); OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); } diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c index 12df7fd1997..b24a9cee5ae 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.c +++ b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format) } static INLINE unsigned -nv04_swizzle_bits(unsigned x, unsigned y) +nv04_swizzle_bits_square(unsigned x, unsigned y) { unsigned u = (x & 0x001) << 0 | (x & 0x002) << 1 | @@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y) return v | u; } +/* rectangular swizzled textures are linear concatenations of swizzled square tiles */ +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h) +{ + unsigned s = MIN2(w, h); + unsigned m = s - 1; + return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m); +} + static int nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, struct pipe_surface *dst, int dx, int dy, @@ -158,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, for (x = 0; x < w; x += sub_w) { sub_w = MIN2(sub_w, w - x); - /* Must be 64-byte aligned */ - assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63)); + assert(!(dst->offset & 63)); BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); - OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format), + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); OUT_RING (chan, nv04_scaled_image_format(src->format)); OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w); - OUT_RING (chan, 0); + OUT_RING (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT)); OUT_RING (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w); OUT_RING (chan, 1 << 20); OUT_RING (chan, 1 << 20); @@ -491,3 +499,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen) ctx->fill = nv04_surface_fill; return ctx; } + +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns) +{ + int temp_flags; + + // printf("creating temp, flags is %i!\n", flags); + + if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD) + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD; + } + else + { + temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; + } + + struct nv40_screen* screen = (struct nv40_screen*)pscreen; + ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; + + struct pipe_texture templ; + memset(&templ, 0, sizeof(templ)); + templ.format = ns->base.texture->format; + templ.target = PIPE_TEXTURE_2D; + templ.width0 = ns->base.width; + templ.height0 = ns->base.height; + templ.depth0 = 1; + templ.last_level = 0; + + // TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented + templ.nr_samples = ns->base.texture->nr_samples; + + templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ); + struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags); + temp_ns->backing = ns; + + if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ) + eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height); + + return temp_ns; +} + diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h index 02b3f56ba8b..ce696a11a39 100644 --- a/src/gallium/drivers/nv04/nv04_surface_2d.h +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -4,6 +4,7 @@ struct nv04_surface { struct pipe_surface base; unsigned pitch; + struct nv04_surface* backing; }; struct nv04_surface_2d { @@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen); void nv04_surface_2d_takedown(struct nv04_surface_2d **); +struct nv04_surface* +nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns); + #endif diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c index 8446073ae80..2dd2e146a8f 100644 --- a/src/gallium/drivers/nv04/nv04_transfer.c +++ b/src/gallium/drivers/nv04/nv04_transfer.c @@ -16,14 +16,14 @@ struct nv04_transfer { }; static void -nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv04_compatible_transfer_tex(pt, level, &tx_tex_template); + nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index e3167814f2b..099ab100433 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv04->constbuf[PIPE_SHADER_VERTEX], nv04->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index ee5901e743e..6a39ddeaacb 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->celsius); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c index c664973e904..eb04af9782e 100644 --- a/src/gallium/drivers/nv10/nv10_transfer.c +++ b/src/gallium/drivers/nv10/nv10_transfer.c @@ -16,14 +16,14 @@ struct nv10_transfer { }; static void -nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv10_compatible_transfer_tex(pt, level, &tx_tex_template); + nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx) /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index 441a4f75f3f..0d261412485 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, } draw_set_mapped_constant_buffer(draw, + PIPE_SHADER_VERTEX, nv10->constbuf[PIPE_SHADER_VERTEX], nv10->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d1291a92e0a..8f7538e7f57 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -6,6 +6,7 @@ #include "nv20_context.h" #include "nv20_screen.h" +#include "../nv04/nv04_surface_2d.h" static void nv20_miptree_layout(struct nv20_miptree *nv20mt) @@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv20_miptree_layout(mt); mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size); @@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ns->base.offset = nv20mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base; + return &ns->base; } static void nv20_miptree_surface_destroy(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv20_miptree_surface_destroy(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 4eeacd1afd5..a0973f1ebdc 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->kelvin); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index 0122b1c2cdb..63cba1f4122 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -228,7 +228,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) } /* always do position */ { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->hwfmt[0] |= (1 << 0); } @@ -237,19 +237,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 4; i < 6; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 3)); } if (colors[0]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 3); } if (colors[1]) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); vinfo->hwfmt[0] |= (1 << 4); } @@ -258,7 +258,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 6; i < 10; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i - 1)); } @@ -267,7 +267,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 0; i < 4; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 9)); } @@ -276,13 +276,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20) for (i = 10; i < 12; i++) { if (!generics[i]) continue; - src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << (i + 3)); } if (fog) { - src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); vinfo->hwfmt[0] |= (1 << 15); } diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c index 69b79c809f4..699773e8e6f 100644 --- a/src/gallium/drivers/nv20/nv20_transfer.c +++ b/src/gallium/drivers/nv20/nv20_transfer.c @@ -16,14 +16,14 @@ struct nv20_transfer { }; static void -nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv20_compatible_transfer_tex(pt, level, &tx_tex_template); + nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, face, level, zslice, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv20_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 84d7db6c5e2..4bf461eba92 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, draw_set_mapped_element_buffer(draw, 0, NULL); } - draw_set_mapped_constant_buffer(draw, + draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, nv20->constbuf[PIPE_SHADER_VERTEX], nv20->constbuf_nr[PIPE_SHADER_VERTEX]); diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 46a821a48b1..38b39159f19 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -25,6 +25,12 @@ static void nv30_destroy(struct pipe_context *pipe) { struct nv30_context *nv30 = nv30_context(pipe); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (nv30->state.hw[i]) + so_ref(NULL, &nv30->state.hw[i]); + } if (nv30->draw) draw_destroy(nv30->draw); diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 40965a97723..d1ff18e2dfb 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv30_sr(NV30SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV30_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -870,6 +886,12 @@ void nv30_fragprog_destroy(struct nv30_context *nv30, struct nv30_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index ce95d9700f6..8fbba38e78f 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv30_context.h" +#include "../nv04/nv04_surface_2d.h" static void nv30_miptree_layout(struct nv30_miptree *nv30mt) @@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv30_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, @@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = nv30mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv30_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv30_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 7cd36902eb4..760467f7367 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -156,6 +156,12 @@ static void nv30_screen_destroy(struct pipe_screen *pscreen) { struct nv30_screen *screen = nv30_screen(pscreen); + unsigned i; + + for (i = 0; i < NV30_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->rankine); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); FREE(pscreen); } diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c index 2255a02caed..65598991c68 100644 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ b/src/gallium/drivers/nv30/nv30_transfer.c @@ -16,14 +16,14 @@ struct nv30_transfer { }; static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv30_compatible_transfer_tex(pt, level, &tx_tex_template); + nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv30_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index eb9cce4c786..d56c7a6b49c 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -25,6 +25,12 @@ static void nv40_destroy(struct pipe_context *pipe) { struct nv40_context *nv40 = nv40_context(pipe); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (nv40->state.hw[i]) + so_ref(NULL, &nv40->state.hw[i]); + } if (nv40->draw) draw_destroy(nv40->draw); diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index b2f19ecb699..3875bc35457 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -261,7 +261,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, map = pipe_buffer_map(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, map, nr); + draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, + map, nr); } draw_arrays(nv40->draw, mode, start, count); @@ -285,7 +286,7 @@ static INLINE void emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, unsigned semantic, unsigned index) { - unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); unsigned a = nv40->swtnl.nr_attribs++; nv40->swtnl.hw[a] = hw; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 1bf16726d10..bb9c85cc434 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); break; case NV40SR_NONE: sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); @@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nv40_sr(NV40SR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); dst.cc_test = NV40_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; @@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, neg(swz(tmp, X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SEQ: @@ -752,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) { struct tgsi_full_immediate *imm; float vals[4]; - + imm = &p.FullToken.FullImmediate; assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); assert(fpc->nr_imm < MAX_IMM); @@ -836,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, fp->insn[fpc->inst_offset + 1] = 0x00000000; fp->insn[fpc->inst_offset + 2] = 0x00000000; fp->insn[fpc->inst_offset + 3] = 0x00000000; - + fp->translated = TRUE; out_err: tgsi_parse_free(&parse); @@ -917,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) update_constants: if (fp->nr_consts) { float *map; - + map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { @@ -948,6 +964,12 @@ void nv40_fragprog_destroy(struct nv40_context *nv40, struct nv40_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index b974e68a077..89bd155ff49 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -5,6 +5,7 @@ #include "util/u_math.h" #include "nv40_context.h" +#include "../nv04/nv04_surface_2d.h" @@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. + * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. + * This also happens for small mipmaps of large textures. */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + nv40_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); @@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ns->base.offset = mt->level[level].image_offset[0]; } + /* create a linear temporary that we can render into if necessary. + * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so + * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ + if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) + return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &ns->base; } static void nv40_miptree_surface_del(struct pipe_surface *ps) { + struct nv04_surface* ns = (struct nv04_surface*)ps; + if(ns->backing) + { + struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) + screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); + nv40_miptree_surface_del(&ns->backing->base); + } + pipe_texture_reference(&ps->texture, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index bd13dfddd1c..d01e7128051 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -140,6 +140,12 @@ static void nv40_screen_destroy(struct pipe_screen *pscreen) { struct nv40_screen *screen = nv40_screen(pscreen); + unsigned i; + + for (i = 0; i < NV40_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } nouveau_resource_free(&screen->vp_exec_heap); nouveau_resource_free(&screen->vp_data_heap); @@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen) nouveau_notifier_free(&screen->query); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->curie); + nv04_surface_2d_takedown(&screen->eng2d); nouveau_screen_fini(&screen->base); diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c index b084a38b482..791ee6823d3 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -16,14 +16,14 @@ struct nv40_transfer { }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); template->target = pt->target; template->format = pt->format; - template->width0 = u_minify(pt->width0, level); - template->height0 = u_minify(pt->height0, level); + template->width0 = width; + template->height0 = height; template->depth0 = 1; template->last_level = 0; template->nr_samples = pt->nr_samples; @@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } + tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, pipe_transfer_buffer_flags(&tx->base)); @@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, /* TODO: Check if SIFM can un-swizzle */ nvscreen->eng2d->copy(nvscreen->eng2d, tx->surface, 0, 0, - src, 0, 0, - src->width, src->height); + src, x, y, + w, h); pipe_surface_reference(&src, NULL); } @@ -126,13 +128,13 @@ nv40_transfer_del(struct pipe_transfer *ptx) dst = pscreen->get_tex_surface(pscreen, ptx->texture, ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE); + PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ nvscreen->eng2d->copy(nvscreen->eng2d, - dst, 0, 0, + dst, tx->base.x, tx->base.y, tx->surface, 0, 0, - dst->width, dst->height); + tx->base.width, tx->base.height); pipe_surface_reference(&dst, NULL); } @@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); - return map + ns->base.offset + - ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); + if(!tx->direct) + return map + ns->base.offset; + else + return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); } static void diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index d21b80eab8d..5997456e4c9 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + if (nv50->state.fb) + so_ref(NULL, &nv50->state.fb); + if (nv50->state.blend) + so_ref(NULL, &nv50->state.blend); + if (nv50->state.blend_colour) + so_ref(NULL, &nv50->state.blend_colour); + if (nv50->state.zsa) + so_ref(NULL, &nv50->state.zsa); + if (nv50->state.rast) + so_ref(NULL, &nv50->state.rast); + if (nv50->state.stipple) + so_ref(NULL, &nv50->state.stipple); + if (nv50->state.scissor) + so_ref(NULL, &nv50->state.scissor); + if (nv50->state.viewport) + so_ref(NULL, &nv50->state.viewport); + if (nv50->state.tsc_upload) + so_ref(NULL, &nv50->state.tsc_upload); + if (nv50->state.tic_upload) + so_ref(NULL, &nv50->state.tic_upload); + if (nv50->state.vertprog) + so_ref(NULL, &nv50->state.vertprog); + if (nv50->state.fragprog) + so_ref(NULL, &nv50->state.fragprog); + if (nv50->state.programs) + so_ref(NULL, &nv50->state.programs); + if (nv50->state.vtxfmt) + so_ref(NULL, &nv50->state.vtxfmt); + if (nv50->state.vtxbuf) + so_ref(NULL, &nv50->state.vtxbuf); + if (nv50->state.vtxattr) + so_ref(NULL, &nv50->state.vtxattr); + draw_destroy(nv50->draw); FREE(nv50); } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 679c28ce4b1..2d0b1818ef6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -154,26 +154,17 @@ struct nv50_pc { int if_lvl, loop_lvl; unsigned loop_pos[NV50_MAX_LOOP_NESTING]; + unsigned *insn_pos; /* actual program offset of each TGSI insn */ + boolean in_subroutine; + /* current instruction and total number of insns */ unsigned insn_cur; unsigned insn_nr; boolean allow32; -}; - -static INLINE struct nv50_reg * -reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) -{ - struct nv50_reg *ri; - assert(pc->reg_instance_nr < 16); - ri = &pc->reg_instances[pc->reg_instance_nr++]; - if (reg) { - *ri = *reg; - reg->mod = 0; - } - return ri; -} + uint8_t edgeflag_out; +}; static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) @@ -253,6 +244,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) assert(0); } +static INLINE struct nv50_reg * +reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) +{ + struct nv50_reg *ri; + + assert(pc->reg_instance_nr < 16); + ri = &pc->reg_instances[pc->reg_instance_nr++]; + if (reg) { + alloc_reg(pc, reg); + *ri = *reg; + reg->mod = 0; + } + return ri; +} + /* XXX: For shaders that aren't executed linearly (e.g. shaders that * contain loops), we need to assign all hw regs to TGSI TEMPs early, * lest we risk temp_temps overwriting regs alloc'd "later". @@ -279,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) return NULL; } -/* Assign the hw of the discarded temporary register src - * to the tgsi register dst and free src. - */ -static void -assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) -{ - assert(src->index == -1 && src->hw != -1); - - if (dst->hw != -1) - pc->r_temp[dst->hw] = NULL; - pc->r_temp[src->hw] = dst; - dst->hw = src->hw; - - FREE(src); -} - /* release the hardware resource held by r */ static void release_hw(struct nv50_pc *pc, struct nv50_reg *r) @@ -451,10 +441,19 @@ is_immd(struct nv50_program_exec *e) return FALSE; } +static boolean +is_join(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 2) + return TRUE; + return FALSE; +} + static INLINE void set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, struct nv50_program_exec *e) { + assert(!is_immd(e)); set_long(pc, e); e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); e->inst[1] |= (pred << 7) | (idx << 12); @@ -497,15 +496,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) static INLINE void set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) { - union { - float f; - uint32_t ui; - } u; - u.ui = pc->immd_buf[imm->hw]; - - u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f; - u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f; - set_long(pc, e); /* XXX: can't be predicated - bits overlap; cases where both * are required should be avoided by using pc->allow32 */ @@ -513,8 +503,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) set_pred_wr(pc, 0, 0, e); e->inst[1] |= 0x00000002 | 0x00000001; - e->inst[0] |= (u.ui & 0x3f) << 16; - e->inst[1] |= (u.ui >> 6) << 2; + e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16; + e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2; } static INLINE void @@ -663,6 +653,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); } +/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */ static void emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) { @@ -715,6 +706,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f) FREE(imm); } +/* Assign the hw of the discarded temporary register src + * to the tgsi register dst and free src. + */ +static void +assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + assert(src->index == -1 && src->hw != -1); + + if (pc->if_lvl || pc->loop_lvl || + (dst->type != P_TEMP) || + (src->hw < pc->result_nr * 4 && + pc->p->type == PIPE_SHADER_FRAGMENT) || + pc->p->info.opcode_count[TGSI_OPCODE_CAL] || + pc->p->info.opcode_count[TGSI_OPCODE_BRA]) { + + emit_mov(pc, dst, src); + free_temp(pc, src); + return; + } + + if (dst->hw != -1) + pc->r_temp[dst->hw] = NULL; + pc->r_temp[src->hw] = dst; + dst->hw = src->hw; + + FREE(src); +} + static void emit_nop(struct nv50_pc *pc) { @@ -886,7 +905,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, set_dst(pc, dst, e); set_src_0(pc, src0, e); if (src1->type == P_IMMD && !is_long(e)) { - if (src0->mod & NV50_MOD_NEG) + if (src0->mod ^ src1->mod) e->inst[0] |= 0x00008000; set_immd(pc, src1, e); } else { @@ -997,6 +1016,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, op != TGSI_OPCODE_XOR) assert(!"invalid bit op"); + assert(!(src0->mod | src1->mod)); + if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) { set_immd(pc, src1, e); if (op == TGSI_OPCODE_OR) @@ -1048,6 +1069,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, src2->mod ^= NV50_MOD_NEG; } +#define NV50_FLOP_RCP 0 +#define NV50_FLOP_RSQ 2 +#define NV50_FLOP_LG2 3 +#define NV50_FLOP_SIN 4 +#define NV50_FLOP_COS 5 +#define NV50_FLOP_EX2 6 + +/* rcp, rsqrt, lg2 support neg and abs */ static void emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, struct nv50_reg *src) @@ -1055,17 +1084,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub, struct nv50_program_exec *e = exec(pc); e->inst[0] |= 0x90000000; - if (sub) { + if (sub || src->mod) { set_long(pc, e); e->inst[1] |= (sub << 29); } set_dst(pc, dst, e); + set_src_0_restricted(pc, src, e); - if (sub == 0 || sub == 2) - set_src_0_restricted(pc, src, e); - else - set_src_0(pc, src, e); + assert(!src->mod || sub < 4); + + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; emit(pc, e); } @@ -1082,6 +1114,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29) | 0x00004000; + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1097,6 +1134,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) set_long(pc, e); e->inst[1] |= (6 << 29); + if (src->mod & NV50_MOD_NEG) + e->inst[1] |= 0x04000000; + if (src->mod & NV50_MOD_ABS) + e->inst[1] |= 0x00100000; + emit(pc, e); } @@ -1231,10 +1273,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, { struct nv50_reg *temp = alloc_temp(pc, NULL); - emit_flop(pc, 3, temp, v); + emit_flop(pc, NV50_FLOP_LG2, temp, v); emit_mul(pc, temp, temp, e); emit_preex2(pc, temp, temp); - emit_flop(pc, 6, dst, temp); + emit_flop(pc, NV50_FLOP_EX2, dst, temp); free_temp(pc, temp); } @@ -1336,66 +1378,53 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src) } static struct nv50_program_exec * -emit_breakaddr(struct nv50_pc *pc) +emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc) { struct nv50_program_exec *e = exec(pc); - e->inst[0] = 0x40000002; + e->inst[0] = (op << 28) | 2; set_long(pc, e); + if (pred >= 0) + set_pred(pc, cc, pred, e); emit(pc, e); return e; } -static void -emit_break(struct nv50_pc *pc, int pred, unsigned cc) +static INLINE struct nv50_program_exec * +emit_breakaddr(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x50000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); + return emit_control_flow(pc, 0x4, -1, 0); +} - emit(pc, e); +static INLINE void +emit_break(struct nv50_pc *pc, int pred, unsigned cc) +{ + emit_control_flow(pc, 0x5, pred, cc); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_joinat(struct nv50_pc *pc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0xa0000002; - set_long(pc, e); - - emit(pc, e); - return e; + return emit_control_flow(pc, 0xa, -1, 0); } -static struct nv50_program_exec * +static INLINE struct nv50_program_exec * emit_branch(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); + return emit_control_flow(pc, 0x1, pred, cc); +} - e->inst[0] = 0x10000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - emit(pc, e); - return pc->p->exec_tail; +static INLINE struct nv50_program_exec * +emit_call(struct nv50_pc *pc, int pred, unsigned cc) +{ + return emit_control_flow(pc, 0x2, pred, cc); } -static void +static INLINE void emit_ret(struct nv50_pc *pc, int pred, unsigned cc) { - struct nv50_program_exec *e = exec(pc); - - e->inst[0] = 0x30000002; - set_long(pc, e); - if (pred >= 0) - set_pred(pc, cc, pred, e); - - emit(pc, e); + emit_control_flow(pc, 0x3, pred, cc); } #define QOP_ADD 0 @@ -1458,7 +1487,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], if (arg == 4) /* there is no textureProj(samplerCubeShadow) */ emit_mov(pc, t[3], src[3]); - emit_flop(pc, 0, t[2], t[2]); + emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]); emit_mul(pc, t[0], src[0], t[2]); emit_mul(pc, t[1], src[1], t[2]); @@ -1476,7 +1505,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], t[3]->rhw = src[3]->rhw; emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID)); - emit_flop(pc, 0, t[3], t[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]); for (c = 0; c < dim; ++c) { t[c]->rhw = src[c]->rhw; @@ -1490,7 +1519,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4], /* XXX: for some reason the blob sometimes uses MAD * (mad f32 $rX $rY $rZ neg $r63) */ - emit_flop(pc, 0, t[3], src[3]); + emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]); for (c = 0; c < dim; ++c) emit_mul(pc, t[c], src[c], t[3]); if (arg != dim) /* depth reference value */ @@ -1537,7 +1566,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod, struct nv50_reg *src, struct nv50_program_exec *tex) { struct nv50_program_exec *join_at; - unsigned i, target = pc->p->exec_size + 7 * 2; + unsigned i, target = pc->p->exec_size + 9 * 2; + + if (pc->p->type != PIPE_SHADER_FRAGMENT) { + emit(pc, tex); + return; + } + pc->allow32 = FALSE; /* Subtract lod of each pixel from lod of top left pixel, jump * texlod insn if result is 0, then repeat for 2 other pixels. @@ -1663,6 +1698,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, emit(pc, e); } else if (bias_lod < 0) { + assert(pc->p->type == PIPE_SHADER_FRAGMENT); e->inst[0] |= arg << 22; e->inst[1] |= 0x20000000; /* texbias */ emit_mov(pc, t[arg], src[3]); @@ -1782,20 +1818,24 @@ static boolean negate_supported(const struct tgsi_full_instruction *insn, int i) { switch (insn->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_COS: case TGSI_OPCODE_DDX: case TGSI_OPCODE_DDY: case TGSI_OPCODE_DP3: case TGSI_OPCODE_DP4: - case TGSI_OPCODE_MUL: + case TGSI_OPCODE_EX2: case TGSI_OPCODE_KIL: - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: + case TGSI_OPCODE_LG2: case TGSI_OPCODE_MAD: - return TRUE; + case TGSI_OPCODE_MUL: case TGSI_OPCODE_POW: - if (i == 1) - return TRUE; - return FALSE; + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */ + case TGSI_OPCODE_SCS: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SUB: + return TRUE; default: return FALSE; } @@ -1820,7 +1860,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c) case TGSI_OPCODE_DST: return mask & (c ? 0xa : 0x6); case TGSI_OPCODE_EX2: + case TGSI_OPCODE_EXP: case TGSI_OPCODE_LG2: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_POW: case TGSI_OPCODE_RCP: case TGSI_OPCODE_RSQ: @@ -2042,6 +2084,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c) assert(0); return 0x0; } + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LOG: case TGSI_OPCODE_LIT: case TGSI_OPCODE_SCS: case TGSI_OPCODE_TEX: @@ -2082,6 +2126,8 @@ nv50_kill_branch(struct nv50_pc *pc) if (pc->if_insn[lvl]->next != pc->p->exec_tail) return FALSE; + if (is_immd(pc->p->exec_tail)) + return FALSE; /* if ccode == 'true', the BRA is from an ELSE and the predicate * reg may no longer be valid, since we currently always use $p0 @@ -2215,10 +2261,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_BGNSUB: + assert(!pc->in_subroutine); + pc->in_subroutine = TRUE; + /* probably not necessary, but align to 8 byte boundary */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + break; case TGSI_OPCODE_BRK: assert(pc->loop_lvl > 0); emit_break(pc, -1, 0); break; + case TGSI_OPCODE_CAL: + assert(inst->Label.Label < pc->insn_nr); + emit_call(pc, -1, 0)->param.index = inst->Label.Label; + /* replaced by actual offset in nv50_program_fixup_insns */ + break; case TGSI_OPCODE_CEIL: for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) @@ -2239,17 +2297,22 @@ nv50_program_tx_insn(struct nv50_pc *pc, set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */ } break; + case TGSI_OPCODE_CONT: + assert(pc->loop_lvl > 0); + emit_branch(pc, -1, 0)->param.index = + pc->loop_pos[pc->loop_lvl - 1]; + break; case TGSI_OPCODE_COS: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 5, dst[3], temp); + emit_flop(pc, NV50_FLOP_COS, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 5, brdc, temp); + emit_flop(pc, NV50_FLOP_COS, brdc, temp); break; case TGSI_OPCODE_DDX: for (c = 0; c < 4; c++) { @@ -2321,9 +2384,40 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDSUB: + assert(pc->in_subroutine); + pc->in_subroutine = FALSE; + break; case TGSI_OPCODE_EX2: emit_preex2(pc, temp, src[0][0]); - emit_flop(pc, 6, brdc, temp); + emit_flop(pc, NV50_FLOP_EX2, brdc, temp); + break; + case TGSI_OPCODE_EXP: + { + struct nv50_reg *t[2]; + + assert(!temp); + t[0] = temp_temp(pc); + t[1] = temp_temp(pc); + + if (mask & 0x6) + emit_mov(pc, t[0], src[0][0]); + if (mask & 0x3) + emit_flr(pc, t[1], src[0][0]); + + if (mask & (1 << 1)) + emit_sub(pc, dst[1], t[0], t[1]); + if (mask & (1 << 0)) { + emit_preex2(pc, t[1], t[1]); + emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]); + } + if (mask & (1 << 2)) { + emit_preex2(pc, t[0], t[0]); + emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_FLR: for (c = 0; c < 4; c++) { @@ -2363,7 +2457,35 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_lit(pc, &dst[0], mask, &src[0][0]); break; case TGSI_OPCODE_LG2: - emit_flop(pc, 3, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]); + break; + case TGSI_OPCODE_LOG: + { + struct nv50_reg *t[2]; + + t[0] = temp_temp(pc); + if (mask & (1 << 1)) + t[1] = temp_temp(pc); + else + t[1] = t[0]; + + emit_abs(pc, t[0], src[0][0]); + emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], t[1]); + emit_flr(pc, t[1], t[1]); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], t[1]); + if (mask & (1 << 1)) { + t[1]->mod = NV50_MOD_NEG; + emit_preex2(pc, t[1], t[1]); + t[1]->mod = 0; + emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]); + emit_mul(pc, dst[1], t[0], t[1]); + } + if (mask & (1 << 3)) + emit_mov_immdval(pc, dst[3], 1.0f); + } break; case TGSI_OPCODE_LRP: temp = temp_temp(pc); @@ -2413,24 +2535,25 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: - emit_flop(pc, 0, brdc, src[0][0]); + emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: - if (pc->p->type == PIPE_SHADER_FRAGMENT) + if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine) nv50_fp_move_results(pc); emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: - emit_flop(pc, 2, brdc, src[0][0]); + src[0][0]->mod |= NV50_MOD_ABS; + emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; case TGSI_OPCODE_SCS: temp = temp_temp(pc); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) - emit_flop(pc, 5, dst[0], temp); + emit_flop(pc, NV50_FLOP_COS, dst[0], temp); if (mask & (1 << 1)) - emit_flop(pc, 4, dst[1], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[1], temp); if (mask & (1 << 2)) emit_mov_immdval(pc, dst[2], 0.0); if (mask & (1 << 3)) @@ -2439,14 +2562,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, case TGSI_OPCODE_SIN: if (mask & 8) { emit_precossin(pc, temp, src[0][3]); - emit_flop(pc, 4, dst[3], temp); + emit_flop(pc, NV50_FLOP_SIN, dst[3], temp); if (!(mask &= 7)) break; if (temp == dst[3]) temp = brdc = temp_temp(pc); } emit_precossin(pc, temp, src[0][0]); - emit_flop(pc, 4, brdc, temp); + emit_flop(pc, NV50_FLOP_SIN, brdc, temp); break; case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: @@ -2510,6 +2633,17 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mov_immdval(pc, dst[3], 1.0); break; case TGSI_OPCODE_END: + if (pc->p->type == PIPE_SHADER_FRAGMENT) + nv50_fp_move_results(pc); + + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + else + if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail)) + emit_nop(pc); + + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2554,10 +2688,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn) mask = dst->WriteMask; if (dst->File == TGSI_FILE_TEMPORARY) - reg = pc->temp; + reg = pc->temp; else - if (dst->File == TGSI_FILE_OUTPUT) - reg = pc->result; + if (dst->File == TGSI_FILE_OUTPUT) { + reg = pc->result; + + if (insn->Instruction.Opcode == TGSI_OPCODE_MOV && + dst->Index == pc->edgeflag_out && + insn->Src[0].Register.File == TGSI_FILE_INPUT) + pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index; + } if (reg) { for (c = 0; c < 4; c++) { @@ -2724,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } pc->r_brdc = NULL; - if (!deqs) + if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3])) return nv50_program_tx_insn(pc, &insn); deqs = nv50_revdep_reorder(m, rdep); @@ -2775,7 +2915,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1; emit_interp(pc, iv, NULL, mode & INTERP_CENTROID); - emit_flop(pc, 0, iv, iv); + emit_flop(pc, NV50_FLOP_RCP, iv, iv); /* XXX: when loading interpolants dynamically, move these * to the program head, or make sure it can't be skipped. @@ -2856,6 +2996,9 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (p->cfg.io_nr > first) p->cfg.io_nr = first; break; + case TGSI_SEMANTIC_EDGEFLAG: + pc->edgeflag_out = first; + break; /* case TGSI_SEMANTIC_CLIP_DISTANCE: p->cfg.clpd = MIN2(p->cfg.clpd, first); @@ -3104,6 +3247,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; @@ -3192,16 +3337,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc) if (e->param.index >= 0 && !e->param.mask) bra_list[n++] = e; - /* last instruction must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - /* set exit bit */ - pc->p->exec_tail->inst[1] |= 1; - - /* !immd on exit insn simultaneously means !join */ - assert(!is_immd(pc->p->exec_head)); - assert(!is_immd(pc->p->exec_tail)); - /* Make sure we don't have any single 32 bit instructions. */ for (e = pc->p->exec_head, pos = 0; e; e = e->next) { pos += is_long(e) ? 2 : 1; @@ -3210,12 +3345,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc) for (i = 0; i < n; ++i) if (bra_list[i]->param.index >= pos) bra_list[i]->param.index += 1; + for (i = 0; i < pc->insn_nr; ++i) + if (pc->insn_pos[i] >= pos) + pc->insn_pos[i] += 1; convert_to_long(pc, e); ++pos; } } FREE(bra_list); + + if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL]) + return; + + /* fill in CALL offsets */ + for (e = pc->p->exec_head; e; e = e->next) { + if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2) + e->param.index = pc->insn_pos[e->param.index]; + } } static boolean @@ -3237,19 +3384,20 @@ nv50_program_tx(struct nv50_program *p) if (ret == FALSE) goto out_cleanup; + pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned)); + tgsi_parse_init(&parse, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { const union tgsi_full_token *tok = &parse.FullToken; - /* don't allow half insn/immd on first and last instruction */ + /* previously allow32 was FALSE for first & last instruction */ pc->allow32 = TRUE; - if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr) - pc->allow32 = FALSE; tgsi_parse_token(&parse); switch (tok->Token.Type) { case TGSI_TOKEN_TYPE_INSTRUCTION: + pc->insn_pos[pc->insn_cur] = pc->p->exec_size; ++pc->insn_cur; ret = nv50_tgsi_insn(pc, tok); if (ret == FALSE) @@ -3260,9 +3408,6 @@ nv50_program_tx(struct nv50_program *p) } } - if (pc->p->type == PIPE_SHADER_FRAGMENT) - nv50_fp_move_results(pc); - nv50_program_fixup_insns(pc); p->param_nr = pc->param_nr * 4; @@ -3480,7 +3625,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1); + so_method(so, tesla, NV50TCL_FP_CONTROL, 1); so_data (so, p->cfg.regs[2]); so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1); so_data (so, p->cfg.regs[3]); @@ -3652,7 +3797,7 @@ nv50_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1); so_data (so, reg[4]); - so_method(so, tesla, 0x1540, 4); + so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4); so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 4a90c372ce3..461fec1d89c 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -58,6 +58,7 @@ struct nv50_program { /* VP only */ uint8_t clpd, clpd_nr; uint8_t psiz; + uint8_t edgeflag_in; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 268c9823f7d..5d9e18218ae 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_query *q = nv50_query(pq); - BEGIN_RING(chan, tesla, 0x1530, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1); OUT_RING (chan, 1); - BEGIN_RING(chan, tesla, 0x1514, 1); + BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1); OUT_RING (chan, 1); q->ready = FALSE; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index d443ca3ad06..7e039ea82ec 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_TEXTURE_MIRROR_REPEAT: return 1; case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; + return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: @@ -165,6 +165,21 @@ static void nv50_screen_destroy(struct pipe_screen *pscreen) { struct nv50_screen *screen = nv50_screen(pscreen); + unsigned i; + + for (i = 0; i < 2; i++) { + if (screen->constbuf_parm[i]) + nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); + } + + if (screen->constbuf_misc[0]) + nouveau_bo_ref(NULL, &screen->constbuf_misc[0]); + if (screen->tic) + nouveau_bo_ref(NULL, &screen->tic); + if (screen->tsc) + nouveau_bo_ref(NULL, &screen->tsc); + if (screen->static_init) + so_ref(NULL, &screen->static_init); nouveau_notifier_free(&screen->sync); nouveau_grobj_free(&screen->tesla); @@ -231,8 +246,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) break; case 0x80: case 0x90: - /* this stupid name should be corrected. */ - tesla_class = NV54TCL; + tesla_class = NV84TCL; break; case 0xa0: switch (chipset) { @@ -242,7 +256,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) tesla_class = NVA0TCL; break; default: - tesla_class = 0x8597; + tesla_class = NVA8TCL; break; } break; @@ -287,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, chan->vram->handle); so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); so_data (so, NV50_2D_OPERATION_SRCCOPY); - so_method(so, screen->eng2d, 0x0290, 1); + so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1); so_data (so, 0); so_method(so, screen->eng2d, 0x0888, 1); so_data (so, 1); @@ -297,34 +311,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Static tesla init */ so = so_new(256, 20); - so_method(so, screen->tesla, 0x1558, 1); - so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); + so_data (so, NV50TCL_COND_MODE_ALWAYS); so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); so_data (so, screen->sync->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), - NV50TCL_DMA_UNK0__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11); + for (i = 0; i < 11; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), - NV50TCL_DMA_UNK1__SIZE); - for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0), + NV50TCL_DMA_COLOR__SIZE); + for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++) so_data(so, chan->vram->handle); - so_method(so, screen->tesla, 0x121c, 1); + so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1); so_data (so, 1); /* activate all 32 lanes (threads) in a warp */ - so_method(so, screen->tesla, 0x19a0, 1); + so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1); so_data (so, 0x2); so_method(so, screen->tesla, 0x1400, 1); so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, 0x13b4, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); so_data (so, 0x54); - so_method(so, screen->tesla, 0x13bc, 1); + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); so_data (so, 0x54); /* origin is top left (set to 1 for bottom left) */ - so_method(so, screen->tesla, 0x13ac, 1); + so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, 8); @@ -360,7 +373,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) // B = buffer ID (maybe more than 1 byte) // N = CB index used in shader instruction // P = program type (0 = VP, 2 = GP, 3 = FP) - so_method(so, screen->tesla, 0x1694, 1); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x000BBNP1); */ @@ -424,23 +437,26 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vertex array limits - max them out */ for (i = 0; i < 16; i++) { - so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2); + so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_data (so, 0x000000ff); so_data (so, 0xffffffff); } - so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2); so_data (so, fui(0.0)); so_data (so, fui(1.0)); /* no dynamic combination of TIC & TSC entries => only BIND_TIC used */ - so_method(so, screen->tesla, 0x1234, 1); + so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1); + so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, 1); /* default edgeflag to TRUE */ + so_emit(chan, so); so_ref (so, &screen->static_init); so_ref (NULL, &so); diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 88aef52d08c..30b2b0f91bf 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -295,7 +295,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); - so_method(so, tesla, 0x1684, 1); + so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1); so_data (so, cso->flatshade_first ? 0 : 1); so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1); @@ -392,7 +392,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); so_data (so, fui(cso->offset_scale)); so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); - so_data (so, fui(cso->offset_units)); + so_data (so, fui(cso->offset_units * 2.0f)); } rso->pipe = *cso; @@ -439,9 +439,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, 0); } - /* XXX: keep hex values until header is updated (names reversed) */ if (cso->stencil[0].enabled) { - so_method(so, tesla, 0x1380, 8); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); @@ -451,23 +450,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, cso->stencil[0].writemask); so_data (so, cso->stencil[0].valuemask); } else { - so_method(so, tesla, 0x1380, 1); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, tesla, 0x1594, 5); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); so_data (so, 1); so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, tesla, 0x0f54, 3); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); so_data (so, cso->stencil[1].ref_value); so_data (so, cso->stencil[1].writemask); so_data (so, cso->stencil[1].valuemask); } else { - so_method(so, tesla, 0x1594, 1); + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 871e8097b65..c8bdf9dc276 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) * FP result 0 always goes to RT[0], bits 4 - 6 are ignored. * Ambiguous assignment results in no rendering (no DATA_ERROR). */ - so_method(so, tesla, 0x121c, 1); + so_method(so, tesla, NV50TCL_RT_CONTROL, 1); so_data (so, fb->nr_cbufs | (0 << 4) | (1 << 7) | (2 << 10) | (3 << 13) | (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25)); @@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->cbufs[i]->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1224, 1); + so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1); so_data (so, 1); } @@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50) level[fb->zsbuf->level].tile_mode << 4); so_data(so, 0x00000000); - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 1); so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3); so_data (so, fb->zsbuf->width); so_data (so, fb->zsbuf->height); so_data (so, 0x00010001); } else { - so_method(so, tesla, 0x1538, 1); + so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1); so_data (so, 0); } - so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2); so_data (so, w << 16); so_data (so, h << 16); /* set window lower left corner */ - so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2); + so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2); so_data (so, 0); so_data (so, 0); /* set screen scissor rectangle */ @@ -325,7 +325,7 @@ nv50_state_validate(struct nv50_context *nv50) nv50->state.scissor_enabled = rast->scissor; so = so_new(3, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); if (nv50->state.scissor_enabled) { so_data(so, (s->maxx << 16) | s->minx); so_data(so, (s->maxy << 16) | s->miny); @@ -355,11 +355,11 @@ scissor_uptodate: so = so_new(14, 0); if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); so_data (so, fui(nv50->viewport.translate[0])); so_data (so, fui(nv50->viewport.translate[1])); so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); so_data (so, fui(nv50->viewport.scale[0])); so_data (so, fui(nv50->viewport.scale[1])); so_data (so, fui(nv50->viewport.scale[2])); @@ -440,7 +440,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50, so_data (so, 1); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0); - so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2); + so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); so_data (so, 0); so_data (so, NV50_2D_SIFC_FORMAT_R8_UNORM); so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 79655fc08d5..6378132979e 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, if (ret) return; - BEGIN_RING(chan, eng2d, 0x0580, 3); - OUT_RING (chan, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3); + OUT_RING (chan, NV50_2D_DRAW_SHAPE_RECTANGLES); OUT_RING (chan, format); OUT_RING (chan, value); - BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4); OUT_RING (chan, destx); OUT_RING (chan, desty); OUT_RING (chan, width); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 4d9afa6fedc..a2f1db2914c 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1); OUT_RING (chan, src_pitch); src_offset += (sy * src_pitch) + (sx * cpp); } else { @@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); + NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1); OUT_RING (chan, dst_pitch); dst_offset += (dy * dst_pitch) + (dx * cpp); } else { @@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RELOCh(chan, src_bo, src_offset, src_reloc); OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); + NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); if (src_bo->tile_flags) { @@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, dst_offset += (line_count * dst_pitch); } BEGIN_RING(chan, m2mf, - NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); + NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4); OUT_RING (chan, width * cpp); OUT_RING (chan, line_count); OUT_RING (chan, 0x00000101); @@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50, /* NV50_2D_OPERATION_SRCCOPY assumed already set */ - BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2); + BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2); OUT_RING (chan, 0); OUT_RING (chan, src_format); BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); @@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50, src += src_pitch; } - BEGIN_RING(chan, tesla, 0x1440, 1); + BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1); OUT_RING (chan, 0); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f7fa0659e8c..602adfc50de 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c) { static const uint32_t hw_values[] = { 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16, 0, 0, 0, 0, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32, - NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 }; + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32, + NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 }; /* we'd also have R11G11B10 and R10G10B10A2 */ @@ -198,7 +198,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, return nv50_push_elements_u08(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -208,7 +208,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -231,7 +231,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, return nv50_push_elements_u16(nv50, map, count); if (count & 1) { - BEGIN_RING(chan, tesla, 0x15e8, 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; count--; @@ -241,7 +241,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, unsigned nr = count > 2046 ? 2046 : count; int i; - BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1); for (i = 0; i < nr; i += 2) OUT_RING (chan, (map[i + 1] << 16) | map[i]); @@ -266,7 +266,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, while (count) { unsigned nr = count > 2047 ? 2047 : count; - BEGIN_RING(chan, tesla, 0x400015e8, nr); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr); OUT_RINGp (chan, map, nr); count -= nr; @@ -372,6 +372,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, so_data (so, fui(v[1])); break; case 1: + if (attrib == nv50->vertprog->cfg.edgeflag_in) { + so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + so_data (so, v[0] ? 1 : 0); + } so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; @@ -401,6 +405,9 @@ nv50_vbo_validate(struct nv50_context *nv50) !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + if (nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ + n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); vtxattr = NULL; @@ -445,7 +452,7 @@ nv50_vbo_validate(struct nv50_context *nv50) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); /* vertex array limits */ - so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2); + so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2); so_reloc (vtxbuf, bo, vb->buffer->size - 1, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -479,6 +486,9 @@ struct nv50_vbo_emitctx unsigned nr_ve; unsigned vtx_dwords; unsigned vtx_max; + + float edgeflag; + unsigned ve_edgeflag; }; static INLINE void @@ -622,6 +632,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, if (nv50_map_vbufs(nv50) == FALSE) return FALSE; + emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; + + emit->edgeflag = 0.5f; emit->nr_ve = 0; emit->vtx_dwords = 0; @@ -644,7 +657,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, desc = util_format_description(ve->src_format); assert(desc); - size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); + size = util_format_get_component_bits( + ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); assert(ve->nr_components > 0 && ve->nr_components <= 4); @@ -686,10 +700,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, } emit->vtx_max = 512 / emit->vtx_dwords; + if (emit->ve_edgeflag < 16) + emit->vtx_max = 1; return TRUE; } +static INLINE void +set_edgeflag(struct nouveau_channel *chan, + struct nouveau_grobj *tesla, + struct nv50_vbo_emitctx *emit, uint32_t index) +{ + unsigned i = emit->ve_edgeflag; + + if (i < 16) { + float f = *((float *)(emit->map[i] + index * emit->stride[i])); + + if (emit->edgeflag != f) { + emit->edgeflag = f; + + BEGIN_RING(chan, tesla, 0x15e4, 1); + OUT_RING (chan, f ? 1 : 0); + } + } +} + static boolean nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) { @@ -704,6 +739,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx_next(chan, &emit); @@ -729,6 +766,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -754,6 +793,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); @@ -779,6 +820,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) unsigned i, dw, nr = MIN2(count, emit.vtx_max); dw = nr * emit.vtx_dwords; + set_edgeflag(chan, tesla, &emit, *map); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); for (i = 0; i < nr; ++i) emit_vtx(chan, &emit, *map++); diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 0d2de17be93..183aa17f9b3 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript') env = env.Clone() # add the paths for r300compiler -env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa']) +env.Append(CPPPATH = [ + '#/src/mesa/drivers/dri/r300/compiler', + '#/src/gallium/winsys/drm/radeon/core', + '#/include', + '#/src/mesa', +]) r300 = env.ConvenienceLibrary( target = 'r300', diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 199ce3a945d..1dc9216a7b2 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -997,7 +997,7 @@ validate: goto validate; } } else { - // debug_printf("No VBO while emitting dirty state!\n"); + /* debug_printf("No VBO while emitting dirty state!\n"); */ } if (!r300->winsys->validate(r300->winsys)) { r300->context.flush(&r300->context, 0, NULL); @@ -1129,7 +1129,7 @@ validate: */ /* Finally, emit the VBO. */ - //r300_emit_vertex_buffer(r300); + /* r300_emit_vertex_buffer(r300); */ r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d8d08fbe264..0aa1da07f8b 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2638,7 +2638,7 @@ enum { VE_COND_MUX_GTE = 25, VE_SET_GREATER_THAN = 26, VE_SET_EQUAL = 27, - VE_SET_NOT_EQUAL = 28, + VE_SET_NOT_EQUAL = 28 }; enum { @@ -2672,20 +2672,20 @@ enum { ME_PRED_SET_CLR = 25, ME_PRED_SET_INV = 26, ME_PRED_SET_POP = 27, - ME_PRED_SET_RESTORE = 28, + ME_PRED_SET_RESTORE = 28 }; enum { /* R3XX */ PVS_MACRO_OP_2CLK_MADD = 0, - PVS_MACRO_OP_2CLK_M2X_ADD = 1, + PVS_MACRO_OP_2CLK_M2X_ADD = 1 }; enum { PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */ - PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */ + PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */ }; enum { @@ -2694,7 +2694,7 @@ enum { PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */ - PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */ + PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */ }; enum { @@ -2703,7 +2703,7 @@ enum { PVS_SRC_SELECT_Z = 2, /* Select Z Component */ PVS_SRC_SELECT_W = 3, /* Select W Component */ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */ - PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */ + PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */ }; /* PVS Opcode & Destination Operand Description */ @@ -2742,7 +2742,7 @@ enum { PVS_DST_ADDR_SEL_MASK = 0x3, PVS_DST_ADDR_SEL_SHIFT = 29, PVS_DST_ADDR_MODE_0_MASK = 0x1, - PVS_DST_ADDR_MODE_0_SHIFT = 31, + PVS_DST_ADDR_MODE_0_SHIFT = 31 }; /* PVS Source Operand Description */ @@ -2777,7 +2777,7 @@ enum { PVS_SRC_ADDR_SEL_MASK = 0x3, PVS_SRC_ADDR_SEL_SHIFT = 29, PVS_SRC_ADDR_MODE_1_MASK = 0x0, - PVS_SRC_ADDR_MODE_1_SHIFT = 32, + PVS_SRC_ADDR_MODE_1_SHIFT = 32 }; /*\}*/ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 2d70ec2ac94..a89cb633e02 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -335,8 +335,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, draw_set_mapped_element_buffer(r300->draw, 0, NULL); draw_set_mapped_constant_buffer(r300->draw, - r300->shader_constants[PIPE_SHADER_VERTEX].constants, - r300->shader_constants[PIPE_SHADER_VERTEX].count * + PIPE_SHADER_VERTEX, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); draw_arrays(r300->draw, mode, start, count); @@ -361,6 +362,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); int i; + void* indices; if (!u_trim_pipe_prim(mode, &count)) { return FALSE; @@ -377,12 +379,13 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, draw_set_mapped_vertex_buffer(r300->draw, i, buf); } - void* indices = pipe_buffer_map(pipe->screen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(r300->draw, indexSize, minIndex, maxIndex, indices); draw_set_mapped_constant_buffer(r300->draw, + PIPE_SHADER_VERTEX, r300->shader_constants[PIPE_SHADER_VERTEX].constants, r300->shader_constants[PIPE_SHADER_VERTEX].count * (sizeof(float) * 4)); @@ -474,7 +477,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render) r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo, PIPE_BUFFER_USAGE_CPU_WRITE); - return (r300render->vbo_ptr + r300render->vbo_offset); + return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); } static void r300_render_unmap_vertices(struct vbuf_render* render, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 29bc701a86e..727ae7ade6d 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300, struct tgsi_shader_info* info = &r300->vs->info; int output; - output = draw_find_vs_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output); } diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 096cdb20bbe..a792c2cf989 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */ /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */ - /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */ + /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */ diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index f98087deb8c..5f130453c39 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -36,6 +36,7 @@ #include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_tile_cache.h" @@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, if (softpipe->no_rast) return; + if (!softpipe_check_render_cond(softpipe)) + return; + #if 0 softpipe_update_derived(softpipe); /* not needed?? */ #endif diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 969d69d6b42..3ac807d4b5c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe, } +static void +softpipe_render_condition( struct pipe_context *pipe, + struct pipe_query *query, + uint mode ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + + softpipe->render_cond_query = query; + softpipe->render_cond_mode = mode; +} + + + struct pipe_context * softpipe_create( struct pipe_screen *screen ) { @@ -191,6 +204,7 @@ softpipe_create( struct pipe_screen *screen ) #endif softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); softpipe->pipe.winsys = screen->winsys; softpipe->pipe.screen = screen; @@ -222,6 +236,10 @@ softpipe_create( struct pipe_screen *screen ) softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + softpipe->pipe.create_gs_state = softpipe_create_gs_state; + softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; + softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_clip_state = softpipe_set_clip_state; softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; @@ -249,6 +267,8 @@ softpipe_create( struct pipe_screen *screen ) softpipe_init_query_funcs( softpipe ); + softpipe->pipe.render_condition = softpipe_render_condition; + /* * Alloc caches for accessing drawing surfaces and textures. * Must be before quad stage setup! diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 8ce20c5744c..73fa744f9d4 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -58,6 +58,7 @@ struct softpipe_context { struct pipe_rasterizer_state *rasterizer; struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; + struct sp_geometry_shader *gs; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -115,6 +116,10 @@ struct softpipe_context { unsigned line_stipple_counter; + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + /** Software quad rendering pipeline */ struct { struct quad_stage *shade; @@ -147,6 +152,7 @@ struct softpipe_context { unsigned use_sse : 1; unsigned dump_fs : 1; + unsigned dump_gs : 1; unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 14cb1322e1d..87312ae1510 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -38,6 +38,7 @@ #include "util/u_prim.h" #include "sp_context.h" +#include "sp_query.h" #include "sp_state.h" #include "draw/draw_context.h" @@ -48,7 +49,7 @@ static void softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i, size; + uint i, vssize, gssize; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) @@ -57,13 +58,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } if (sp->constants[PIPE_SHADER_VERTEX].buffer) - size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size; else - size = 0; + vssize = 0; - draw_set_mapped_constant_buffer(sp->draw, + if (sp->constants[PIPE_SHADER_GEOMETRY].buffer) + gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size; + else + gssize = 0; + + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, sp->mapped_constants[PIPE_SHADER_VERTEX], - size); + vssize); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, + sp->mapped_constants[PIPE_SHADER_GEOMETRY], + gssize); } @@ -78,9 +87,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0); + draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0); - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; @@ -220,6 +230,9 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, struct draw_context *draw = sp->draw; unsigned i; + if (!softpipe_check_render_cond(sp)) + return TRUE; + sp->reduced_api_prim = u_reduced_prim(mode); if (sp->dirty) { diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 5fbac06a535..7f573aef3c3 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr, { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + (void) cvbr; /* do nothing */ } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index fe6b6cec353..d9babe81dad 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs, static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; struct softpipe_context *softpipe = qs->softpipe; - float source[4][QUAD_SIZE]; + float source[4][QUAD_SIZE] = { { 0 } }; /* * Compute src/first term RGB diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 379cf4ad064..4ef5d9f7b1d 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe, } +/** + * Called by rendering function to check rendering is conditional. + * \return TRUE if we should render, FALSE if we should skip rendering + */ +boolean +softpipe_check_render_cond(struct softpipe_context *sp) +{ + struct pipe_context *pipe = &sp->pipe; + boolean b, wait; + uint64_t result; + + if (!sp->render_cond_query) { + return TRUE; /* no query predicate, draw normally */ + } + + wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT || + sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result); + if (b) + return result > 0; + else + return TRUE; +} + + void softpipe_init_query_funcs(struct softpipe_context *softpipe ) { softpipe->pipe.create_query = softpipe_create_query; diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h index 05060a45759..736c033897e 100644 --- a/src/gallium/drivers/softpipe/sp_query.h +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -32,6 +32,10 @@ #ifndef SP_QUERY_H #define SP_QUERY_H +extern boolean +softpipe_check_render_cond(struct softpipe_context *sp); + + struct softpipe_context; extern void softpipe_init_query_funcs(struct softpipe_context * ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 615581b95f9..3da75364c5d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup ) } /* Note: nr_attrs is only used for debugging (vertex printing) */ - setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw); + setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw); sp->quad.first->begin( sp->quad.first ); diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 00da41b9857..f8886565e92 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -50,6 +50,7 @@ #define SP_NEW_VERTEX 0x1000 #define SP_NEW_VS 0x2000 #define SP_NEW_QUERY 0x4000 +#define SP_NEW_GS 0x8000 struct tgsi_sampler; @@ -90,6 +91,11 @@ struct sp_vertex_shader { int max_sampler; /* -1 if no samplers */ }; +/** Subclass of pipe_shader_state */ +struct sp_geometry_shader { + struct pipe_shader_state shader; + struct draw_geometry_shader *draw_data; +}; void * @@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *, const struct pipe_shader_state *); void softpipe_bind_vs_state(struct pipe_context *, void *); void softpipe_delete_vs_state(struct pipe_context *, void *); +void *softpipe_create_gs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_gs_state(struct pipe_context *, void *); +void softpipe_delete_gs_state(struct pipe_context *, void *); void softpipe_set_polygon_stipple( struct pipe_context *, const struct pipe_poly_stipple * ); diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index efed082f823..95ab3234337 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -29,6 +29,7 @@ */ #include "util/u_memory.h" +#include "draw/draw_context.h" #include "sp_context.h" #include "sp_state.h" @@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend = (struct pipe_blend_state *)blend; softpipe->dirty |= SP_NEW_BLEND; @@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + softpipe->blend_color = *blend_color; softpipe->dirty |= SP_NEW_BLEND; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index c24a737d07b..f6856a5f691 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* compute vertex layout now */ const struct sp_fragment_shader *spfs = softpipe->fs; struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - const uint num = draw_num_vs_outputs(softpipe->draw); + const uint num = draw_current_shader_outputs(softpipe->draw); uint i; /* Tell draw_vbuf to simply emit the whole post-xform vertex @@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } /* this includes texcoords and varying vars */ - src = draw_find_vs_output(softpipe->draw, - spfs->info.input_semantic_name[i], - spfs->info.input_semantic_index[i]); + src = draw_find_shader_output(softpipe->draw, + spfs->info.input_semantic_name[i], + spfs->info.input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src); } - softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + softpipe->psize_slot = draw_find_shader_output(softpipe->draw, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index b41f7e8ab72..aa12bb215a8 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) { struct softpipe_context *softpipe = softpipe_context(pipe); - softpipe->fs = (struct sp_fragment_shader *) fs; + draw_flush(softpipe->draw); + + if (softpipe->fs == fs) + return; + + draw_flush(softpipe->draw); + + softpipe->fs = fs; softpipe->dirty |= SP_NEW_FS; } @@ -159,9 +166,75 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + draw_flush(softpipe->draw); + /* note: reference counting */ pipe_buffer_reference(&softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->dirty |= SP_NEW_CONSTANTS; } + +void * +softpipe_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_geometry_shader *state; + + state = CALLOC_STRUCT(sp_geometry_shader); + if (state == NULL ) + goto fail; + + /* debug */ + if (softpipe->dump_gs) + tgsi_dump(templ->tokens, 0); + + /* copy shader tokens, the ones passed in will go away. + */ + state->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (state->shader.tokens == NULL) + goto fail; + + state->draw_data = draw_create_geometry_shader(softpipe->draw, templ); + if (state->draw_data == NULL) + goto fail; + + return state; + +fail: + if (state) { + FREE( (void *)state->shader.tokens ); + FREE( state->draw_data ); + FREE( state ); + } + return NULL; +} + + +void +softpipe_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->gs = (struct sp_geometry_shader *)gs; + + draw_bind_geometry_shader(softpipe->draw, + (softpipe->gs ? softpipe->gs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_GS; +} + + +void +softpipe_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_geometry_shader *state = + (struct sp_geometry_shader *)gs; + + draw_delete_geometry_shader(softpipe->draw, + (state) ? state->draw_data : 0); + FREE(state); +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 87b72196838..a5b00336d44 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe, } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, - void *setup) + void *rasterizer) { struct softpipe_context *softpipe = softpipe_context(pipe); + if (softpipe->rasterizer == rasterizer) + return; + /* pass-through to draw module */ - draw_set_rasterizer_state(softpipe->draw, setup); + draw_set_rasterizer_state(softpipe->draw, rasterizer); - softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + softpipe->rasterizer = rasterizer; softpipe->dirty |= SP_NEW_RASTERIZER; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index a518248bb18..f6154109ea8 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, struct softpipe_context *sp = softpipe_context(pipe); uint i; + draw_flush(sp->draw); + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { /* check if changing cbuf */ if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index c3de12b4a39..af99c9de37c 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -29,6 +29,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_screen.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "util/u_upload_mgr.h" #include "svga_context.h" @@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe ) u_upload_destroy( svga->upload_vb ); u_upload_destroy( svga->upload_ib ); + util_bitmask_destroy( svga->vs_bm ); + util_bitmask_destroy( svga->fs_bm ); + for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader) pipe_buffer_reference( &svga->curr.cb[shader], NULL ); @@ -130,7 +134,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga = CALLOC_STRUCT(svga_context); if (svga == NULL) - goto error1; + goto no_svga; svga->pipe.winsys = screen->winsys; svga->pipe.screen = screen; @@ -142,7 +146,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->swc = svgascreen->sws->context_create(svgascreen->sws); if(!svga->swc) - goto error2; + goto no_swc; svga_init_blend_functions(svga); svga_init_blit_functions(svga); @@ -165,32 +169,40 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0); if (!svga_init_swtnl(svga)) - goto error3; + goto no_swtnl; + + svga->fs_bm = util_bitmask_create(); + if (svga->fs_bm == NULL) + goto no_fs_bm; + + svga->vs_bm = util_bitmask_create(); + if (svga->vs_bm == NULL) + goto no_vs_bm; svga->upload_ib = u_upload_create( svga->pipe.screen, 32 * 1024, 16, PIPE_BUFFER_USAGE_INDEX ); if (svga->upload_ib == NULL) - goto error4; + goto no_upload_ib; svga->upload_vb = u_upload_create( svga->pipe.screen, 128 * 1024, 16, PIPE_BUFFER_USAGE_VERTEX ); if (svga->upload_vb == NULL) - goto error5; + goto no_upload_vb; svga->hwtnl = svga_hwtnl_create( svga, svga->upload_ib, svga->swc ); if (svga->hwtnl == NULL) - goto error6; + goto no_hwtnl; ret = svga_emit_initial_state( svga ); if (ret) - goto error7; + goto no_state; /* Avoid shortcircuiting state with initial value of zero. */ @@ -209,19 +221,23 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen ) return &svga->pipe; -error7: +no_state: svga_hwtnl_destroy( svga->hwtnl ); -error6: +no_hwtnl: u_upload_destroy( svga->upload_vb ); -error5: +no_upload_vb: u_upload_destroy( svga->upload_ib ); -error4: +no_upload_ib: + util_bitmask_destroy( svga->vs_bm ); +no_vs_bm: + util_bitmask_destroy( svga->fs_bm ); +no_fs_bm: svga_destroy_swtnl(svga); -error3: +no_swtnl: svga->swc->destroy(svga->swc); -error2: +no_swc: FREE(svga); -error1: +no_svga: return NULL; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 0885d9ca741..fa7f6cb3bb9 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -41,6 +41,7 @@ struct draw_vertex_shader; struct svga_shader_result; struct SVGACmdMemory; +struct util_bitmask; struct u_upload_mgr; @@ -319,12 +320,14 @@ struct svga_context boolean new_vdecl; } swtnl; + /* Bitmask of used shader IDs */ + struct util_bitmask *fs_bm; + struct util_bitmask *vs_bm; + struct { unsigned dirty[4]; unsigned texture_timestamp; - unsigned next_fs_id; - unsigned next_vs_id; /* Internally generated shaders: */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 8db40d0fd57..ca73cf9d5a3 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl ) } SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n", - svga_surface(svga->curr.framebuffer.cbufs[0])->handle, + svga->curr.framebuffer.cbufs[0] ? + svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL, hwtnl->cmd.prim_count); ret = SVGA3D_BeginDrawPrimitives(swc, diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c index e3be840d920..a461a86dd31 100644 --- a/src/gallium/drivers/svga/svga_pipe_fs.c +++ b/src/gallium/drivers/svga/svga_pipe_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -107,6 +108,8 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->fs_bm, result->id ); + svga_destroy_shader_result( result ); } diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c index c104c41f5f8..e82d10c2595 100644 --- a/src/gallium/drivers/svga/svga_pipe_vs.c +++ b/src/gallium/drivers/svga/svga_pipe_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_text.h" @@ -172,6 +173,8 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader) assert(ret == PIPE_OK); } + util_bitmask_clear( svga->vs_bm, result->id ); + svga_destroy_shader_result( result ); } diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c index 6ec38ed3e45..1902b0106ba 100644 --- a/src/gallium/drivers/svga/svga_state_fs.c +++ b/src/gallium/drivers/svga/svga_state_fs.c @@ -26,6 +26,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "svga_context.h" #include "svga_state.h" @@ -74,9 +75,12 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->fs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + result->id, SVGA3D_SHADERTYPE_PS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -84,14 +88,16 @@ static enum pipe_error compile_fs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_fs_id++; result->next = fs->base.results; fs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->fs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -116,7 +122,7 @@ fail: */ static int emit_white_fs( struct svga_context *svga ) { - int ret; + int ret = PIPE_ERROR; /* ps_3_0 * def c0, 1.000000, 0.000000, 0.000000, 1.000000 @@ -137,16 +143,26 @@ static int emit_white_fs( struct svga_context *svga ) 0x0000ffff, }; + assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX); + svga->state.white_fs_id = util_bitmask_add(svga->fs_bm); + if(svga->state.white_fs_id == SVGA3D_INVALID_ID) + goto no_fs_id; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_fs_id, + svga->state.white_fs_id, SVGA3D_SHADERTYPE_PS, white_tokens, sizeof(white_tokens)); if (ret) - return ret; + goto no_definition; - svga->state.white_fs_id = svga->state.next_fs_id++; return 0; + +no_definition: + util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id); + svga->state.white_fs_id = SVGA3D_INVALID_ID; +no_fs_id: + return ret; } @@ -251,12 +267,14 @@ static int emit_hw_fs( struct svga_context *svga, assert(id != SVGA3D_INVALID_ID); - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_PS, - id ); - if (ret) - return ret; + if (result != svga->state.hw_draw.fs) { + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_PS, + id ); + if (ret) + return ret; + } svga->dirty |= SVGA_NEW_FS_RESULT; svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index 82e7874e2a8..c614281858d 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -27,6 +27,7 @@ #include "pipe/p_defines.h" #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_bitmask.h" #include "translate/translate.h" #include "svga_context.h" @@ -78,8 +79,12 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; } + result->id = util_bitmask_add(svga->vs_bm); + if(result->id == UTIL_BITMASK_INVALID_INDEX) + goto fail; + ret = SVGA3D_DefineShader(svga->swc, - svga->state.next_vs_id, + result->id, SVGA3D_SHADERTYPE_VS, result->tokens, result->nr_tokens * sizeof result->tokens[0]); @@ -87,14 +92,16 @@ static enum pipe_error compile_vs( struct svga_context *svga, goto fail; *out_result = result; - result->id = svga->state.next_vs_id++; result->next = vs->base.results; vs->base.results = result; return PIPE_OK; fail: - if (result) + if (result) { + if (result->id != UTIL_BITMASK_INVALID_INDEX) + util_bitmask_clear( svga->vs_bm, result->id ); svga_destroy_shader_result( result ); + } return ret; } @@ -142,12 +149,14 @@ static int emit_hw_vs( struct svga_context *svga, id = result->id; } - if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { - ret = SVGA3D_SetShader(svga->swc, - SVGA3D_SHADERTYPE_VS, - id ); - if (ret) - return ret; + if (result != svga->state.hw_draw.vs) { + if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) { + ret = SVGA3D_SetShader(svga->swc, + SVGA3D_SHADERTYPE_VS, + id ); + if (ret) + return ret; + } svga->dirty |= SVGA_NEW_VS_RESULT; svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id; diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c index 8b14c913f72..7655121bec1 100644 --- a/src/gallium/drivers/svga/svga_swtnl_draw.c +++ b/src/gallium/drivers/svga/svga_swtnl_draw.c @@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga, PIPE_BUFFER_USAGE_CPU_READ); assert(map); draw_set_mapped_constant_buffer( - draw, + draw, PIPE_SHADER_VERTEX, map, svga->curr.cb[PIPE_SHADER_VERTEX]->size); } diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 25b8c2af3a0..94b6ccc62dd 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) memset(vdecl, 0, sizeof(vdecl)); /* always add position */ - src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo->attrib[0].emit = EMIT_4F; vdecl[0].array.offset = offset; @@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga ) for (i = 0; i < fs->base.info.num_inputs; i++) { unsigned name = fs->base.info.input_semantic_name[i]; unsigned index = fs->base.info.input_semantic_index[i]; - src = draw_find_vs_output(draw, name, index); + src = draw_find_shader_output(draw, name, index); vdecl[nr_decls].array.offset = offset; vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i]; diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c index b8ef137c015..0cd620189b7 100644 --- a/src/gallium/drivers/svga/svga_tgsi.c +++ b/src/gallium/drivers/svga/svga_tgsi.c @@ -31,6 +31,7 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_scan.h" #include "util/u_memory.h" +#include "util/u_bitmask.h" #include "svgadump/svga_shader_dump.h" @@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader, result->tokens = (const unsigned *)emit.buf; result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned); memcpy(&result->key, &key, sizeof key); + result->id = UTIL_BITMASK_INVALID_INDEX; if (SVGA_DEBUG & DEBUG_TGSI) { diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 1670da8bfa9..dc5eb8fc606 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_I2F: case TGSI_OPCODE_NOT: case TGSI_OPCODE_SHL: - case TGSI_OPCODE_SHR: + case TGSI_OPCODE_ISHR: case TGSI_OPCODE_XOR: return FALSE; diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c index e6d4a74e868..d59fb89a58c 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_dump.c @@ -1444,6 +1444,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd) void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; + + switch(cmd_id) { + case SVGA_3D_CMD_SURFACE_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); + { + const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; + dump_SVGA3dCmdDefineSurface(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dSize) <= next) { + dump_SVGA3dSize((const SVGA3dSize *)body); + body += sizeof(SVGA3dSize); + } + } + break; + case SVGA_3D_CMD_SURFACE_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); + { + const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; + dump_SVGA3dCmdDestroySurface(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_COPY: + _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); + { + const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; + dump_SVGA3dCmdSurfaceCopy(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + } + break; + case SVGA_3D_CMD_SURFACE_STRETCHBLT: + _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); + { + const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; + dump_SVGA3dCmdSurfaceStretchBlt(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SURFACE_DMA: + _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); + { + const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; + dump_SVGA3dCmdSurfaceDMA(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyBox) <= next) { + dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); + body += sizeof(SVGA3dCopyBox); + } + while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { + dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); + body += sizeof(SVGA3dCmdSurfaceDMASuffix); + } + } + break; + case SVGA_3D_CMD_CONTEXT_DEFINE: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); + { + const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; + dump_SVGA3dCmdDefineContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CONTEXT_DESTROY: + _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); + { + const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; + dump_SVGA3dCmdDestroyContext(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTRANSFORM: + _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); + { + const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; + dump_SVGA3dCmdSetTransform(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETZRANGE: + _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); + { + const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; + dump_SVGA3dCmdSetZRange(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETRENDERSTATE: + _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); + { + const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; + dump_SVGA3dCmdSetRenderState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRenderState) <= next) { + dump_SVGA3dRenderState((const SVGA3dRenderState *)body); + body += sizeof(SVGA3dRenderState); + } + } + break; + case SVGA_3D_CMD_SETRENDERTARGET: + _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); + { + const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; + dump_SVGA3dCmdSetRenderTarget(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETTEXTURESTATE: + _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); + { + const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; + dump_SVGA3dCmdSetTextureState(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dTextureState) <= next) { + dump_SVGA3dTextureState((const SVGA3dTextureState *)body); + body += sizeof(SVGA3dTextureState); + } + } + break; + case SVGA_3D_CMD_SETMATERIAL: + _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); + { + const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; + dump_SVGA3dCmdSetMaterial(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTDATA: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); + { + const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; + dump_SVGA3dCmdSetLightData(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETLIGHTENABLED: + _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); + { + const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; + dump_SVGA3dCmdSetLightEnabled(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETVIEWPORT: + _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); + { + const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; + dump_SVGA3dCmdSetViewport(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SETCLIPPLANE: + _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); + { + const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; + dump_SVGA3dCmdSetClipPlane(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_CLEAR: + _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); + { + const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; + dump_SVGA3dCmdClear(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dRect) <= next) { + dump_SVGA3dRect((const SVGA3dRect *)body); + body += sizeof(SVGA3dRect); + } + } + break; + case SVGA_3D_CMD_PRESENT: + _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); + { + const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; + dump_SVGA3dCmdPresent(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGA3dCopyRect) <= next) { + dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); + body += sizeof(SVGA3dCopyRect); + } + } + break; + case SVGA_3D_CMD_SHADER_DEFINE: + _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); + { + const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; + dump_SVGA3dCmdDefineShader(cmd); + body = (const uint8_t *)&cmd[1]; + svga_shader_dump((const uint32_t *)body, + (unsigned)(next - body)/sizeof(uint32_t), + FALSE ); + body = next; + } + break; + case SVGA_3D_CMD_SHADER_DESTROY: + _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); + { + const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; + dump_SVGA3dCmdDestroyShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); + { + const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; + dump_SVGA3dCmdSetShader(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_SET_SHADER_CONST: + _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); + { + const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; + dump_SVGA3dCmdSetShaderConst(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_DRAW_PRIMITIVES: + _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); + { + const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; + unsigned i, j; + dump_SVGA3dCmdDrawPrimitives(cmd); + body = (const uint8_t *)&cmd[1]; + for(i = 0; i < cmd->numVertexDecls; ++i) { + dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); + body += sizeof(SVGA3dVertexDecl); + } + for(j = 0; j < cmd->numRanges; ++j) { + dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); + body += sizeof(SVGA3dPrimitiveRange); + } + while(body + sizeof(SVGA3dVertexDivisor) <= next) { + dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); + body += sizeof(SVGA3dVertexDivisor); + } + } + break; + case SVGA_3D_CMD_SETSCISSORRECT: + _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); + { + const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; + dump_SVGA3dCmdSetScissorRect(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BEGIN_QUERY: + _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); + { + const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; + dump_SVGA3dCmdBeginQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_END_QUERY: + _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); + { + const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; + dump_SVGA3dCmdEndQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_WAIT_FOR_QUERY: + _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); + { + const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; + dump_SVGA3dCmdWaitForQuery(cmd); + body = (const uint8_t *)&cmd[1]; + } + break; + case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: + _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); + { + const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; + dump_SVGA3dCmdBlitSurfaceToScreen(cmd); + body = (const uint8_t *)&cmd[1]; + while(body + sizeof(SVGASignedRect) <= next) { + dump_SVGASignedRect((const SVGASignedRect *)body); + body += sizeof(SVGASignedRect); + } + } + break; + default: + _debug_printf("\t0x%08x\n", cmd_id); + break; + } + + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} + + +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; - switch(cmd_id) { - case SVGA_3D_CMD_SURFACE_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n"); - { - const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body; - dump_SVGA3dCmdDefineSurface(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dSize) <= next) { - dump_SVGA3dSize((const SVGA3dSize *)body); - body += sizeof(SVGA3dSize); - } - } - break; - case SVGA_3D_CMD_SURFACE_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n"); - { - const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body; - dump_SVGA3dCmdDestroySurface(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_COPY: - _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n"); - { - const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body; - dump_SVGA3dCmdSurfaceCopy(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - } - break; - case SVGA_3D_CMD_SURFACE_STRETCHBLT: - _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n"); - { - const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body; - dump_SVGA3dCmdSurfaceStretchBlt(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SURFACE_DMA: - _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n"); - { - const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body; - dump_SVGA3dCmdSurfaceDMA(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyBox) <= next) { - dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body); - body += sizeof(SVGA3dCopyBox); - } - while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) { - dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body); - body += sizeof(SVGA3dCmdSurfaceDMASuffix); - } - } - break; - case SVGA_3D_CMD_CONTEXT_DEFINE: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n"); - { - const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body; - dump_SVGA3dCmdDefineContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CONTEXT_DESTROY: - _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n"); - { - const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body; - dump_SVGA3dCmdDestroyContext(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTRANSFORM: - _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n"); - { - const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body; - dump_SVGA3dCmdSetTransform(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETZRANGE: - _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n"); - { - const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body; - dump_SVGA3dCmdSetZRange(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETRENDERSTATE: - _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n"); - { - const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body; - dump_SVGA3dCmdSetRenderState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRenderState) <= next) { - dump_SVGA3dRenderState((const SVGA3dRenderState *)body); - body += sizeof(SVGA3dRenderState); - } - } - break; - case SVGA_3D_CMD_SETRENDERTARGET: - _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n"); - { - const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body; - dump_SVGA3dCmdSetRenderTarget(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETTEXTURESTATE: - _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n"); - { - const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body; - dump_SVGA3dCmdSetTextureState(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dTextureState) <= next) { - dump_SVGA3dTextureState((const SVGA3dTextureState *)body); - body += sizeof(SVGA3dTextureState); - } - } - break; - case SVGA_3D_CMD_SETMATERIAL: - _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n"); - { - const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body; - dump_SVGA3dCmdSetMaterial(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTDATA: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n"); - { - const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body; - dump_SVGA3dCmdSetLightData(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETLIGHTENABLED: - _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n"); - { - const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body; - dump_SVGA3dCmdSetLightEnabled(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETVIEWPORT: - _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n"); - { - const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body; - dump_SVGA3dCmdSetViewport(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SETCLIPPLANE: - _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n"); - { - const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body; - dump_SVGA3dCmdSetClipPlane(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_CLEAR: - _debug_printf("\tSVGA_3D_CMD_CLEAR\n"); - { - const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body; - dump_SVGA3dCmdClear(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dRect) <= next) { - dump_SVGA3dRect((const SVGA3dRect *)body); - body += sizeof(SVGA3dRect); - } - } - break; - case SVGA_3D_CMD_PRESENT: - _debug_printf("\tSVGA_3D_CMD_PRESENT\n"); - { - const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body; - dump_SVGA3dCmdPresent(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGA3dCopyRect) <= next) { - dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body); - body += sizeof(SVGA3dCopyRect); - } - } - break; - case SVGA_3D_CMD_SHADER_DEFINE: - _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n"); - { - const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body; - dump_SVGA3dCmdDefineShader(cmd); - body = (const uint8_t *)&cmd[1]; - svga_shader_dump((const uint32_t *)body, - (unsigned)(next - body)/sizeof(uint32_t), - FALSE ); - body = next; - } - break; - case SVGA_3D_CMD_SHADER_DESTROY: - _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n"); - { - const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body; - dump_SVGA3dCmdDestroyShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n"); - { - const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body; - dump_SVGA3dCmdSetShader(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_SET_SHADER_CONST: - _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n"); - { - const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body; - dump_SVGA3dCmdSetShaderConst(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_DRAW_PRIMITIVES: - _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n"); - { - const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body; - unsigned i, j; - dump_SVGA3dCmdDrawPrimitives(cmd); - body = (const uint8_t *)&cmd[1]; - for(i = 0; i < cmd->numVertexDecls; ++i) { - dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body); - body += sizeof(SVGA3dVertexDecl); - } - for(j = 0; j < cmd->numRanges; ++j) { - dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body); - body += sizeof(SVGA3dPrimitiveRange); - } - while(body + sizeof(SVGA3dVertexDivisor) <= next) { - dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body); - body += sizeof(SVGA3dVertexDivisor); - } - } - break; - case SVGA_3D_CMD_SETSCISSORRECT: - _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n"); - { - const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body; - dump_SVGA3dCmdSetScissorRect(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BEGIN_QUERY: - _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n"); - { - const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body; - dump_SVGA3dCmdBeginQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_END_QUERY: - _debug_printf("\tSVGA_3D_CMD_END_QUERY\n"); - { - const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body; - dump_SVGA3dCmdEndQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_WAIT_FOR_QUERY: - _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n"); - { - const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body; - dump_SVGA3dCmdWaitForQuery(cmd); - body = (const uint8_t *)&cmd[1]; - } - break; - case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN: - _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n"); - { - const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body; - dump_SVGA3dCmdBlitSurfaceToScreen(cmd); - body = (const uint8_t *)&cmd[1]; - while(body + sizeof(SVGASignedRect) <= next) { - dump_SVGASignedRect((const SVGASignedRect *)body); - body += sizeof(SVGASignedRect); - } - } - break; - default: - _debug_printf("\t0x%08x\n", cmd_id); - break; - } - - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h index 69a87020875..ca0154361cc 100644 --- a/src/gallium/drivers/svga/svgadump/svga_dump.h +++ b/src/gallium/drivers/svga/svgadump/svga_dump.h @@ -28,6 +28,9 @@ #include "pipe/p_compiler.h" +void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size); + void svga_dump_commands(const void *commands, uint32_t size); diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py index a1ada29ef84..0bc0b3ae317 100755 --- a/src/gallium/drivers/svga/svgadump/svga_dump.py +++ b/src/gallium/drivers/svga/svgadump/svga_dump.py @@ -208,6 +208,56 @@ cmds = [ def dump_cmds(): print r''' void +svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size) +{ + const uint8_t *body = (const uint8_t *)data; + const uint8_t *next = body + size; +''' + print ' switch(cmd_id) {' + indexes = 'ijklmn' + for id, header, body, footer in cmds: + print ' case %s:' % id + print ' _debug_printf("\\t%s\\n");' % id + print ' {' + print ' const %s *cmd = (const %s *)body;' % (header, header) + if len(body): + print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' + print ' dump_%s(cmd);' % header + print ' body = (const uint8_t *)&cmd[1];' + for i in range(len(body)): + struct, count = body[i] + idx = indexes[i] + print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) + print ' dump_%s((const %s *)body);' % (struct, struct) + print ' body += sizeof(%s);' % struct + print ' }' + if footer is not None: + print ' while(body + sizeof(%s) <= next) {' % footer + print ' dump_%s((const %s *)body);' % (footer, footer) + print ' body += sizeof(%s);' % footer + print ' }' + if id == 'SVGA_3D_CMD_SHADER_DEFINE': + print ' svga_shader_dump((const uint32_t *)body,' + print ' (unsigned)(next - body)/sizeof(uint32_t),' + print ' FALSE);' + print ' body = next;' + print ' }' + print ' break;' + print ' default:' + print ' _debug_printf("\\t0x%08x\\n", cmd_id);' + print ' break;' + print ' }' + print r''' + while(body + sizeof(uint32_t) <= next) { + _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); + body += sizeof(uint32_t); + } + while(body + sizeof(uint32_t) <= next) + _debug_printf("\t\t0x%02x\n", *body++); +} +''' + print r''' +void svga_dump_commands(const void *commands, uint32_t size) { const uint8_t *next = commands; @@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size) const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next; const uint8_t *body = (const uint8_t *)&header[1]; - next = (const uint8_t *)body + header->size; + next = body + header->size; if(next > last) break; -''' - print ' switch(cmd_id) {' - indexes = 'ijklmn' - for id, header, body, footer in cmds: - print ' case %s:' % id - print ' _debug_printf("\\t%s\\n");' % id - print ' {' - print ' const %s *cmd = (const %s *)body;' % (header, header) - if len(body): - print ' unsigned ' + ', '.join(indexes[:len(body)]) + ';' - print ' dump_%s(cmd);' % header - print ' body = (const uint8_t *)&cmd[1];' - for i in range(len(body)): - struct, count = body[i] - idx = indexes[i] - print ' for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx) - print ' dump_%s((const %s *)body);' % (struct, struct) - print ' body += sizeof(%s);' % struct - print ' }' - if footer is not None: - print ' while(body + sizeof(%s) <= next) {' % footer - print ' dump_%s((const %s *)body);' % (footer, footer) - print ' body += sizeof(%s);' % footer - print ' }' - if id == 'SVGA_3D_CMD_SHADER_DEFINE': - print ' sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));' - print ' body = next;' - print ' }' - print ' break;' - print ' default:' - print ' _debug_printf("\\t0x%08x\\n", cmd_id);' - print ' break;' - print ' }' - - print r''' - while(body + sizeof(uint32_t) <= next) { - _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body); - body += sizeof(uint32_t); - } - while(body + sizeof(uint32_t) <= next) - _debug_printf("\t\t0x%02x\n", *body++); + svga_dump_command(cmd_id, body, header->size); } else if(cmd_id == SVGA_CMD_FENCE) { _debug_printf("\tSVGA_CMD_FENCE\n"); diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README index 1000c31e49a..203c3851bc3 100644 --- a/src/gallium/drivers/trace/README +++ b/src/gallium/drivers/trace/README @@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing ldd progs/trivial/tri -== Traceing == +== Tracing == -For traceing then do +For tracing then do - export XMESA_TRACE=y GALLIUM_TRACE=tri.trace progs/trivial/tri which should create a tri.trace file, which is an XML file. You can view copying diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 80f4874b780..ad47a56fba4 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx, assert(tr_buf->buffer); assert(tr_buf->buffer->screen == tr_scr->screen); + (void) tr_scr; return tr_buf->buffer; } @@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, assert(tr_surf->surface); assert(tr_surf->surface->texture->screen == tr_scr->screen); + (void) tr_scr; return tr_surf->surface; } diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 7e2ccbcfdc5..0f45e211a32 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -40,7 +40,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) #include <stdlib.h> #endif @@ -258,7 +258,7 @@ boolean trace_dump_trace_begin() trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); trace_dump_writes("<trace version='0.1'>\n"); -#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) +#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE) /* Linux applications rarely cleanup GL / Gallium resources so catch * application exit here */ atexit(trace_dump_trace_close); diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index c31b1d86986..0546aad9b50 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -45,7 +45,7 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_USER) # define sleep Sleep -#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) +#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE) void usleep(int); # define sleep usleep #else @@ -180,7 +180,7 @@ static int trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial) { struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header; struct tr_list *ptr; struct pipe_texture *t; @@ -223,7 +223,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header; struct trace_screen *tr_scr = tr_rbug->tr_scr; - struct trace_texture *tr_tex; + struct trace_texture *tr_tex = NULL; struct tr_list *ptr; struct pipe_screen *screen = tr_scr->screen; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ac20a47af1e..117503aaff6 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, struct pipe_transfer *transfer = tr_trans->transfer; if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride; + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; trace_dump_call_begin("pipe_screen", "transfer_write"); diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h index 1c16042ee5a..e2f981d0513 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -32,7 +32,7 @@ struct tgsi_token; enum trace_shader_type { TRACE_SHADER_FRAGMENT = 0, TRACE_SHADER_VERTEX = 1, - TRACE_SHADER_GEOMETRY = 2, + TRACE_SHADER_GEOMETRY = 2 }; struct trace_shader |