diff options
author | Roland Scheidegger <[email protected]> | 2006-11-03 12:48:18 +0000 |
---|---|---|
committer | Roland Scheidegger <[email protected]> | 2006-11-03 12:48:18 +0000 |
commit | 029d18cd3d79ff956c50b3486078d968d15bf0fb (patch) | |
tree | 7f7dbfc01e0f1a93863c8f78da811508f02c09c3 | |
parent | e7b71f69190c9fec567163dd7f63df593e069678 (diff) |
Enable generic arrays for r200 hw vertex programs by assigning unused color and texture inputs to them. Not widely tested yet. This should eliminate all fallbacks due to vertex programs, except for writes to back-facing colors or when a hw limit is exceeded (12 temps, 12 attribs, etc.).
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_context.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_maos_arrays.c | 63 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_tcl.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r200/r200_vertprog.c | 98 |
4 files changed, 143 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 07b6d6e12db..c6910f5238a 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -725,8 +725,8 @@ struct r200_tcl_info { GLint last_offset; GLuint hw_primitive; -/* FIXME: what's the maximum number of components? */ - struct r200_dma_region *aos_components[11]; +/* hw can handle 12 components max */ + struct r200_dma_region *aos_components[12]; GLuint nr_aos_components; GLuint *Elts; @@ -738,6 +738,7 @@ struct r200_tcl_info { struct r200_dma_region fog; struct r200_dma_region tex[R200_MAX_TEXTURE_UNITS]; struct r200_dma_region norm; + struct r200_dma_region generic[16]; }; diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 92348c90caa..f6ab2f00746 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -385,8 +385,8 @@ void r200EmitArrays( GLcontext *ctx, GLuint inputs ) GLuint vfmt0 = 0, vfmt1 = 0; GLuint count = VB->Count; GLuint i; - - if (1) { + + if (inputs & VERT_BIT_POS) { if (!rmesa->tcl.obj.buf) emit_vector( ctx, &rmesa->tcl.obj, @@ -404,7 +404,6 @@ void r200EmitArrays( GLcontext *ctx, GLuint inputs ) } component[nr++] = &rmesa->tcl.obj; } - if (inputs & VERT_BIT_NORMAL) { if (!rmesa->tcl.norm.buf) @@ -481,7 +480,7 @@ void r200EmitArrays( GLcontext *ctx, GLuint inputs ) vfmt0 |= R200_VTX_FP_RGB << R200_VTX_COLOR_1_SHIFT; component[nr++] = &rmesa->tcl.spec; } - + for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) { if (inputs & (VERT_BIT_TEX0 << i)) { if (!rmesa->tcl.tex[i].buf) @@ -497,6 +496,50 @@ void r200EmitArrays( GLcontext *ctx, GLuint inputs ) } } + if (ctx->VertexProgram._Enabled) { + int *vp_inputs = rmesa->curr_vp_hw->inputs; + for ( i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++ ) { + if (inputs & (1 << i)) { + int geninput = i - VERT_ATTRIB_GENERIC0; + if 
(!rmesa->tcl.generic[geninput].buf) { + emit_vector( ctx, + &(rmesa->tcl.generic[geninput]), + (char *)VB->AttribPtr[i]->data, + 4, + VB->AttribPtr[i]->stride, + count ); + } + component[nr++] = &rmesa->tcl.generic[geninput]; + switch (vp_inputs[i]) { + case 0: + vfmt0 |= R200_VTX_W0 | R200_VTX_Z0; + break; + case 2: + case 3: + case 4: + case 5: + vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (vp_inputs[i] - 2) * 2); + break; + case 6: + case 7: + case 8: + case 9: + case 10: + case 11: + vfmt1 |= 4 << (R200_VTX_TEX0_COMP_CNT_SHIFT + (vp_inputs[i] - 6) * 3); + break; + case 13: + vfmt0 |= R200_VTX_XY1 | R200_VTX_Z1 | R200_VTX_W1; + break; + case 1: + case 12: + default: + assert(0); + } + } + } + } + if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] || vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) { R200_STATECHANGE( rmesa, vtx ); @@ -522,7 +565,7 @@ void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ) if (newinputs & VERT_BIT_NORMAL) r200ReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ ); - + if (newinputs & VERT_BIT_FOG) r200ReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ ); @@ -536,4 +579,14 @@ void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ) if (newinputs & VERT_BIT_TEX(unit)) r200ReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ ); } + + if (ctx->VertexProgram._Enabled) { + int i; + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { + if (newinputs & (1 << i)) + r200ReleaseDmaRegion( rmesa, + &rmesa->tcl.generic[i - VERT_ATTRIB_GENERIC0], __FUNCTION__ ); + } + } + } diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index dab478db92d..3334d1e1566 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -436,11 +436,13 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, We only need to change compsel. 
*/ GLuint out_compsel = 0; GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; +#if 0 /* can't handle other inputs, generic attribs etc. currently - should never arrive here */ assert ((rmesa->curr_vp_hw->mesa_program.Base.InputsRead & ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) == 0); +#endif inputs |= rmesa->curr_vp_hw->mesa_program.Base.InputsRead; assert(vp_out & (1 << VERT_RESULT_HPOS)); out_compsel = R200_OUTPUT_XYZW; @@ -577,7 +579,7 @@ static void transition_to_hwtnl( GLcontext *ctx ) rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK; rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_VTX_FOG; } - + R200_STATECHANGE( rmesa, vte ); rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT); rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT; diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 9ac7a96827a..acea0dba998 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -404,6 +404,8 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte unsigned long hw_op; int dofogfix = 0; int fog_temp_i = 0; + int free_inputs; + int array_count = 0; vp->native = GL_FALSE; vp->translated = GL_TRUE; @@ -412,6 +414,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte if (mesa_vp->Base.NumInstructions == 0) return GL_FALSE; +#if 0 if ((mesa_vp->Base.InputsRead & ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 | VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 | @@ -422,6 +425,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte } return GL_FALSE; } +#endif if ((mesa_vp->Base.OutputsWritten & ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) | 
@@ -470,35 +474,85 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte else mesa_vp->Base.NumNativeParameters = 0; - for(i=0; i < VERT_ATTRIB_MAX; i++) + for(i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; + free_inputs = 0x2ffd; + /* fglrx uses fixed inputs as follows for conventional attribs. - generic attribs use non-fixed assignment, fglrx will always use the lowest attrib values available. - There are 12 generic attribs possible, corresponding to attrib 0, 2-11 and 13 in a hw vertex prog. - attr 1 and 12 are not available for generic attribs as those cannot be made vec4 (correspond to - vertex normal/weight) + generic attribs use non-fixed assignment, fglrx will always use the + lowest attrib values available. We'll just do the same. + There are 12 generic attribs possible, corresponding to attrib 0, 2-11 + and 13 in a hw vertex prog. + attr 1 and 12 aren't used for generic attribs as those cannot be made vec4 + (correspond to vertex normal/weight - maybe weight actually could be made vec4). + Additionally, not more than 12 arrays in total are possible I think. attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0 attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0) attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1) attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0) - generic attribs would require some more work (dma regions, renaming). */ +*/ -/* may look different when using idx buf / input_route instead of se_vtx_fmt? 
*/ - vp->inputs[VERT_ATTRIB_POS] = 0; - vp->inputs[VERT_ATTRIB_WEIGHT] = 12; - vp->inputs[VERT_ATTRIB_NORMAL] = 1; - vp->inputs[VERT_ATTRIB_COLOR0] = 2; - vp->inputs[VERT_ATTRIB_COLOR1] = 3; - vp->inputs[VERT_ATTRIB_FOG] = 15; - vp->inputs[VERT_ATTRIB_TEX0] = 6; - vp->inputs[VERT_ATTRIB_TEX1] = 7; - vp->inputs[VERT_ATTRIB_TEX2] = 8; - vp->inputs[VERT_ATTRIB_TEX3] = 9; - vp->inputs[VERT_ATTRIB_TEX4] = 10; - vp->inputs[VERT_ATTRIB_TEX5] = 11; /* attr 4,5 and 13 are only used with generic attribs. Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is not possibe to use with vertex progs as it is lacking in vert prog specification) */ +/* may look different when using idx buf / input_route instead of se_vtx_fmt? */ + if (mesa_vp->Base.InputsRead & VERT_BIT_POS) { + vp->inputs[VERT_ATTRIB_POS] = 0; + free_inputs &= ~(1 << 0); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_ATTRIB_WEIGHT) { + /* we don't actually handle that later. Then again, we don't have to... */ + vp->inputs[VERT_ATTRIB_WEIGHT] = 12; + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) { + vp->inputs[VERT_ATTRIB_NORMAL] = 1; + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) { + vp->inputs[VERT_ATTRIB_COLOR0] = 2; + free_inputs &= ~(1 << 2); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) { + vp->inputs[VERT_ATTRIB_COLOR1] = 3; + free_inputs &= ~(1 << 3); + array_count++; + } + if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) { + vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++; + } + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) { + if (mesa_vp->Base.InputsRead & (1 << i)) { + vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6; + free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6)); + array_count++; + } + } + /* using VERT_ATTRIB_TEX6/7 would be illegal */ + /* completely ignore aliasing? */ + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { + int j; + /* completely ignore aliasing? 
*/ + if (mesa_vp->Base.InputsRead & (1 << i)) { + array_count++; + if (array_count > 12) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "more than 12 attribs used in vert prog\n"); + } + return GL_FALSE; + } + for (j = 0; j < 14; j++) { + /* will always find one due to limited array_count */ + if (free_inputs & (1 << j)) { + free_inputs &= ~(1 << j); + vp->inputs[i] = j; + break; + } + } + } + } if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { if (R200_DEBUG & DEBUG_FALLBACKS) { @@ -506,6 +560,12 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte } return GL_FALSE; } + if (free_inputs & 1) { + if (R200_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "can't handle vert prog without position input\n"); + } + return GL_FALSE; + } o_inst = vp->instr; for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){ |