diff options
-rw-r--r-- | src/gallium/auxiliary/util/u_dynarray.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nv30_vertprog.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragprog.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state_emit.c | 82 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vertprog.c | 170 |
8 files changed, 166 insertions, 116 deletions
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h index 9d1c1713a7c..980cadf22d1 100644 --- a/src/gallium/auxiliary/util/u_dynarray.h +++ b/src/gallium/auxiliary/util/u_dynarray.h @@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf) #define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type))) #define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type) #define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type)) +#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx)) +#define util_dynarray_begin(buf) ((buf)->data) +#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size)) #endif /* U_DYNARRAY_H */ diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h index df92469078c..9a68f5c1fb0 100644 --- a/src/gallium/drivers/nvfx/nv30_vertprog.h +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -125,7 +125,7 @@ #define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ #define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ #define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) +#define NV30_VP_INST_DEST_MASK (0x1F << 2) # define NV30_VP_INST_DEST_POS 0 # define NV30_VP_INST_DEST_BFC0 1 # define NV30_VP_INST_DEST_BFC1 2 @@ -133,7 +133,8 @@ # define NV30_VP_INST_DEST_COL1 4 # define NV30_VP_INST_DEST_FOGC 5 # define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) +# define NV30_VP_INST_DEST_TC(n) (8+(n)) +# define NV30_VP_INST_DEST_CLP(n) (17 + (n)) /* Useful to split the source selection regs into their pieces */ #define NV30_VP_SRC0_HIGH_SHIFT 6 diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 80b36fb7b91..2f775f92cf5 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -75,6 +75,10 @@ nvfx_create(struct pipe_screen *pscreen, void *priv) screen->base.channel->user_private = nvfx; nvfx->is_nv4x = screen->is_nv4x; + /* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs + * However, my code for that doesn't work, so use vp clipping for all cards, which works. + */ + nvfx->use_vp_clipping = TRUE; nvfx_init_query_functions(nvfx); nvfx_init_surface_functions(nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 2134f3c3865..680f4c6ce0f 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -134,6 +134,7 @@ struct nvfx_context { struct nvfx_screen *screen; unsigned is_nv4x; /* either 0 or ~0 */ + boolean use_vp_clipping; struct draw_context *draw; struct blitter_context* blitter; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index a7e43b1513b..23a85c9342e 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1468,19 +1468,6 @@ update: nvfx->hw_pointsprite_control = pointsprite_control; } } - - if(nvfx->is_nv4x) - { - unsigned vp_output = vp->or | fp->or; - - if(vp_output != nvfx->hw_vp_output) - { - WAIT_RING(chan, 2); - OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1)); - OUT_RING(chan, vp_output); - nvfx->hw_vp_output = vp_output; - } - } } void diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 37951919182..e9c1f2c26d2 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -24,8 +24,6 @@ struct nvfx_vertex_program { boolean translated; - struct pipe_clip_state ucp; - struct nvfx_vertex_program_exec *insns; unsigned nr_insns; struct nvfx_vertex_program_data *consts; @@ -42,7 +40,7 @@ struct nvfx_vertex_program { uint32_t ir; uint32_t or; - uint32_t clip_ctrl; + int clip_nr; struct util_dynarray branch_relocs; struct util_dynarray const_relocs; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index bd89a385d7c..c43a75aaa21 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -90,6 +90,74 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_STIPPLE) nvfx_state_stipple_validate(nvfx); + if(nvfx->dirty & NVFX_NEW_UCP) + { + unsigned enables[7] = + { + 0, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4, + NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4 | NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5, + }; + + if(!nvfx->use_vp_clipping) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, 0); + + WAIT_RING(chan, 6 * 4 + 1); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANE_A(0), nvfx->clip.nr * 4)); + OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4); + } + + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, enables[nvfx->clip.nr]); + } + + if(nvfx->use_vp_clipping && (nvfx->dirty & (NVFX_NEW_UCP | NVFX_NEW_VERTPROG))) + { + unsigned i; + struct nvfx_vertex_program* vp = nvfx->vertprog; + if(nvfx->clip.nr != vp->clip_nr) + { + unsigned idx; + WAIT_RING(chan, 14); + + /* remove last instruction bit */ + if(vp->clip_nr >= 0) + { + idx = vp->nr_insns - 7 + vp->clip_nr; + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4)); + OUT_RINGp (chan, vp->insns[idx].data, 4); + } + + /* set last instruction bit */ + idx = vp->nr_insns - 7 + nvfx->clip.nr; + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start + idx); + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_INST(0), 4)); + OUT_RINGp(chan, vp->insns[idx].data, 3); + OUT_RING(chan, vp->insns[idx].data[3] | 1); + vp->clip_nr = nvfx->clip.nr; + } + + // TODO: only do this for the ones changed + WAIT_RING(chan, 6 * 6); + for(i = 0; i < nvfx->clip.nr; ++i) + { + OUT_RING(chan, RING_3D(NV34TCL_VP_UPLOAD_CONST_ID, 5)); + OUT_RING(chan, vp->data->start + i); + OUT_RINGp (chan, nvfx->clip.ucp[i], 4); + } + } + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) { nvfx_fragprog_validate(nvfx); @@ -97,6 +165,20 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) flush_tex_cache = TRUE; // TODO: do we need this? } + if(nvfx->is_nv4x) + { + unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or; + vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6); + + if(vp_output != nvfx->hw_vp_output) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV40TCL_VP_RESULT_EN, 1)); + OUT_RING(chan, vp_output); + nvfx->hw_vp_output = vp_output; + } + } + if(all_swizzled >= 0) nvfx_framebuffer_validate(nvfx, all_swizzled); diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 3b8d3853b7f..ea7e88c5613 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -29,8 +29,6 @@ #include "nv30_vertprog.h" #include "nv40_vertprog.h" -#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) - struct nvfx_loop_entry { unsigned brk_target; @@ -205,52 +203,33 @@ emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot break; case NVFXSR_OUTPUT: /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ - switch (dst.index) { - case NVFX_VP_INST_DEST_CLIP(0): - vp->or |= (1 << 6); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(1): - vp->or |= (1 << 7); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(2): - vp->or |= (1 << 8); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; - dst.index = NVFX_VP(INST_DEST_FOGC); - break; - case NVFX_VP_INST_DEST_CLIP(3): - vp->or |= (1 << 9); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - case NVFX_VP_INST_DEST_CLIP(4): - vp->or |= (1 << 10); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - case NVFX_VP_INST_DEST_CLIP(5): - vp->or |= (1 << 11); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; - dst.index = NVFX_VP(INST_DEST_PSZ); - break; - default: - if(nvfx->is_nv4x) { - /* we don't need vp->or on nv3x - * texcoords are handled by fragment program - */ - switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - } + if(nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_CLP(0): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(1): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(2): + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NV30_VP_INST_DEST_CLP(3): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV30_VP_INST_DEST_CLP(4): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV30_VP_INST_DEST_CLP(5): + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; } - break; } if(!nvfx->is_nv4x) { @@ -914,6 +893,13 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, vpc->nvfx = nvfx; vpc->vp = vp; + /* reserve space for ucps */ + if(nvfx->use_vp_clipping) + { + for(i = 0; i < 6; ++i) + constant(vpc, -1, 0, 0, 0, 0); + } + if (!nvfx_vertprog_prepare(nvfx, vpc)) { FREE(vpc); return; @@ -923,7 +909,8 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, * planes are enabled. We need to append code to the vtxprog * to handle clip planes later. */ - if (vp->ucp.nr) { + /* TODO: maybe support patching this depending on whether there are ucps: not sure if it is really matters much */ + if (nvfx->use_vp_clipping) { vpc->r_result[vpc->hpos_idx] = temp(vpc); vpc->r_temps_discard = 0; } @@ -994,34 +981,39 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, } /* Insert code to handle user clip planes */ - for (i = 0; i < vp->ucp.nr; i++) { - struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, - NVFX_VP_INST_DEST_CLIP(i)); - struct nvfx_src ceqn = nvfx_src(constant(vpc, -1, - nvfx->clip.ucp[i][0], - nvfx->clip.ucp[i][1], - nvfx->clip.ucp[i][2], - nvfx->clip.ucp[i][3])); - struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); - unsigned mask; + if(nvfx->use_vp_clipping) + { + for (i = 0; i < 6; i++) { + struct nvfx_reg cdst = nvfx_reg(NVFXSR_OUTPUT, NV30_VP_INST_DEST_CLP(i)); + struct nvfx_src ceqn = nvfx_src(nvfx_reg(NVFXSR_CONST, i)); + struct nvfx_src htmp = nvfx_src(vpc->r_result[vpc->hpos_idx]); + unsigned mask; - switch (i) { - case 0: case 3: mask = NVFX_VP_MASK_Y; break; - case 1: case 4: mask = NVFX_VP_MASK_Z; break; - case 2: case 5: mask = NVFX_VP_MASK_W; break; - default: - NOUVEAU_ERR("invalid clip dist #%d\n", i); - goto out_err; - } + if(nvfx->is_nv4x) + { + switch (i) { + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + } + else + mask = NVFX_VP_MASK_X; - nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none)); + nvfx_vp_emit(vpc, arith(VEC, DP4, cdst, mask, htmp, ceqn, none)); + } } + else + { + if(vp->nr_insns) + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - //vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - - /* Append NOP + END instruction for branches to the end of the program */ - nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none)); - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST | 0x1000; + nvfx_vp_emit(vpc, arith(VEC, NOP, none.reg, 0, none, none, none)); + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + } if(debug_get_option_nvfx_dump_vp()) { @@ -1034,6 +1026,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, debug_printf("\n"); } + vp->clip_nr = -1; vp->exec_start = -1; vp->translated = TRUE; out_err: @@ -1063,13 +1056,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if (nvfx->render_mode == HW) { vp = nvfx->vertprog; constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - - // TODO: ouch! can't we just use constant slots for these?! - if ((nvfx->dirty & NVFX_NEW_UCP) || - memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { - nvfx_vertprog_destroy(nvfx, vp); - memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); - } } else { vp = nvfx->swtnl.vertprog; constbuf = NULL; @@ -1169,7 +1155,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) vp->exec_start = vp->exec->start; } - if (vp->nr_consts && vp->data_start != vp->data->start) { + if (vp->data_start != vp->data->start) { for(unsigned i = 0; i < vp->const_relocs.size; i += sizeof(struct nvfx_relocation)) { struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i); @@ -1182,6 +1168,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } vp->data_start = vp->data->start; + upload_code = TRUE; } /* Update + Upload constant values */ @@ -1191,7 +1178,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if (constbuf) map = (float*)nvfx_buffer(constbuf)->data; - for (i = 0; i < vp->nr_consts; i++) { + for (i = nvfx->use_vp_clipping ? 6 : 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; if (vpd->index >= 0) { @@ -1217,9 +1204,10 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); OUT_RINGp (chan, vp->insns[i].data, 4); } + vp->clip_nr = -1; } - if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) + if(nvfx->dirty & (NVFX_NEW_VERTPROG)) { WAIT_RING(chan, 6); OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1)); @@ -1228,8 +1216,6 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 1)); OUT_RING(chan, vp->ir); } - OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); - OUT_RING(chan, vp->clip_ctrl); } return TRUE; @@ -1238,27 +1224,15 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) void nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) { - vp->translated = FALSE; - - if (vp->nr_insns) { + if (vp->nr_insns) FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - if (vp->nr_consts) { + if (vp->nr_consts) FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } nouveau_resource_free(&vp->exec); - vp->exec_start = 0; nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - vp->ir = vp->or = vp->clip_ctrl = 0; util_dynarray_fini(&vp->branch_relocs); util_dynarray_fini(&vp->const_relocs); } |