diff options
-rw-r--r-- | src/gallium/drivers/nv40/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_context.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_draw.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state_clip.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state_emit.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_vertprog.c | 159 |
7 files changed, 87 insertions, 112 deletions
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 3369a21574f..9c8eadf7e44 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ nv40_screen.c \ nv40_state.c \ nv40_state_blend.c \ - nv40_state_clip.c \ nv40_state_emit.c \ nv40_state_fb.c \ nv40_state_rasterizer.c \ diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index a1b5c88a063..436351b6bcc 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -76,9 +76,6 @@ enum nv40_state_index { #define NV40_NEW_ARRAYS (1 << 11) #define NV40_NEW_UCP (1 << 12) -#define NV40_FALLBACK_TNL (1 << 0) -#define NV40_FALLBACK_RAST (1 << 1) - struct nv40_rasterizer_state { struct pipe_rasterizer_state pipe; struct nouveau_stateobj *so; @@ -203,7 +200,6 @@ extern void nv40_fragtex_bind(struct nv40_context *); extern boolean nv40_state_validate(struct nv40_context *nv40); extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); extern void nv40_state_emit(struct nv40_context *nv40); -extern struct nv40_state_entry nv40_state_clip; extern struct nv40_state_entry nv40_state_rasterizer; extern struct nv40_state_entry nv40_state_scissor; extern struct nv40_state_entry nv40_state_stipple; diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 9cd8fa6a497..7f008aca3a8 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -236,7 +236,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, if (!nv40_state_validate_swtnl(nv40)) return FALSE; - nv40->dirty &= ~(1ULL << NV40_STATE_VTXBUF); + nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); nv40_state_emit(nv40); for (i = 0; i < PIPE_ATTRIB_MAX; i++) { diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index e018464c9f8..2b4225deb2e 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -29,6 +29,9 @@ struct nv40_vertex_program { struct draw_vertex_shader *draw; boolean translated; + + struct pipe_clip_state ucp; + struct nv40_vertex_program_exec *insns; unsigned nr_insns; struct nv40_vertex_program_data *consts; diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c deleted file mode 100644 index c52390f9edb..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_clip.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_clip_validate(struct nv40_context *nv40) -{ - - if (nv40->render_mode == HW) { - nv40->fallback_swtnl &= ~NV40_NEW_UCP; - if (nv40->clip.nr) - nv40->fallback_swtnl |= NV40_NEW_UCP; - } - - return FALSE; -} - -struct nv40_state_entry nv40_state_clip = { - .validate = nv40_state_clip_validate, - .dirty = { - .pipe = NV40_NEW_UCP, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index c742e4f421e..864dfc2e0c3 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -5,7 +5,6 @@ static struct nv40_state_entry *render_states[] = { &nv40_state_framebuffer, &nv40_state_rasterizer, - &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, @@ -22,7 +21,6 @@ static struct nv40_state_entry *render_states[] = { static struct nv40_state_entry *swtnl_states[] = { &nv40_state_framebuffer, &nv40_state_rasterizer, - &nv40_state_clip, &nv40_state_scissor, &nv40_state_stipple, &nv40_state_fragprog, @@ -127,8 +125,7 @@ nv40_state_validate(struct nv40_context *nv40) nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS | - NV40_NEW_UCP); + NV40_NEW_ARRAYS); nv40->render_mode = HW; } @@ -153,8 +150,7 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) nv40->pipe.flush(&nv40->pipe, 0, NULL); nv40->dirty |= (NV40_NEW_VIEWPORT | NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS | - NV40_NEW_UCP); + NV40_NEW_ARRAYS); nv40->render_mode = SWTNL; } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 08d3f387e09..e10250528e2 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -52,6 +52,8 @@ struct nv40_vpc { struct nv40_sreg *imm; unsigned nr_imm; + + unsigned hpos_idx; }; static struct nv40_sreg @@ -423,11 +425,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, int ai = -1, ci = -1, ii = -1; int i; - struct { - struct nv40_sreg dst; - unsigned m; - } clip; - if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; @@ -501,47 +498,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); - /* If writing to clip distance regs, need to modify instruction to - * change which component is written to. On NV40 the clip regs - * are the unused components (yzw) of FOGC/PSZ. - */ - clip.dst = none; - if (dst.type == NV40SR_OUTPUT && - dst.index >= NV40_VP_INST_DEST_CLIP(0) && - dst.index <= NV40_VP_INST_DEST_CLIP(5)) { - unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0); - unsigned m[] = - { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W }; - - /* Some instructions we can get away with swizzling and/or - * changing the writemask. Others, we'll use a temp reg. - */ - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_DST: - case TGSI_OPCODE_EXP: - case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LOG: - case TGSI_OPCODE_XPD: - clip.dst = dst; - clip.m = m[n]; - dst = temp(vpc); - break; - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - mask = m[n]; - break; - default: - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) - src[i] = swz(src[i], X, X, X, X); - mask = m[n]; - break; - } - } - switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); @@ -639,11 +595,6 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, return FALSE; } - if (clip.dst.type != NV40SR_NONE) { - arith(vpc, 0, OP_MOV, clip.dst, clip.m, - swz(dst, X, X, X, X), none, none); - } - release_temps(vpc); return TRUE; } @@ -658,6 +609,7 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, switch (fdec->Semantic.SemanticName) { case TGSI_SEMANTIC_POSITION: hw = NV40_VP_INST_DEST_POS; + vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.SemanticIndex == 0) { @@ -695,15 +647,6 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } break; -#if 0 - case TGSI_SEMANTIC_CLIP: - if (fdec->Semantic.SemanticIndex >= 6) { - NOUVEAU_ERR("bad clip distance index\n"); - return FALSE; - } - hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex); - break; -#endif default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; @@ -748,6 +691,10 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) } break; #endif + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + return FALSE; + break; default: break; } @@ -803,6 +750,8 @@ nv40_vertprog_translate(struct nv40_context *nv40, { struct tgsi_parse_context parse; struct nv40_vpc *vpc = NULL; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int i; vpc = CALLOC(1, sizeof(struct nv40_vpc)); if (!vpc) @@ -814,26 +763,21 @@ nv40_vertprog_translate(struct nv40_context *nv40, return; } + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code the the vtxprog + * to handle clip planes later. + */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + tgsi_parse_init(&parse, vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; case TGSI_TOKEN_TYPE_IMMEDIATE: { const struct tgsi_full_immediate *imm; @@ -862,6 +806,39 @@ nv40_vertprog_translate(struct nv40_context *nv40, } } + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { + struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_POS); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_CLIP(i)); + struct nv40_sreg ceqn = constant(vpc, -1, + nv40->clip.ucp[i][0], + nv40->clip.ucp[i][1], + nv40->clip.ucp[i][2], + nv40->clip.ucp[i][3]); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + } + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; vp->translated = TRUE; out_err: @@ -883,6 +860,12 @@ nv40_vertprog_validate(struct nv40_context *nv40) if (nv40->render_mode == HW) { vp = nv40->vertprog; constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + + if ((nv40->dirty & NV40_NEW_UCP) || + memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { + nv40_vertprog_destroy(nv40, vp); + memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + } } else { vp = nv40->swtnl.vertprog; constbuf = NULL; @@ -1045,16 +1028,36 @@ check_gpu_resources: void nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) { - if (vp->nr_consts) - FREE(vp->consts); - if (vp->nr_insns) + struct nouveau_winsys *nvws = nv40->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); } struct nv40_state_entry nv40_state_vertprog = { .validate = nv40_vertprog_validate, .dirty = { - .pipe = NV40_NEW_VERTPROG, + .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, .hw = NV40_STATE_VERTPROG, } }; |