summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/nvfx/nvfx_vertprog.c56
1 files changed, 39 insertions, 17 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 6e5b6c09363..6914e730c5f 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -40,6 +40,7 @@ struct nvfx_vpc {
struct nvfx_context* nvfx;
struct pipe_shader_state pipe;
struct nvfx_vertex_program *vp;
+ struct tgsi_shader_info* info;
struct nvfx_vertex_program_exec *vpi;
@@ -448,9 +449,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
int mask;
int ai = -1, ci = -1, ii = -1;
int i;
-
- if (finst->Instruction.Opcode == TGSI_OPCODE_END)
- return TRUE;
+ unsigned sub_depth = 0;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
@@ -705,13 +704,24 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
break;
case TGSI_OPCODE_RET:
- tmp = none;
- tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
- nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp));
+ if(sub_depth || !nvfx->use_vp_clipping) {
+ tmp = none;
+ tmp.swz[0] = tmp.swz[1] = tmp.swz[2] = tmp.swz[3] = 0;
+ nvfx_vp_emit(vpc, arith(0, SCA, RET, none.reg, 0, none, none, tmp));
+ } else {
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = vpc->info->num_instructions;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+ nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
+ }
break;
case TGSI_OPCODE_BGNSUB:
+ ++sub_depth;
+ break;
case TGSI_OPCODE_ENDSUB:
+ --sub_depth;
+ break;
case TGSI_OPCODE_ENDIF:
/* nothing to do here */
break;
@@ -752,6 +762,23 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
break;
+ case TGSI_OPCODE_END:
+ assert(!sub_depth);
+ if(nvfx->use_vp_clipping) {
+ if(idx != (vpc->info->num_instructions - 1)) {
+ reloc.location = vpc->vp->nr_insns;
+ reloc.target = vpc->info->num_instructions;
+ util_dynarray_append(&vpc->label_relocs, struct nvfx_relocation, reloc);
+ nvfx_vp_emit(vpc, arith(0, SCA, BRA, none.reg, 0, none, none, none));
+ }
+ } else {
+ if(vpc->vp->nr_insns)
+ vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+ nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none));
+ vpc->vp->insns[vpc->vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
+ }
+ break;
+
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
return FALSE;
@@ -946,7 +973,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)
DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
static struct nvfx_vertex_program*
-nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps)
+nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps, const struct tgsi_shader_info* info)
{
struct tgsi_parse_context parse;
struct nvfx_vertex_program* vp = NULL;
@@ -968,6 +995,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_stat
vpc->nvfx = nvfx;
vpc->vp = vp;
vpc->pipe = *vps;
+ vpc->info = info;
{
// TODO: use a 64-bit atomic here!
@@ -1086,14 +1114,6 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_stat
nvfx_vp_emit(vpc, arith(0, VEC, DP4, cdst, mask, htmp, ceqn, none));
}
}
- else
- {
- if(vp->nr_insns)
- vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
-
- nvfx_vp_emit(vpc, arith(0, VEC, NOP, none.reg, 0, none, none, none));
- vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST;
- }
if(debug_get_option_nvfx_dump_vp())
{
@@ -1133,6 +1153,7 @@ nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vert
{
struct nvfx_vertex_program* vp = NULL;
struct pipe_shader_state vps;
+ struct tgsi_shader_info info;
struct ureg_program *ureg = NULL;
unsigned num_outputs = MIN2(pvp->info.num_outputs, 16);
@@ -1146,7 +1167,8 @@ nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vert
ureg_END( ureg );
vps.tokens = ureg_get_tokens(ureg, 0);
- vp = nvfx_vertprog_translate(nvfx, &vps);
+ tgsi_scan_shader(vps.tokens, &info);
+ vp = nvfx_vertprog_translate(nvfx, &vps, &info);
ureg_free_tokens(vps.tokens);
ureg_destroy(ureg);
@@ -1169,7 +1191,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
vp = pvp->vp;
if(!vp) {
- vp = nvfx_vertprog_translate(nvfx, &pvp->pipe);
+ vp = nvfx_vertprog_translate(nvfx, &pvp->pipe, &pvp->info);
if(!vp)
vp = NVFX_VP_FAILED;
pvp->vp = vp;