diff options
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragprog.c | 204 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state.h | 24 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state_emit.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vertprog.c | 41 |
7 files changed, 211 insertions, 88 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 8899bf991e1..7ec6a4f4124 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -45,6 +45,7 @@ #define NVFX_NEW_VERTCONST (1 << 14) #define NVFX_NEW_FRAGCONST (1 << 15) #define NVFX_NEW_INDEX (1 << 16) +#define NVFX_NEW_SPRITE (1 << 17) struct nvfx_rasterizer_state { struct pipe_rasterizer_state pipe; diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index ae4fe3aa262..0a599c62a74 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -16,8 +16,6 @@ struct nvfx_fpc { struct nvfx_fragment_program *fp; - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - unsigned r_temps; unsigned r_temps_discard; struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; @@ -36,6 +34,8 @@ struct nvfx_fpc { struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; + + unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */ }; static INLINE struct nvfx_sreg @@ -111,6 +111,11 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; + case NVFXSR_RELOCATED: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index); + util_dynarray_append(&fpc->fp->slot_relocations[src.index], unsigned, fpc->inst_offset); + break; case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); @@ -241,8 +246,28 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); + if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) { + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) { + if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0) + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0); + else if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 1) + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1); + else + assert(0); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) { + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) { + /* TODO: check this has the correct values */ + /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */ + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING); + } else { + assert(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC); + src = nvfx_sr(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->fp->info.input_semantic_index[fsrc->Register.Index]]); + } break; case TGSI_FILE_CONSTANT: src = constant(fpc, fsrc->Register.Index, NULL); @@ -611,48 +636,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, } static boolean -nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NVFX_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NVFX_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NVFX_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NVFX_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { @@ -691,6 +674,15 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) { struct tgsi_parse_context p; int high_temp = -1, i; + struct util_semantic_set set; + + fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->fp->pipe.tokens, TGSI_FILE_INPUT); + if(fpc->fp->num_slots > 8) + return FALSE; + util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8); + util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8); + + memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input)); tgsi_parse_init(&p, fpc->fp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { @@ -703,10 +695,6 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) const struct tgsi_full_declaration *fdec; fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) - goto out_err; - break; case TGSI_FILE_OUTPUT: if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) goto out_err; @@ -805,7 +793,7 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx, /* Terminate final instruction */ if(fp->insn) - fp->insn[fpc->inst_offset] |= 0x00000001; + fp->insn[fpc->inst_offset] |= 0x00000001; /* Append NOP + END instruction, may or may not be necessary. */ fpc->inst_offset = fp->insn_len; @@ -881,9 +869,70 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG)) update = TRUE; - if(update) { - int offset; + struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog; + if (fp->last_vp_id != vp->id) { + char* vp_sem_table = vp->generic_to_fp_input; + unsigned char* fp_semantics = fp->slot_to_generic; + unsigned diff = 0; + fp->last_vp_id = nvfx->vertprog->id; + unsigned char* cur_slots = fp->slot_to_fp_input; + for(unsigned i = 0; i < fp->num_slots; ++i) { + unsigned char slot_mask = vp_sem_table[fp_semantics[i]]; + diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]); + } + + if(diff) + { + for(unsigned i = 0; i < fp->num_slots; ++i) { + /* if 0xff, then this will write to the dummy value at fp->last_layout_mask[0] */ + fp->slot_to_fp_input[i] = vp_sem_table[fp_semantics[i]] & 0xf; + //printf("fp: GENERIC[%i] from fpreg %i\n", fp_semantics[i], fp->slot_to_fp_input[i]); + } + + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + } + + // last_sprite_coord_enable + unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable; + if(fp->last_sprite_coord_enable != sprite_coord_enable) + { + unsigned texcoord_mask = vp->texcoord_ouput_mask; + fp->last_sprite_coord_enable = sprite_coord_enable; + fp->point_sprite_control = 0; + for(unsigned i = 0; i < fp->num_slots; ++i) { + if((1 << fp->slot_to_generic[i]) & sprite_coord_enable) + { + unsigned fpin = fp->slot_to_fp_input[i]; + //printf("sprite: slot %i generic %i had texcoord %i\n", i, fp->slot_to_generic[i], fpin - NVFX_FP_OP_INPUT_SRC_TC0); + if(fpin >= 0x0f) + { + unsigned tc = __builtin_ctz(~texcoord_mask); + texcoord_mask |= (1 << tc); + fp->slot_to_fp_input[i] = fpin = NVFX_FP_OP_INPUT_SRC_TC(tc); + + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + //printf("sprite: slot %i texcoord %i\n", i, fpin - NVFX_FP_OP_INPUT_SRC_TC0); + fp->point_sprite_control |= (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0 + 8)); + } + else + { + unsigned fpin = fp->slot_to_fp_input[i]; + if(!(vp->texcoord_ouput_mask & (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0)))) + { + fp->slot_to_fp_input[i] = 0x0f; + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + } + } + } + + if(update) { ++fp->bo_prog_idx; if(fp->bo_prog_idx >= fp->progs_per_bo) { @@ -893,10 +942,9 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } else { - struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); - char *map, *buf; - int i; - + struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16); + fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo]; + memset(fpbo->slots, 0, 8 * fp->progs_per_bo); if(fp->fpbo) { fpbo->next = fp->fpbo->next; @@ -906,12 +954,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) fpbo->next = fpbo; fp->fpbo = fpbo; fpbo->bo = 0; + fp->progs += fp->progs_per_bo; + fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo; nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo); nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC); - map = fpbo->bo->map; - buf = fpbo->insn; - for(i = 0; i < fp->progs_per_bo; ++i) + uint8_t* map = fpbo->bo->map; + uint8_t* buf = (uint8_t*)fpbo->insn; + for(unsigned i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -922,7 +972,8 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) fp->bo_prog_idx = 0; } - offset = fp->bo_prog_idx * fp->prog_size; + int offset = fp->bo_prog_idx * fp->prog_size; + uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) { struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT]; @@ -941,6 +992,25 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } } } + + if(fp->progs_left_with_obsolete_slot_assignments) { + unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8]; + for(unsigned i = 0; i < fp->num_slots; ++i) { + unsigned value = fp->slot_to_fp_input[i];; + if(value != fpbo_slots[i]) { + unsigned* p = (unsigned*)fp->slot_relocations[i].data; + unsigned* pend = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size); + for(; p != pend; ++p) { + unsigned off = *p; + unsigned dw = fp->insn[off]; + dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT); + nvfx_fp_memcpy(&fpmap[*p], &dw, sizeof(dw)); + } + fpbo_slots[i] = value; + } + } + --fp->progs_left_with_obsolete_slot_assignments; + } } if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) { @@ -960,6 +1030,13 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) OUT_RING(chan, fp->samplers); } } + + if(nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SPRITE)) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1)); + OUT_RING(chan, fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization); + } } void @@ -982,6 +1059,7 @@ void nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program *fp) { + unsigned i; struct nvfx_fragment_program_bo* fpbo = fp->fpbo; if(fpbo) { @@ -996,7 +1074,9 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, while(fpbo != fp->fpbo); } + for(i = 0; i < 8; ++i) + util_dynarray_fini(&fp->slot_relocations[i]); + if (fp->insn_len) FREE(fp->insn); } - diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index 50830b39164..88cf91b95f4 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -323,6 +323,7 @@ #define NVFXSR_INPUT 2 #define NVFXSR_TEMP 3 #define NVFXSR_CONST 4 +#define NVFXSR_RELOCATED 5 #define NVFX_COND_FL 0 #define NVFX_COND_LT 1 diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 25d29720a85..c3addf1114f 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -91,6 +91,7 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe, /*XXX: ignored: * point_smooth -nohw * multisample + * sprite_coord_origin */ sb_method(sb, NV34TCL_SHADE_MODEL, 1); @@ -150,20 +151,6 @@ nvfx_rasterizer_state_create(struct pipe_context *pipe, sb_data(sb, fui(cso->offset_units * 2)); } - sb_method(sb, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - sb_data(sb, psctl); - } else { - sb_data(sb, 0); - } - rsso->pipe = *cso; rsso->sb_len = sb_len(sb, rsso->sb); return (void *)rsso; @@ -189,6 +176,12 @@ nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) nvfx->dirty |= NVFX_NEW_STIPPLE; nvfx->draw_dirty |= NVFX_NEW_STIPPLE; } + + if(((struct nvfx_rasterizer_state*)hwcso)->pipe.point_quad_rasterization != nvfx->rasterizer->pipe.point_quad_rasterization + || ((struct nvfx_rasterizer_state*)hwcso)->pipe.sprite_coord_enable != nvfx->rasterizer->pipe.sprite_coord_enable) + { + nvfx->dirty |= NVFX_NEW_SPRITE; + } } nvfx->rasterizer = hwcso; @@ -280,9 +273,13 @@ nvfx_vp_state_create(struct pipe_context *pipe, struct nvfx_context *nvfx = nvfx_context(pipe); struct nvfx_vertex_program *vp; + // TODO: use a 64-bit atomic here! + static unsigned long long id = 0; + vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); + vp->id = ++id; return (void *)vp; } diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 9ceb2577ecc..e1fa3c7e041 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -4,6 +4,8 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" #include "nouveau/nouveau_statebuf.h" +#include "util/u_dynarray.h" +#include "util/u_linkage.h" struct nvfx_vertex_program_exec { uint32_t data[4]; @@ -18,6 +20,7 @@ struct nvfx_vertex_program_data { struct nvfx_vertex_program { struct pipe_shader_state pipe; + unsigned long long id; struct draw_vertex_shader *draw; @@ -30,6 +33,9 @@ struct nvfx_vertex_program { struct nvfx_vertex_program_data *consts; unsigned nr_consts; + char generic_to_fp_input[256]; + unsigned texcoord_ouput_mask; + struct nouveau_resource *exec; unsigned exec_start; struct nouveau_resource *data; @@ -49,6 +55,7 @@ struct nvfx_fragment_program_data { struct nvfx_fragment_program_bo { struct nvfx_fragment_program_bo* next; struct nouveau_bo* bo; + unsigned char* slots; char insn[] __attribute__((aligned(16))); }; @@ -58,6 +65,7 @@ struct nvfx_fragment_program { boolean translated; unsigned samplers; + unsigned point_sprite_control; uint32_t *insn; int insn_len; @@ -65,11 +73,27 @@ struct nvfx_fragment_program { struct nvfx_fragment_program_data *consts; unsigned nr_consts; + unsigned num_slots; /* how many input semantics? */ + unsigned char slot_to_generic[8]; /* semantics */ + unsigned char slot_to_fp_input[8]; /* current assignment of slots for each used semantic */ + struct util_dynarray slot_relocations[8]; + + /* This is reset to progs on any relocation update, and decreases every time we + * move to a new prog due to a constant update + * When this is the same as progs, applying relocations is no longer necessary. + */ + unsigned progs_left_with_obsolete_slot_assignments; + + unsigned long long last_vp_id; + unsigned last_sprite_coord_enable; + uint32_t fp_control; unsigned bo_prog_idx; unsigned prog_size; unsigned progs_per_bo; + unsigned progs; + struct nvfx_fragment_program_bo* fpbo; }; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index b9d18977919..2e0e366ca3d 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -15,6 +15,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(nvfx != nvfx->screen->cur_ctx) { nvfx->dirty = ~0; + nvfx->hw_vtxelt_nr = 16; nvfx->screen->cur_ctx = nvfx; } @@ -86,7 +87,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(dirty & NVFX_NEW_STIPPLE) nvfx_state_stipple_validate(nvfx); - if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) + if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST | NVFX_NEW_VERTPROG | NVFX_NEW_SPRITE)) { nvfx_fragprog_validate(nvfx); if(dirty & NVFX_NEW_FRAGPROG) diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 939d2b83aee..3d2f2b9fba0 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "util/u_inlines.h" +#include "util/u_linkage.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" @@ -61,7 +61,7 @@ temp(struct nvfx_vpc *vpc) return nvfx_sr(NVFXSR_TEMP, idx); } -static INLINE void +static inline void release_temps(struct nvfx_vpc *vpc) { vpc->r_temps &= ~vpc->r_temps_discard; @@ -339,7 +339,7 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, emit_src(nvfx, vpc, hw, 2, s2); } -static INLINE struct nvfx_sreg +static inline struct nvfx_sreg tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { struct nvfx_sreg src = { 0 }; @@ -385,14 +385,14 @@ tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { dst = vpc->r_address[fdst->Register.Index]; break; default: - NOUVEAU_ERR("bad dst file\n"); + NOUVEAU_ERR("bad dst file %i\n", fdst->Register.File); break; } return dst; } -static INLINE int +static inline int tgsi_mask(uint tgsi) { int mask = 0; @@ -650,12 +650,8 @@ nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, hw = NVFX_VP(INST_DEST_PSZ); break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } + hw = (vpc->vp->generic_to_fp_input[fdec->Semantic.Index] & 0xf) + + NVFX_VP(INST_DEST_TC(0)) - NVFX_FP_OP_INPUT_SRC_TC(0); break; case TGSI_SEMANTIC_EDGEFLAG: /* not really an error just a fallback */ @@ -675,6 +671,29 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) { struct tgsi_parse_context p; int high_temp = -1, high_addr = -1, nr_imm = 0, i; + struct util_semantic_set set; + unsigned char sem_layout[8]; + unsigned sem_layout_size; + unsigned num_outputs; + + num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT); + + if(num_outputs > 8) { + NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs); + return FALSE; + } + util_semantic_layout_from_set(sem_layout, &set, 8, 8); + + /* hope 0xf is (0, 0, 0, 1) initialized; otherwise, we are _probably_ not required to do this */ + memset(vpc->vp->generic_to_fp_input, 0x0f, sizeof(vpc->vp->generic_to_fp_input)); + vpc->vp->texcoord_ouput_mask = 0; + for(int i = 0; i < 8; ++i) { + if(sem_layout[i] == 0xff) + continue; + vpc->vp->texcoord_ouput_mask |= (1 << i); + //printf("vp: GENERIC[%i] to fpreg %i\n", sem_layout[i], NVFX_FP_OP_INPUT_SRC_TC(0) + i); + vpc->vp->generic_to_fp_input[sem_layout[i]] = 0xf0 | (NVFX_FP_OP_INPUT_SRC_TC(0) + i); + } tgsi_parse_init(&p, vpc->vp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { |