diff options
Diffstat (limited to 'src/gallium/drivers/nvfx/nvfx_fragprog.c')
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragprog.c | 204 |
1 files changed, 142 insertions, 62 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index ae4fe3aa262..0a599c62a74 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -16,8 +16,6 @@ struct nvfx_fpc { struct nvfx_fragment_program *fp; - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - unsigned r_temps; unsigned r_temps_discard; struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; @@ -36,6 +34,8 @@ struct nvfx_fpc { struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; + + unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */ }; static INLINE struct nvfx_sreg @@ -111,6 +111,11 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; + case NVFXSR_RELOCATED: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index); + util_dynarray_append(&fpc->fp->slot_relocations[src.index], unsigned, fpc->inst_offset); + break; case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); @@ -241,8 +246,28 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); + if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) { + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) { + if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0) + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0); + else if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 1) + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1); + else + assert(0); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) { + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC); + } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) { + /* TODO: check this has the correct values */ + /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */ + assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0); + src = nvfx_sr(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING); + } else { + assert(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC); + src = nvfx_sr(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->fp->info.input_semantic_index[fsrc->Register.Index]]); + } break; case TGSI_FILE_CONSTANT: src = constant(fpc, fsrc->Register.Index, NULL); @@ -611,48 +636,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, } static boolean -nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NVFX_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NVFX_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NVFX_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NVFX_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { @@ -691,6 +674,15 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) { struct tgsi_parse_context p; int high_temp = -1, i; + struct util_semantic_set set; + + fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->fp->pipe.tokens, TGSI_FILE_INPUT); + if(fpc->fp->num_slots > 8) + return FALSE; + util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8); + util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8); + + memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input)); tgsi_parse_init(&p, fpc->fp->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { @@ -703,10 +695,6 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) const struct tgsi_full_declaration *fdec; fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) - goto out_err; - break; case TGSI_FILE_OUTPUT: if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) goto out_err; @@ -805,7 +793,7 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx, /* Terminate final instruction */ if(fp->insn) - fp->insn[fpc->inst_offset] |= 0x00000001; + fp->insn[fpc->inst_offset] |= 0x00000001; /* Append NOP + END instruction, may or may not be necessary. */ fpc->inst_offset = fp->insn_len; @@ -881,9 +869,70 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG)) update = TRUE; - if(update) { - int offset; + struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog; + if (fp->last_vp_id != vp->id) { + char* vp_sem_table = vp->generic_to_fp_input; + unsigned char* fp_semantics = fp->slot_to_generic; + unsigned diff = 0; + fp->last_vp_id = nvfx->vertprog->id; + unsigned char* cur_slots = fp->slot_to_fp_input; + for(unsigned i = 0; i < fp->num_slots; ++i) { + unsigned char slot_mask = vp_sem_table[fp_semantics[i]]; + diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]); + } + + if(diff) + { + for(unsigned i = 0; i < fp->num_slots; ++i) { + /* if 0xff, then this will write to the dummy value at fp->last_layout_mask[0] */ + fp->slot_to_fp_input[i] = vp_sem_table[fp_semantics[i]] & 0xf; + //printf("fp: GENERIC[%i] from fpreg %i\n", fp_semantics[i], fp->slot_to_fp_input[i]); + } + + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + } + + // last_sprite_coord_enable + unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable; + if(fp->last_sprite_coord_enable != sprite_coord_enable) + { + unsigned texcoord_mask = vp->texcoord_ouput_mask; + fp->last_sprite_coord_enable = sprite_coord_enable; + fp->point_sprite_control = 0; + for(unsigned i = 0; i < fp->num_slots; ++i) { + if((1 << fp->slot_to_generic[i]) & sprite_coord_enable) + { + unsigned fpin = fp->slot_to_fp_input[i]; + //printf("sprite: slot %i generic %i had texcoord %i\n", i, fp->slot_to_generic[i], fpin - NVFX_FP_OP_INPUT_SRC_TC0); + if(fpin >= 0x0f) + { + unsigned tc = __builtin_ctz(~texcoord_mask); + texcoord_mask |= (1 << tc); + fp->slot_to_fp_input[i] = fpin = NVFX_FP_OP_INPUT_SRC_TC(tc); + + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + //printf("sprite: slot %i texcoord %i\n", i, fpin - NVFX_FP_OP_INPUT_SRC_TC0); + fp->point_sprite_control |= (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0 + 8)); + } + else + { + unsigned fpin = fp->slot_to_fp_input[i]; + if(!(vp->texcoord_ouput_mask & (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0)))) + { + fp->slot_to_fp_input[i] = 0x0f; + fp->progs_left_with_obsolete_slot_assignments = fp->progs; + update = TRUE; + } + } + } + } + + if(update) { ++fp->bo_prog_idx; if(fp->bo_prog_idx >= fp->progs_per_bo) { @@ -893,10 +942,9 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } else { - struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16); - char *map, *buf; - int i; - + struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16); + fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo]; + memset(fpbo->slots, 0, 8 * fp->progs_per_bo); if(fp->fpbo) { fpbo->next = fp->fpbo->next; @@ -906,12 +954,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) fpbo->next = fpbo; fp->fpbo = fpbo; fpbo->bo = 0; + fp->progs += fp->progs_per_bo; + fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo; nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo); nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC); - map = fpbo->bo->map; - buf = fpbo->insn; - for(i = 0; i < fp->progs_per_bo; ++i) + uint8_t* map = fpbo->bo->map; + uint8_t* buf = (uint8_t*)fpbo->insn; + for(unsigned i = 0; i < fp->progs_per_bo; ++i) { memcpy(buf, fp->insn, fp->insn_len * 4); nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4); @@ -922,7 +972,8 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) fp->bo_prog_idx = 0; } - offset = fp->bo_prog_idx * fp->prog_size; + int offset = fp->bo_prog_idx * fp->prog_size; + uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset); if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) { struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT]; @@ -941,6 +992,25 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) } } } + + if(fp->progs_left_with_obsolete_slot_assignments) { + unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8]; + for(unsigned i = 0; i < fp->num_slots; ++i) { + unsigned value = fp->slot_to_fp_input[i];; + if(value != fpbo_slots[i]) { + unsigned* p = (unsigned*)fp->slot_relocations[i].data; + unsigned* pend = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size); + for(; p != pend; ++p) { + unsigned off = *p; + unsigned dw = fp->insn[off]; + dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT); + nvfx_fp_memcpy(&fpmap[*p], &dw, sizeof(dw)); + } + fpbo_slots[i] = value; + } + } + --fp->progs_left_with_obsolete_slot_assignments; + } } if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) { @@ -960,6 +1030,13 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) OUT_RING(chan, fp->samplers); } } + + if(nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SPRITE)) + { + WAIT_RING(chan, 2); + OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1)); + OUT_RING(chan, fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization); + } } void @@ -982,6 +1059,7 @@ void nvfx_fragprog_destroy(struct nvfx_context *nvfx, struct nvfx_fragment_program *fp) { + unsigned i; struct nvfx_fragment_program_bo* fpbo = fp->fpbo; if(fpbo) { @@ -996,7 +1074,9 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx, while(fpbo != fp->fpbo); } + for(i = 0; i < 8; ++i) + util_dynarray_fini(&fp->slot_relocations[i]); + if (fp->insn_len) FREE(fp->insn); } - |