summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvfx/nvfx_fragprog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nvfx/nvfx_fragprog.c')
-rw-r--r--src/gallium/drivers/nvfx/nvfx_fragprog.c204
1 files changed, 142 insertions, 62 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index ae4fe3aa262..0a599c62a74 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -16,8 +16,6 @@
struct nvfx_fpc {
struct nvfx_fragment_program *fp;
- uint attrib_map[PIPE_MAX_SHADER_INPUTS];
-
unsigned r_temps;
unsigned r_temps_discard;
struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];
@@ -36,6 +34,8 @@ struct nvfx_fpc {
struct nvfx_sreg imm[MAX_IMM];
unsigned nr_imm;
+
+ unsigned char generic_to_slot[256]; /* semantic idx for each input semantic */
};
static INLINE struct nvfx_sreg
@@ -111,6 +111,11 @@ emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
break;
+ case NVFXSR_RELOCATED:
+ sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
+ //printf("adding relocation at %x for %x\n", fpc->inst_offset, src.index);
+ util_dynarray_append(&fpc->fp->slot_relocations[src.index], unsigned, fpc->inst_offset);
+ break;
case NVFXSR_CONST:
if (!fpc->have_const) {
grow_insns(fpc, 4);
@@ -241,8 +246,28 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
switch (fsrc->Register.File) {
case TGSI_FILE_INPUT:
- src = nvfx_sr(NVFXSR_INPUT,
- fpc->attrib_map[fsrc->Register.Index]);
+ if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_POSITION) {
+ assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_POSITION);
+ } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_COLOR) {
+ if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0)
+ src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL0);
+ else if(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 1)
+ src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_COL1);
+ else
+ assert(0);
+ } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FOG) {
+ assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src = nvfx_sr(NVFXSR_INPUT, NVFX_FP_OP_INPUT_SRC_FOGC);
+ } else if(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_FACE) {
+ /* TODO: check this has the correct values */
+ /* XXX: what do we do for nv30 here (assuming it lacks facing)?! */
+ assert(fpc->fp->info.input_semantic_index[fsrc->Register.Index] == 0);
+ src = nvfx_sr(NVFXSR_INPUT, NV40_FP_OP_INPUT_SRC_FACING);
+ } else {
+ assert(fpc->fp->info.input_semantic_name[fsrc->Register.Index] == TGSI_SEMANTIC_GENERIC);
+ src = nvfx_sr(NVFXSR_RELOCATED, fpc->generic_to_slot[fpc->fp->info.input_semantic_index[fsrc->Register.Index]]);
+ }
break;
case TGSI_FILE_CONSTANT:
src = constant(fpc, fsrc->Register.Index, NULL);
@@ -611,48 +636,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
}
static boolean
-nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
- const struct tgsi_full_declaration *fdec)
-{
- int hw;
-
- switch (fdec->Semantic.Name) {
- case TGSI_SEMANTIC_POSITION:
- hw = NVFX_FP_OP_INPUT_SRC_POSITION;
- break;
- case TGSI_SEMANTIC_COLOR:
- if (fdec->Semantic.Index == 0) {
- hw = NVFX_FP_OP_INPUT_SRC_COL0;
- } else
- if (fdec->Semantic.Index == 1) {
- hw = NVFX_FP_OP_INPUT_SRC_COL1;
- } else {
- NOUVEAU_ERR("bad colour semantic index\n");
- return FALSE;
- }
- break;
- case TGSI_SEMANTIC_FOG:
- hw = NVFX_FP_OP_INPUT_SRC_FOGC;
- break;
- case TGSI_SEMANTIC_GENERIC:
- if (fdec->Semantic.Index <= 7) {
- hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
- Index);
- } else {
- NOUVEAU_ERR("bad generic semantic index\n");
- return FALSE;
- }
- break;
- default:
- NOUVEAU_ERR("bad input semantic\n");
- return FALSE;
- }
-
- fpc->attrib_map[fdec->Range.First] = hw;
- return TRUE;
-}
-
-static boolean
nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -691,6 +674,15 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
struct tgsi_parse_context p;
int high_temp = -1, i;
+ struct util_semantic_set set;
+
+ fpc->fp->num_slots = util_semantic_set_from_program_file(&set, fpc->fp->pipe.tokens, TGSI_FILE_INPUT);
+ if(fpc->fp->num_slots > 8)
+ return FALSE;
+ util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, 8);
+ util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, 8);
+
+ memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));
tgsi_parse_init(&p, fpc->fp->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
@@ -703,10 +695,6 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
const struct tgsi_full_declaration *fdec;
fdec = &p.FullToken.FullDeclaration;
switch (fdec->Declaration.File) {
- case TGSI_FILE_INPUT:
- if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
- goto out_err;
- break;
case TGSI_FILE_OUTPUT:
if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
goto out_err;
@@ -805,7 +793,7 @@ nvfx_fragprog_translate(struct nvfx_context *nvfx,
/* Terminate final instruction */
if(fp->insn)
- fp->insn[fpc->inst_offset] |= 0x00000001;
+ fp->insn[fpc->inst_offset] |= 0x00000001;
/* Append NOP + END instruction, may or may not be necessary. */
fpc->inst_offset = fp->insn_len;
@@ -881,9 +869,70 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
update = TRUE;
- if(update) {
- int offset;
+ struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
+ if (fp->last_vp_id != vp->id) {
+ char* vp_sem_table = vp->generic_to_fp_input;
+ unsigned char* fp_semantics = fp->slot_to_generic;
+ unsigned diff = 0;
+ fp->last_vp_id = nvfx->vertprog->id;
+ unsigned char* cur_slots = fp->slot_to_fp_input;
+ for(unsigned i = 0; i < fp->num_slots; ++i) {
+ unsigned char slot_mask = vp_sem_table[fp_semantics[i]];
+ diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]);
+ }
+
+ if(diff)
+ {
+ for(unsigned i = 0; i < fp->num_slots; ++i) {
+ /* if 0xff, then this will write to the dummy value at fp->last_layout_mask[0] */
+ fp->slot_to_fp_input[i] = vp_sem_table[fp_semantics[i]] & 0xf;
+ //printf("fp: GENERIC[%i] from fpreg %i\n", fp_semantics[i], fp->slot_to_fp_input[i]);
+ }
+
+ fp->progs_left_with_obsolete_slot_assignments = fp->progs;
+ update = TRUE;
+ }
+ }
+
+ // last_sprite_coord_enable
+ unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable;
+ if(fp->last_sprite_coord_enable != sprite_coord_enable)
+ {
+ unsigned texcoord_mask = vp->texcoord_ouput_mask;
+ fp->last_sprite_coord_enable = sprite_coord_enable;
+ fp->point_sprite_control = 0;
+ for(unsigned i = 0; i < fp->num_slots; ++i) {
+ if((1 << fp->slot_to_generic[i]) & sprite_coord_enable)
+ {
+ unsigned fpin = fp->slot_to_fp_input[i];
+ //printf("sprite: slot %i generic %i had texcoord %i\n", i, fp->slot_to_generic[i], fpin - NVFX_FP_OP_INPUT_SRC_TC0);
+ if(fpin >= 0x0f)
+ {
+ unsigned tc = __builtin_ctz(~texcoord_mask);
+ texcoord_mask |= (1 << tc);
+ fp->slot_to_fp_input[i] = fpin = NVFX_FP_OP_INPUT_SRC_TC(tc);
+
+ fp->progs_left_with_obsolete_slot_assignments = fp->progs;
+ update = TRUE;
+ }
+ //printf("sprite: slot %i texcoord %i\n", i, fpin - NVFX_FP_OP_INPUT_SRC_TC0);
+ fp->point_sprite_control |= (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0 + 8));
+ }
+ else
+ {
+ unsigned fpin = fp->slot_to_fp_input[i];
+ if(!(vp->texcoord_ouput_mask & (1 << (fpin - NVFX_FP_OP_INPUT_SRC_TC0))))
+ {
+ fp->slot_to_fp_input[i] = 0x0f;
+ fp->progs_left_with_obsolete_slot_assignments = fp->progs;
+ update = TRUE;
+ }
+ }
+ }
+ }
+
+ if(update) {
++fp->bo_prog_idx;
if(fp->bo_prog_idx >= fp->progs_per_bo)
{
@@ -893,10 +942,9 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
}
else
{
- struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + fp->prog_size * fp->progs_per_bo, 16);
- char *map, *buf;
- int i;
-
+ struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
+ fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
+ memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
if(fp->fpbo)
{
fpbo->next = fp->fpbo->next;
@@ -906,12 +954,14 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
fpbo->next = fpbo;
fp->fpbo = fpbo;
fpbo->bo = 0;
+ fp->progs += fp->progs_per_bo;
+ fp->progs_left_with_obsolete_slot_assignments += fp->progs_per_bo;
nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);
- map = fpbo->bo->map;
- buf = fpbo->insn;
- for(i = 0; i < fp->progs_per_bo; ++i)
+ uint8_t* map = fpbo->bo->map;
+ uint8_t* buf = (uint8_t*)fpbo->insn;
+ for(unsigned i = 0; i < fp->progs_per_bo; ++i)
{
memcpy(buf, fp->insn, fp->insn_len * 4);
nvfx_fp_memcpy(map, fp->insn, fp->insn_len * 4);
@@ -922,7 +972,8 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
fp->bo_prog_idx = 0;
}
- offset = fp->bo_prog_idx * fp->prog_size;
+ int offset = fp->bo_prog_idx * fp->prog_size;
+ uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
@@ -941,6 +992,25 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
}
}
}
+
+ if(fp->progs_left_with_obsolete_slot_assignments) {
+ unsigned char* fpbo_slots = &fp->fpbo->slots[fp->bo_prog_idx * 8];
+ for(unsigned i = 0; i < fp->num_slots; ++i) {
+ unsigned value = fp->slot_to_fp_input[i];;
+ if(value != fpbo_slots[i]) {
+ unsigned* p = (unsigned*)fp->slot_relocations[i].data;
+ unsigned* pend = (unsigned*)((char*)fp->slot_relocations[i].data + fp->slot_relocations[i].size);
+ for(; p != pend; ++p) {
+ unsigned off = *p;
+ unsigned dw = fp->insn[off];
+ dw = (dw & ~NVFX_FP_OP_INPUT_SRC_MASK) | (value << NVFX_FP_OP_INPUT_SRC_SHIFT);
+ nvfx_fp_memcpy(&fpmap[*p], &dw, sizeof(dw));
+ }
+ fpbo_slots[i] = value;
+ }
+ }
+ --fp->progs_left_with_obsolete_slot_assignments;
+ }
}
if(update || (nvfx->dirty & NVFX_NEW_FRAGPROG)) {
@@ -960,6 +1030,13 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)
OUT_RING(chan, fp->samplers);
}
}
+
+ if(nvfx->dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SPRITE))
+ {
+ WAIT_RING(chan, 2);
+ OUT_RING(chan, RING_3D(NV34TCL_POINT_SPRITE, 1));
+ OUT_RING(chan, fp->point_sprite_control | nvfx->rasterizer->pipe.point_quad_rasterization);
+ }
}
void
@@ -982,6 +1059,7 @@ void
nvfx_fragprog_destroy(struct nvfx_context *nvfx,
struct nvfx_fragment_program *fp)
{
+ unsigned i;
struct nvfx_fragment_program_bo* fpbo = fp->fpbo;
if(fpbo)
{
@@ -996,7 +1074,9 @@ nvfx_fragprog_destroy(struct nvfx_context *nvfx,
while(fpbo != fp->fpbo);
}
+ for(i = 0; i < 8; ++i)
+ util_dynarray_fini(&fp->slot_relocations[i]);
+
if (fp->insn_len)
FREE(fp->insn);
}
-