diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/nv50/Makefile.sources | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir.cpp | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_driver.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 785 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.h | 60 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_shader_state.c | 50 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state_validate.c | 7 |
10 files changed, 262 insertions, 664 deletions
diff --git a/src/gallium/drivers/nv50/Makefile.sources b/src/gallium/drivers/nv50/Makefile.sources index 507540022eb..eeb0e11652a 100644 --- a/src/gallium/drivers/nv50/Makefile.sources +++ b/src/gallium/drivers/nv50/Makefile.sources @@ -13,12 +13,6 @@ C_SOURCES := \ nv50_vbo.c \ nv50_program.c \ nv50_shader_state.c \ - nv50_pc.c \ - nv50_pc_print.c \ - nv50_pc_emit.c \ - nv50_tgsi_to_nc.c \ - nv50_pc_optimize.c \ - nv50_pc_regalloc.c \ nv50_push.c \ nv50_query.c diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp index 048759060ad..1006985ab93 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir.cpp @@ -1051,6 +1051,7 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) } info->io.clipDistance = 0xff; info->io.pointSize = 0xff; + info->io.instanceId = 0xff; info->io.vertexId = 0xff; info->io.edgeFlagIn = 0xff; info->io.edgeFlagOut = 0xff; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h index dc42b8295e9..e734c5b03bd 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_driver.h @@ -164,6 +164,7 @@ struct nv50_ir_prog_info uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ int8_t genUserClip; /* request user clip planes for ClipVertex */ uint8_t pointSize; /* output index for PointSize */ + uint8_t instanceId; /* system value index of InstanceID */ uint8_t vertexId; /* system value index of VertexID */ uint8_t edgeFlagIn; uint8_t edgeFlagOut; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 1cfaf3aefae..4530dc23715 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -859,6 +859,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) break; case TGSI_FILE_SYSTEM_VALUE: switch (sn) { + case TGSI_SEMANTIC_INSTANCEID: + info->io.instanceId = first; + break; case TGSI_SEMANTIC_VERTEXID: info->io.vertexId = first; break; diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index ab185194786..65304a8f1ff 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -162,11 +162,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *); /* nv50_draw.c */ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); -/* nv50_program.c */ -boolean nv50_program_translate(struct nv50_program *); -boolean nv50_program_translate_new(struct nv50_program *); -void nv50_program_destroy(struct nv50_context *, struct nv50_program *); - /* nv50_query.c */ void nv50_init_query_functions(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 776cecaeb7e..115a425fde6 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -21,658 +21,321 @@ */ #include "nv50_program.h" -#include "nv50_pc.h" #include "nv50_context.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" - #include "codegen/nv50_ir_driver.h" static INLINE unsigned bitcount4(const uint32_t val) { - static const unsigned cnt[16] + static const uint8_t cnt[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; return cnt[val & 0xf]; } -static unsigned -nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) -{ - unsigned mask = inst->Dst[0].Register.WriteMask; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); - case TGSI_OPCODE_DP3: - return 0x7; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_KIL: /* WriteMask ignored */ - return 0xf; - case TGSI_OPCODE_DST: - return mask & (c ? 0xa : 0x6); - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_EXP: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_LOG: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: - return 0x1; - case TGSI_OPCODE_IF: - return 0x1; - case TGSI_OPCODE_LIT: - return 0xb; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXP: - { - const struct tgsi_instruction_texture *tex; - - assert(inst->Instruction.Texture); - tex = &inst->Texture; - - mask = 0x7; - if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && - inst->Instruction.Opcode != TGSI_OPCODE_TXD) - mask |= 0x8; /* bias, lod or proj */ - - switch (tex->Texture) { - case TGSI_TEXTURE_1D: - mask &= 0x9; - break; - case TGSI_TEXTURE_SHADOW1D: - mask &= 0x5; - break; - case TGSI_TEXTURE_2D: - mask &= 0xb; - break; - default: - break; - } - } - return mask; - case TGSI_OPCODE_XPD: - { - unsigned x = 0; - if (mask & 1) x |= 0x6; - if (mask & 2) x |= 0x5; - if (mask & 4) x |= 0x3; - return x; - } - default: - break; - } - - return mask; -} - -static void -nv50_indirect_inputs(struct nv50_translation_info *ti, int id) +static int +nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) { - int i, c; - - for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) - for (c = 0; c < 4; ++c) - ti->input_access[i][c] = id; + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, c; - ti->indirect_inputs = TRUE; -} + n = 0; + for (i = 0; i < info->numInputs; ++i) { + prog->in[i].id = i; + prog->in[i].sn = info->in[i].sn; + prog->in[i].si = info->in[i].si; + prog->in[i].hw = n; + prog->in[i].mask = info->in[i].mask; -static void -nv50_indirect_outputs(struct nv50_translation_info *ti, int id) -{ - int i, c; + prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); - for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) for (c = 0; c < 4; ++c) - ti->output_access[i][c] = id; - - ti->indirect_outputs = TRUE; -} - -static void -prog_inst(struct nv50_translation_info *ti, - const struct tgsi_full_instruction *inst, int id) -{ - const struct tgsi_dst_register *dst; - const struct tgsi_src_register *src; - int s, c, k; - unsigned mask; - - if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { - ti->subr[ti->subr_nr].pos = id - 1; - ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */ - ++ti->subr_nr; + if (info->in[i].mask & (1 << c)) + info->in[i].slot[c] = n++; } + prog->in_nr = info->numInputs; - if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { - dst = &inst->Dst[0].Register; - - for (c = 0; c < 4; ++c) { - if (dst->Indirect) - nv50_indirect_outputs(ti, id); - if (!(dst->WriteMask & (1 << c))) - continue; - ti->output_access[dst->Index][c] = id; - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && - inst->Src[0].Register.File == TGSI_FILE_INPUT && - dst->Index == ti->edgeflag_out) - ti->p->vp.edgeflag = inst->Src[0].Register.Index; - } else - if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { - if (inst->Dst[0].Register.Indirect) - ti->store_to_memory = TRUE; - } - - for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { - src = &inst->Src[s].Register; - if (src->File == TGSI_FILE_TEMPORARY) - if (inst->Src[s].Register.Indirect) - ti->store_to_memory = TRUE; - if (src->File != TGSI_FILE_INPUT) + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_INSTANCEID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; continue; - mask = nv50_tgsi_src_mask(inst, s); - - if (inst->Src[s].Register.Indirect) - nv50_indirect_inputs(ti, id); - - for (c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); - if (k <= TGSI_SWIZZLE_W) - ti->input_access[src->Index][k] = id; - } - } -} - -/* Probably should introduce something like struct tgsi_function_declaration - * instead of trying to guess inputs/outputs. - */ -static void -prog_subroutine_inst(struct nv50_subroutine *subr, - const struct tgsi_full_instruction *inst) -{ - const struct tgsi_dst_register *dst; - const struct tgsi_src_register *src; - int s, c, k; - unsigned mask; - - for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { - src = &inst->Src[s].Register; - if (src->File != TGSI_FILE_TEMPORARY) + case TGSI_SEMANTIC_VERTEXID: + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; + prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12; continue; - mask = nv50_tgsi_src_mask(inst, s); - - assert(!inst->Src[s].Register.Indirect); - - for (c = 0; c < 4; ++c) { - k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); - - if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) - if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) - subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); - } - } - - if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { - dst = &inst->Dst[0].Register; - - for (c = 0; c < 4; ++c) - if (dst->WriteMask & (1 << c)) - subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); - } -} - -static void -prog_immediate(struct nv50_translation_info *ti, - const struct tgsi_full_immediate *imm) -{ - int c; - unsigned n = ti->immd32_nr++; - - assert(ti->immd32_nr <= ti->scan.immediate_count); - - for (c = 0; c < 4; ++c) - ti->immd32[n * 4 + c] = imm->u[c].Uint; - - ti->immd32_ty[n] = imm->Immediate.DataType; -} - -static INLINE unsigned -translate_interpolate(const struct tgsi_full_declaration *decl) -{ - unsigned mode; - - if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) - mode = NV50_INTERP_FLAT; - else - if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) - mode = 0; - else - mode = NV50_INTERP_LINEAR; - - if (decl->Declaration.Centroid) - mode |= NV50_INTERP_CENTROID; - - return mode; -} - -static void -prog_decl(struct nv50_translation_info *ti, - const struct tgsi_full_declaration *decl) -{ - unsigned i, first, last, sn = 0, si = 0; - - first = decl->Range.First; - last = decl->Range.Last; - - if (decl->Declaration.Semantic) { - sn = decl->Semantic.Name; - si = decl->Semantic.Index; - } - - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - for (i = first; i <= last; ++i) - ti->interp_mode[i] = translate_interpolate(decl); - - if (!decl->Declaration.Semantic) - break; - - for (i = first; i <= last; ++i) { - ti->p->in[i].sn = sn; - ti->p->in[i].si = si; - } - - switch (sn) { - case TGSI_SEMANTIC_FACE: - break; - case TGSI_SEMANTIC_COLOR: - if (ti->p->type == PIPE_SHADER_FRAGMENT) - ti->p->vp.bfc[si] = first; - break; - } - break; - case TGSI_FILE_OUTPUT: - if (!decl->Declaration.Semantic) - break; - - for (i = first; i <= last; ++i) { - ti->p->out[i].sn = sn; - ti->p->out[i].si = si; - } - - switch (sn) { - case TGSI_SEMANTIC_BCOLOR: - ti->p->vp.bfc[si] = first; - break; - case TGSI_SEMANTIC_PSIZE: - ti->p->vp.psiz = first; - break; - case TGSI_SEMANTIC_EDGEFLAG: - ti->edgeflag_out = first; - break; default: break; } - break; - case TGSI_FILE_SYSTEM_VALUE: - /* For VP/GP inputs, they are put in s[] after the last normal input. - * Let sysval_map reflect the order of the sysvals in s[] and fixup later. - */ - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_FACE: - break; - case TGSI_SEMANTIC_INSTANCEID: - ti->p->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; - ti->sysval_map[first] = 2; + } + /* VertexID before InstanceID */ + if (info->io.vertexId < info->numSysVals) + info->sv[info->io.vertexId].slot[0] = n++; + if (info->io.instanceId < info->numSysVals) + info->sv[info->io.instanceId].slot[0] = n++; + + n = 0; + for (i = 0; i < info->numOutputs; ++i) { + switch (info->out[i].sn) { + case TGSI_SEMANTIC_PSIZE: + prog->vp.psiz = i; break; - case TGSI_SEMANTIC_PRIMID: + case TGSI_SEMANTIC_CLIPDIST: + prog->vp.clpd[info->out[i].si] = n; break; - /* - case TGSI_SEMANTIC_PRIMIDIN: + case TGSI_SEMANTIC_EDGEFLAG: + prog->vp.edgeflag = i; break; - case TGSI_SEMANTIC_VERTEXID: + case TGSI_SEMANTIC_BCOLOR: + prog->vp.bfc[info->out[i].si] = i; break; - */ default: break; } - break; - case TGSI_FILE_CONSTANT: - ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16); - break; - case TGSI_FILE_ADDRESS: - case TGSI_FILE_SAMPLER: - case TGSI_FILE_TEMPORARY: - break; - default: - assert(0); - break; - } -} - -static int -nv50_vertprog_prepare(struct nv50_translation_info *ti) -{ - struct nv50_program *p = ti->p; - int i, c; - unsigned num_inputs = 0; - - ti->input_file = NV_FILE_MEM_S; - ti->output_file = NV_FILE_OUT; - - for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { - p->in[i].id = i; - p->in[i].hw = num_inputs; - - for (c = 0; c < 4; ++c) { - if (!ti->input_access[i][c]) - continue; - ti->input_map[i][c] = num_inputs++; - p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32); - } - } - - for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { - p->out[i].id = i; - p->out[i].hw = p->max_out; - - for (c = 0; c < 4; ++c) { - if (!ti->output_access[i][c]) - continue; - ti->output_map[i][c] = p->max_out++; - p->out[i].mask |= 1 << c; - } - } - - p->vp.clpd = p->max_out; - p->max_out += p->vp.clpd_nr; + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].hw = n; + prog->out[i].mask = info->out[i].mask; - for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { - switch (ti->sysval_map[i]) { - case 2: - if (!(ti->p->vp.attrs[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID)) - ti->sysval_map[i] = 1; - ti->sysval_map[i] = (ti->sysval_map[i] - 1) + num_inputs; - break; - default: - break; - } + for (c = 0; c < 4; ++c) + if (info->out[i].mask & (1 << c)) + info->out[i].slot[c] = n++; } + prog->out_nr = info->numOutputs; + prog->max_out = n; - if (p->vp.psiz < 0x40) - p->vp.psiz = p->out[p->vp.psiz].hw; + if (prog->vp.psiz < info->numOutputs) + prog->vp.psiz = prog->out[prog->vp.psiz].hw; return 0; } static int -nv50_fragprog_prepare(struct nv50_translation_info *ti) +nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) { - struct nv50_program *p = ti->p; - int i, j, c; - unsigned nvary, nintp, depr; - unsigned n = 0, m = 0, skip = 0; - ubyte sn[16], si[16]; - - /* FP flags */ - - if (ti->scan.writes_z) { - p->fp.flags[1] = 0x11; - p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; - } - - if (ti->scan.uses_kill) - p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; - - /* FP inputs */ - - ti->input_file = NV_FILE_MEM_V; - ti->output_file = NV_FILE_GPR; - - /* count non-flat inputs, save semantic info */ - for (i = 0; i < p->in_nr; ++i) { - m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1; - sn[i] = p->in[i].sn; - si[i] = p->in[i].si; + struct nv50_program *prog = (struct nv50_program *)info->driverPriv; + unsigned i, n, m, c; + unsigned nvary; + unsigned nflat; + unsigned nintp = 0; + + /* count recorded non-flat inputs */ + for (m = 0, i = 0; i < info->numInputs; ++i) { + switch (info->in[i].sn) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_FACE: + continue; + default: + m += info->in[i].flat ? 0 : 1; + break; + } } + /* careful: id may be != i in info->in[prog->in[i].id] */ - /* reorder p->in[] so that non-flat inputs are first and - * kick out special inputs that don't use VP/GP_RESULT_MAP + /* Fill prog->in[] so that non-flat inputs are first and + * kick out special inputs that don't use the RESULT_MAP. */ - nintp = 0; - for (i = 0; i < p->in_nr; ++i) { - if (sn[i] == TGSI_SEMANTIC_POSITION) { - for (c = 0; c < 4; ++c) { - ti->input_map[i][c] = nintp; - if (ti->input_access[i][c]) { - p->fp.interp |= 1 << (24 + c); - ++nintp; - } - } - skip++; - continue; + for (n = 0, i = 0; i < info->numInputs; ++i) { + if (info->in[i].sn == TGSI_SEMANTIC_POSITION) { + prog->fp.interp |= info->in[i].mask << 24; + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) + info->in[i].slot[c] = nintp++; } else - if (sn[i] == TGSI_SEMANTIC_FACE) { - ti->input_map[i][0] = 255; - skip++; - continue; - } + if (info->in[i].sn == TGSI_SEMANTIC_FACE) { + info->in[i].slot[0] = 255; + } else { + unsigned j = info->in[i].flat ? m++ : n++; - j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++; + if (info->in[i].sn == TGSI_SEMANTIC_COLOR) + prog->vp.bfc[info->in[i].si] = j; - if (sn[i] == TGSI_SEMANTIC_COLOR) - p->vp.bfc[si[i]] = j; - - p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0; - p->in[j].id = i; - p->in[j].sn = sn[i]; - p->in[j].si = si[i]; - } - assert(n <= m); - p->in_nr -= skip; + prog->in[j].id = i; + prog->in[j].mask = info->in[i].mask; + prog->in[j].sn = info->in[i].sn; + prog->in[j].si = info->in[i].si; + prog->in[j].linear = info->in[i].linear; - if (!(p->fp.interp & (8 << 24))) { - p->fp.interp |= (8 << 24); + prog->in_nr++; + } + } + if (!(prog->fp.interp & (8 << 24))) { ++nintp; + prog->fp.interp |= 8 << 24; } - /* after HPOS */ - p->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT; - - for (i = 0; i < p->in_nr; ++i) { - int j = p->in[i].id; - p->in[i].hw = nintp; + for (i = 0; i < prog->in_nr; ++i) { + int j = prog->in[i].id; - for (c = 0; c < 4; ++c) { - if (!ti->input_access[j][c]) - continue; - p->in[i].mask |= 1 << c; - ti->input_map[j][c] = nintp++; - } - /* count color inputs */ - if (i == p->vp.bfc[0] || i == p->vp.bfc[1]) - p->fp.colors += bitcount4(p->in[i].mask) << 16; + prog->in[i].hw = nintp; + for (c = 0; c < 4; ++c) + if (info->in[i].mask & (1 << c)) + info->in[j].slot[c] = nintp++; } - nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */ - nvary = nintp; - if (n < m) - nvary -= p->in[n].hw; + /* (n == m) if m never increased, i.e. no flat inputs */ + nflat = (n < m) ? (nintp - prog->in[n].hw) : 0; + nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */ + nvary = nintp - nflat; + + prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; + prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; - p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; - p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; + /* put front/back colors right after HPOS */ + prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT; + for (i = 0; i < 2; ++i) + if (prog->vp.bfc[i] < 0x80) + prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16; /* FP outputs */ - if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0))) - p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; + if (info->prop.fp.numColourResults > 1) + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; - depr = p->out_nr; - for (i = 0; i < p->out_nr; ++i) { - p->out[i].id = i; - if (p->out[i].sn == TGSI_SEMANTIC_POSITION) { - depr = i; + for (i = 0; i < info->numOutputs; ++i) { + prog->out[i].id = i; + prog->out[i].sn = info->out[i].sn; + prog->out[i].si = info->out[i].si; + prog->out[i].mask = info->out[i].mask; + + if (i == info->io.fragDepth || i == info->io.sampleMask) continue; - } - p->out[i].hw = p->max_out; - p->out[i].mask = 0xf; + prog->out[i].hw = info->out[i].si * 4; for (c = 0; c < 4; ++c) - ti->output_map[i][c] = p->max_out++; - } - if (depr < p->out_nr) { - p->out[depr].mask = 0x4; - p->out[depr].hw = ti->output_map[depr][2] = p->max_out++; - } else { - /* allowed values are 1, 4, 5, 8, 9, ... */ - p->max_out = MAX2(4, p->max_out); + info->out[i].slot[c] = prog->out[i].hw + c; + + prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4); } + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.sampleMask].slot[0] = prog->max_out++; + + if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.fragDepth].slot[2] = prog->max_out++; + + if (!prog->max_out) + prog->max_out = 4; + return 0; } static int -nv50_geomprog_prepare(struct nv50_translation_info *ti) +nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) { - ti->input_file = NV_FILE_MEM_S; - ti->output_file = NV_FILE_OUT; - - assert(0); - return 1; + switch (info->type) { + case PIPE_SHADER_VERTEX: + return nv50_vertprog_assign_slots(info); + case PIPE_SHADER_GEOMETRY: + return nv50_vertprog_assign_slots(info); + case PIPE_SHADER_FRAGMENT: + return nv50_fragprog_assign_slots(info); + default: + return -1; + } } -static int -nv50_prog_scan(struct nv50_translation_info *ti) +boolean +nv50_program_translate(struct nv50_program *prog, uint16_t chipset) { - struct nv50_program *p = ti->p; - struct tgsi_parse_context parse; - int ret, i; - - p->vp.edgeflag = 0x40; - p->vp.psiz = 0x40; - p->vp.bfc[0] = 0x40; - p->vp.bfc[1] = 0x40; - p->gp.primid = 0x80; + struct nv50_ir_prog_info *info; + int ret; - tgsi_scan_shader(p->pipe.tokens, &ti->scan); + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; -#if NV50_DEBUG & NV50_DEBUG_SHADER - tgsi_dump(p->pipe.tokens, 0); -#endif + info->type = prog->type; + info->target = chipset; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; - ti->subr = - CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + info->io.genUserClip = prog->vp.clpd_nr; - ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); - ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + info->assignSlots = nv50_program_assign_varying_slots; - ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + prog->vp.bfc[0] = 0x80; + prog->vp.bfc[1] = 0x80; + prog->vp.clpd[0] = 0x80; + prog->vp.clpd[1] = 0x80; + prog->vp.psiz = 0x80; + prog->vp.edgeflag = 0x80; + prog->gp.primid = 0x80; - tgsi_parse_init(&parse, p->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); + info->driverPriv = prog; - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - prog_immediate(ti, &parse.FullToken.FullImmediate); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - prog_decl(ti, &parse.FullToken.FullDeclaration); - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction; - prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr); - break; - } - } +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif - /* Scan to determine which registers are inputs/outputs of a subroutine. */ - for (i = 0; i < ti->subr_nr; ++i) { - int pc = ti->subr[i].id; - while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) - prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; } - - p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1; - p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1; - - switch (p->type) { - case PIPE_SHADER_VERTEX: - ret = nv50_vertprog_prepare(ti); - break; - case PIPE_SHADER_FRAGMENT: - ret = nv50_fragprog_prepare(ti); - break; - case PIPE_SHADER_GEOMETRY: - ret = nv50_geomprog_prepare(ti); - break; - default: - assert(!"unsupported program type"); - ret = -1; - break; + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->fixups = info->bin.relocData; + prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); + + if (prog->type == PIPE_SHADER_FRAGMENT) { + if (info->prop.fp.writesDepth) { + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; + prog->fp.flags[1] = 0x11; + } + if (info->prop.fp.usesDiscard) + prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } - assert(!ret); - return ret; -} - -/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or - * it "gets disappeared" and cannot be used in libnvc0 ... - */ -boolean -nv50_program_translate_new(struct nv50_program *p) -{ - struct nv50_ir_prog_info info; - - return nv50_ir_generate_code(&info); +out: + FREE(info); + return !ret; } boolean -nv50_program_translate(struct nv50_program *p) +nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) { - struct nv50_translation_info *ti; + struct nouveau_heap *heap; int ret; + uint32_t size = align(prog->code_size, 0x40); - ti = CALLOC_STRUCT(nv50_translation_info); - ti->p = p; - - ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; - - ret = nv50_prog_scan(ti); - if (ret) { - NOUVEAU_ERR("unsupported shader program\n"); - goto out; + switch (prog->type) { + case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; + case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; + case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; + default: + assert(!"invalid program type"); + return FALSE; } - ret = nv50_generate_code(ti); + ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); if (ret) { - NOUVEAU_ERR("error during shader translation\n"); - goto out; + /* Out of space: evict everything to compactify the code segment, hoping + * the working set is much smaller and drifts slowly. Improve me ! + */ + while (heap->next) { + struct nv50_program *evict = heap->next->priv; + if (evict) + nouveau_heap_free(&evict->mem); + } + debug_printf("WARNING: out of code space, evicting all shaders.\n"); } + prog->code_base = prog->mem->start; -out: - if (ti->immd32) - FREE(ti->immd32); - if (ti->immd32_ty) - FREE(ti->immd32_ty); - if (ti->insns) - FREE(ti->insns); - if (ti->subr) - FREE(ti->subr); - FREE(ti); - return ret ? FALSE : TRUE; + if (prog->fixups) + nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); + + nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, + (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + + BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); + PUSH_DATA (nv50->base.pushbuf, 0); + + return TRUE; } void diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 17aee97a024..92361ad9946 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -23,8 +23,10 @@ #ifndef __NV50_PROG_H__ #define __NV50_PROG_H__ +struct nv50_context; + #include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" +#include "pipe/p_shader_tokens.h" #define NV50_CAP_MAX_PROGRAM_TEMPS 64 @@ -64,10 +66,10 @@ struct nv50_program { struct { uint32_t attrs[3]; /* VP_ATTR_EN_0,1 and VP_GP_BUILTIN_ATTR_EN */ - ubyte psiz; - ubyte bfc[2]; + ubyte psiz; /* output slot of point size */ + ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */ ubyte edgeflag; - ubyte clpd; + ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */ ubyte clpd_nr; } vp; @@ -83,55 +85,13 @@ struct nv50_program { uint8_t prim_type; /* point, line strip or tri strip */ } gp; - /* relocation records */ - void *fixups; - unsigned num_fixups; + void *fixups; /* relocation records */ struct nouveau_heap *mem; }; -#define NV50_INTERP_LINEAR (1 << 0) -#define NV50_INTERP_FLAT (1 << 1) -#define NV50_INTERP_CENTROID (1 << 2) - -/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ -struct nv50_subroutine { - unsigned id; - unsigned pos; - /* function inputs and outputs */ - uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4]; - uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4]; -}; - -struct nv50_translation_info { - struct nv50_program *p; - unsigned inst_nr; - struct tgsi_full_instruction *insns; - ubyte input_file; - ubyte output_file; - ubyte input_map[PIPE_MAX_SHADER_INPUTS][4]; - ubyte output_map[PIPE_MAX_SHADER_OUTPUTS][4]; - ubyte sysval_map[TGSI_SEMANTIC_COUNT]; - ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; - int input_access[PIPE_MAX_SHADER_INPUTS][4]; - int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; - boolean indirect_inputs; - boolean indirect_outputs; - boolean store_to_memory; - struct tgsi_shader_info scan; - uint32_t *immd32; - unsigned immd32_nr; - ubyte *immd32_ty; - ubyte edgeflag_out; - struct nv50_subroutine *subr; - unsigned subr_nr; -}; - -int nv50_generate_code(struct nv50_translation_info *ti); - -void nv50_relocate_program(struct nv50_program *p, - uint32_t code_base, uint32_t data_base); - -boolean nv50_program_tx(struct nv50_program *p); +boolean nv50_program_translate(struct nv50_program *, uint16_t chipset); +boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *); +void nv50_program_destroy(struct nv50_context *, struct nv50_program *); #endif /* __NV50_PROG_H__ */ diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index f45fd2cde22..4bcd2049099 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -85,9 +85,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* shader support missing */ return 0; case PIPE_CAP_MIN_TEXEL_OFFSET: - return 0 /* -8, TODO */; + return -8; case PIPE_CAP_MAX_TEXEL_OFFSET: - return 0 /* +7, TODO */; + return 7; case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TEXTURE_SHADOW_MAP: @@ -108,7 +108,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SM3: return 1; case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 120; + return 130; case PIPE_CAP_MAX_RENDER_TARGETS: return 8; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: @@ -202,7 +202,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_SUBROUTINES: return 0; /* please inline, or provide function declarations */ case PIPE_SHADER_CAP_INTEGERS: - return 0; + return 1; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: return 32; default: diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index 07d4e6bb0c5..46835c672d6 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -118,43 +118,16 @@ nv50_constbufs_validate(struct nv50_context *nv50) static boolean nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog) { - struct nouveau_heap *heap; - int ret; - unsigned size; - if (!prog->translated) { - prog->translated = nv50_program_translate(prog); + prog->translated = nv50_program_translate( + prog, nv50->screen->base.device->chipset); if (!prog->translated) return FALSE; } else if (prog->mem) return TRUE; - if (prog->type == PIPE_SHADER_FRAGMENT) heap = nv50->screen->fp_code_heap; - else - if (prog->type == PIPE_SHADER_GEOMETRY) heap = nv50->screen->gp_code_heap; - else - heap = nv50->screen->vp_code_heap; - - size = align(prog->code_size, 0x100); - - ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); - if (ret) { - NOUVEAU_ERR("out of code space for shader type %i\n", prog->type); - return FALSE; - } - prog->code_base = prog->mem->start; - - nv50_relocate_program(prog, prog->code_base, 0); - - nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, - (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, - NOUVEAU_BO_VRAM, prog->code_size, prog->code); - - BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); - PUSH_DATA (nv50->base.pushbuf, 0); - - return TRUE; + return nv50_program_upload_code(nv50, prog); } static INLINE void @@ -383,20 +356,25 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]); for (c = 0; c < vp->vp.clpd_nr; ++c) - map[m++] = vp->vp.clpd + c; + map[m++] = vp->vp.clpd[c / 4] + (c % 4); colors |= m << 8; /* adjust BFC0 id */ + dummy.mask = 0x0; + /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */ if (nv50->rast->pipe.light_twoside) { - for (i = 0; i < 2; ++i) - m = nv50_vec4_map(map, m, lin, - &fp->in[fp->vp.bfc[i]], &vp->out[vp->vp.bfc[i]]); + for (i = 0; i < 2; ++i) { + n = vp->vp.bfc[i]; + if (fp->vp.bfc[i] >= fp->in_nr) + continue; + m = nv50_vec4_map(map, m, lin, &fp->in[fp->vp.bfc[i]], + (n < vp->out_nr) ? &vp->out[n] : &dummy); + } } colors += m - 4; /* adjust FFC0 id */ interp |= m << 8; /* set map id where 'normal' FP inputs start */ - dummy.mask = 0x0; for (i = 0; i < fp->in_nr; ++i) { for (n = 0; n < vp->out_nr; ++n) if (vp->out[n].sn == fp->in[i].sn && @@ -409,7 +387,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) /* PrimitiveID either is replaced by the system value, or * written by the geometry shader into an output register */ - if (fp->gp.primid < 0x40) { + if (fp->gp.primid < 0x80) { primid = m; map[m++] = vp->gp.primid; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 7af992076c7..d89556447ab 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -211,10 +211,13 @@ nv50_check_program_ucps(struct nv50_context *nv50, nv50_program_destroy(nv50, vp); vp->vp.clpd_nr = n; - if (likely(vp == nv50->vertprog)) + if (likely(vp == nv50->vertprog)) { + nv50->dirty |= NV50_NEW_VERTPROG; nv50_vertprog_validate(nv50); - else + } else { + nv50->dirty |= NV50_NEW_GMTYPROG; nv50_gmtyprog_validate(nv50); + } nv50_fp_linkage_validate(nv50); } |