summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nvc0/nvc0_program.c
diff options
context:
space:
mode:
author: Christoph Bumiller <[email protected]> 2011-09-13 23:10:35 +0200
committer: Christoph Bumiller <[email protected]> 2011-09-14 16:19:52 +0200
commit: 3afabfb929cf24a783c10c99bf0d86245e70a94a (patch)
tree: 73d04e84b157fe0803be054570533034f9e8e00c /src/gallium/drivers/nvc0/nvc0_program.c
parent: 57594065c30feec9376be9b2132659f7d87362ee (diff)
nvc0: hook up to new shader code generator
Also includes loading of the shared shader library code (used for f64 and integer division) and setting up the immediate array buffer, which is appended to the code.
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_program.c')
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c 973
1 files changed, 448 insertions, 525 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index bcee027917e..eaad0805909 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -20,479 +20,343 @@
* SOFTWARE.
*/
-#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
-
#include "nvc0_context.h"
-#include "nvc0_pc.h"
-
-static unsigned
-nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
-{
- unsigned mask = inst->Dst[0].Register.WriteMask;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
- case TGSI_OPCODE_DP3:
- return 0x7;
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_KIL: /* WriteMask ignored */
- return 0xf;
- case TGSI_OPCODE_DST:
- return mask & (c ? 0xa : 0x6);
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_EXP:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_LOG:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
- return 0x1;
- case TGSI_OPCODE_IF:
- return 0x1;
- case TGSI_OPCODE_LIT:
- return 0xb;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXP:
- {
- const struct tgsi_instruction_texture *tex;
-
- assert(inst->Instruction.Texture);
- tex = &inst->Texture;
-
- mask = 0x7;
- if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXD)
- mask |= 0x8; /* bias, lod or proj */
-
- switch (tex->Texture) {
- case TGSI_TEXTURE_1D:
- mask &= 0x9;
- break;
- case TGSI_TEXTURE_SHADOW1D:
- mask &= 0x5;
- break;
- case TGSI_TEXTURE_2D:
- mask &= 0xb;
- break;
- default:
- break;
- }
- }
- return mask;
- case TGSI_OPCODE_XPD:
- {
- unsigned x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- }
- default:
- break;
- }
- return mask;
-}
+#include "nv50/codegen/nv50_ir_driver.h"
+/* If only they told us the actual semantic instead of just GENERIC ... */
static void
-nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+nvc0_mesa_varying_hack(struct nv50_ir_varying *var)
{
- int i, c;
-
- for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
- for (c = 0; c < 4; ++c)
- ti->input_access[i][c] = id;
+ unsigned c;
- ti->indirect_inputs = TRUE;
-}
-
-static void
-nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
-{
- int i, c;
+ if (var->sn != TGSI_SEMANTIC_GENERIC)
+ return;
- for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ if (var->si <= 7) /* gl_TexCoord */
for (c = 0; c < 4; ++c)
- ti->output_access[i][c] = id;
-
- ti->indirect_outputs = TRUE;
+ var->slot[c] = (0x300 + var->si * 0x10 + c * 0x4) / 4;
+ else
+ if (var->si == 9) /* gl_PointCoord */
+ for (c = 0; c < 4; ++c)
+ var->slot[c] = (0x2e0 + c * 0x4) / 4;
+ else
+ for (c = 0; c < 4; ++c) /* move down user varyings (first has index 8) */
+ var->slot[c] -= 0x80 / 4;
}
-static INLINE unsigned
-nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
+static uint32_t
+nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
{
- /* NOTE: locations 0xfxx indicate special regs */
switch (sn) {
- /*
- case TGSI_SEMANTIC_VERTEXID:
- *is_input = TRUE;
- return 0x2fc;
- */
- case TGSI_SEMANTIC_PRIMID:
- *is_input = TRUE;
- return 0x60;
- /*
- case TGSI_SEMANTIC_LAYER_INDEX:
- return 0x64;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- return 0x68;
- */
- case TGSI_SEMANTIC_INSTANCEID:
- *is_input = TRUE;
- return 0x2f8;
- case TGSI_SEMANTIC_FACE:
- *is_input = TRUE;
- return 0x3fc;
- /*
- case TGSI_SEMANTIC_INVOCATIONID:
- return 0xf11;
- */
+/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x270;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_POINTCOORD: return 0x2e0; */
+/* case TGSI_SEMANTIC_TESSCOORD: return ~0; */ /* 0x2f0, but special load */
+ case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
+/* case TGSI_SEMANTIC_VERTEXID: return 0x2fc; */
+/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */
+ case TGSI_SEMANTIC_FACE: return 0x3fc;
+/* case TGSI_SEMANTIC_INVOCATIONID: return ~0; */
default:
- assert(0);
- return 0x000;
+ assert(!"invalid TGSI input semantic");
+ return ~0;
}
}
-static INLINE unsigned
-nvc0_varying_location(unsigned sn, unsigned si)
+static uint32_t
+nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
{
switch (sn) {
- case TGSI_SEMANTIC_POSITION:
- return 0x70;
- case TGSI_SEMANTIC_COLOR:
- return 0x280 + (si * 16); /* are these hard-wired ? */
- case TGSI_SEMANTIC_BCOLOR:
- return 0x2a0 + (si * 16);
- case TGSI_SEMANTIC_FOG:
- return 0x270;
- case TGSI_SEMANTIC_PSIZE:
- return 0x6c;
- /*
- case TGSI_SEMANTIC_PNTC:
- return 0x2e0;
- */
- case TGSI_SEMANTIC_GENERIC:
- /* We'd really like to distinguish between TEXCOORD and GENERIC here,
- * since only 0x300 to 0x37c can be replaced by sprite coordinates.
- * Also, gl_PointCoord should be a system value and must be assigned to
- * address 0x2e0. For now, let's cheat:
- */
- assert(si < 31);
- if (si <= 7)
- return 0x300 + si * 16;
- if (si == 9)
- return 0x2e0;
- return 0x80 + ((si - 8) * 16);
- case TGSI_SEMANTIC_NORMAL:
- return 0x360;
- case TGSI_SEMANTIC_PRIMID:
- return 0x40;
- case TGSI_SEMANTIC_FACE:
- return 0x3fc;
- case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */
- return 0xf00;
- /*
- case TGSI_SEMANTIC_CLIP_DISTANCE:
- return 0x2c0 + (si * 4);
- */
+/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */
+ case TGSI_SEMANTIC_PRIMID: return 0x040;
+/* case TGSI_SEMANTIC_LAYER: return 0x064; */
+/* case TGSI_SEMANTIC_VIEWPORTINDEX: return 0x068; */
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x270;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */
+ case TGSI_SEMANTIC_EDGEFLAG: return ~0;
default:
- assert(0);
- return 0x000;
+ assert(!"invalid TGSI output semantic");
+ return ~0;
}
}
-static INLINE unsigned
-nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+static int
+nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- unsigned mode;
-
- if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
- mode = NVC0_INTERP_FLAT;
- else
- if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
- mode = NVC0_INTERP_PERSPECTIVE;
- else
- if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR)
- mode = NVC0_INTERP_PERSPECTIVE;
- else
- mode = NVC0_INTERP_LINEAR;
+ unsigned i, c;
- if (decl->Declaration.Centroid)
- mode |= NVC0_INTERP_CENTROID;
+ for (i = 0; i < info->numInputs; ++i)
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (0x80 + i * 0x10 + c * 0x4) / 4;
- return mode;
+ return 0;
}
-static void
-prog_immediate(struct nvc0_translation_info *ti,
- const struct tgsi_full_immediate *imm)
+static int
+nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- int c;
- unsigned n = ti->immd32_nr++;
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
- assert(ti->immd32_nr <= ti->scan.immediate_count);
+ for (i = 0; i < info->numInputs; ++i) {
+ offset = nvc0_shader_input_address(info->in[i].sn,
+ info->in[i].si, ubase);
+ if (info->in[i].patch && offset >= 0x20)
+ offset = 0x20 + info->in[i].si * 0x10;
- for (c = 0; c < 4; ++c)
- ti->immd32[n * 4 + c] = imm->u[c].Uint;
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (offset + c * 0x4) / 4;
- ti->immd32_ty[n] = imm->Immediate.DataType;
+ nvc0_mesa_varying_hack(&info->in[i]);
+ }
+
+ return 0;
}
-static boolean
-prog_decl(struct nvc0_translation_info *ti,
- const struct tgsi_full_declaration *decl)
+static int
+nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
+ unsigned last = info->prop.fp.numColourResults * 4;
unsigned i, c;
- unsigned sn = TGSI_SEMANTIC_GENERIC;
- unsigned si = 0;
- const unsigned first = decl->Range.First;
- const unsigned last = decl->Range.Last;
-
- if (decl->Declaration.Semantic) {
- sn = decl->Semantic.Name;
- si = decl->Semantic.Index;
- }
-
- switch (decl->Declaration.File) {
- case TGSI_FILE_INPUT:
- for (i = first; i <= last; ++i) {
- if (ti->prog->type == PIPE_SHADER_VERTEX) {
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
- } else {
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
- /* for sprite coordinates: */
- ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
- }
- if (ti->prog->type == PIPE_SHADER_FRAGMENT)
- ti->interp_mode[i] = nvc0_interp_mode(decl);
- }
- break;
- case TGSI_FILE_OUTPUT:
- for (i = first; i <= last; ++i, ++si) {
- if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
- si = i;
- if (i == ti->fp_depth_output) {
- ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
- } else {
- if (i > ti->fp_depth_output)
- si -= 1;
- for (c = 0; c < 4; ++c)
- ti->output_loc[i][c] = si * 4 + c;
- }
- } else {
- if (sn == TGSI_SEMANTIC_EDGEFLAG)
- ti->edgeflag_out = i;
- for (c = 0; c < 4; ++c)
- ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
- /* for TFB_VARYING_LOCS: */
- ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
- }
- }
- break;
- case TGSI_FILE_SYSTEM_VALUE:
- i = first;
- ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
- assert(first == last);
- break;
- case TGSI_FILE_TEMPORARY:
- ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
- break;
- case TGSI_FILE_NULL:
- case TGSI_FILE_CONSTANT:
- case TGSI_FILE_SAMPLER:
- case TGSI_FILE_ADDRESS:
- case TGSI_FILE_IMMEDIATE:
- case TGSI_FILE_PREDICATE:
- break;
- default:
- NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
- return FALSE;
- }
- return TRUE;
-}
-static void
-prog_inst(struct nvc0_translation_info *ti,
- const struct tgsi_full_instruction *inst, int id)
-{
- const struct tgsi_dst_register *dst;
- const struct tgsi_src_register *src;
- int s, c, k;
- unsigned mask;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
- ti->subr[ti->num_subrs].first_insn = id - 1;
- ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
- ++ti->num_subrs;
- }
+ for (i = 0; i < info->numOutputs; ++i)
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = info->out[i].si * 4 + c;
- if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
- dst = &inst->Dst[0].Register;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = last++;
- for (c = 0; c < 4; ++c) {
- if (dst->Indirect)
- nvc0_indirect_outputs(ti, id);
- if (!(dst->WriteMask & (1 << c)))
- continue;
- ti->output_access[dst->Index][c] = id;
- }
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = last;
- if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
- inst->Src[0].Register.File == TGSI_FILE_INPUT &&
- dst->Index == ti->edgeflag_out)
- ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
- } else
- if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
- if (inst->Dst[0].Register.Indirect)
- ti->require_stores = TRUE;
- }
+ return 0;
+}
- for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
- src = &inst->Src[s].Register;
- if (src->File == TGSI_FILE_TEMPORARY)
- if (inst->Src[s].Register.Indirect)
- ti->require_stores = TRUE;
- if (src->File != TGSI_FILE_INPUT)
- continue;
- mask = nvc0_tgsi_src_mask(inst, s);
+static int
+nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
- if (inst->Src[s].Register.Indirect)
- nvc0_indirect_inputs(ti, id);
+ for (i = 0; i < info->numOutputs; ++i) {
+ offset = nvc0_shader_output_address(info->out[i].sn,
+ info->out[i].si, ubase);
+ if (info->out[i].patch && offset >= 0x20)
+ offset = 0x20 + info->out[i].si * 0x10;
- for (c = 0; c < 4; ++c) {
- if (!(mask & (1 << c)))
- continue;
- k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
- if (k <= TGSI_SWIZZLE_W)
- ti->input_access[src->Index][k] = id;
- }
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = (offset + c * 0x4) / 4;
+
+ nvc0_mesa_varying_hack(&info->out[i]);
}
+
+ return 0;
}
-/* Probably should introduce something like struct tgsi_function_declaration
- * instead of trying to guess inputs/outputs.
- */
-static void
-prog_subroutine_inst(struct nvc0_subroutine *subr,
- const struct tgsi_full_instruction *inst)
+static int
+nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
{
- const struct tgsi_dst_register *dst;
- const struct tgsi_src_register *src;
- int s, c, k;
- unsigned mask;
-
- for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
- src = &inst->Src[s].Register;
- if (src->File != TGSI_FILE_TEMPORARY)
- continue;
- mask = nvc0_tgsi_src_mask(inst, s);
+ int ret;
- for (c = 0; c < 4; ++c) {
- k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+ if (info->type == PIPE_SHADER_VERTEX)
+ ret = nvc0_vp_assign_input_slots(info);
+ else
+ ret = nvc0_sp_assign_input_slots(info);
+ if (ret)
+ return ret;
- if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
- if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
- subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
- }
- }
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ ret = nvc0_fp_assign_output_slots(info);
+ else
+ ret = nvc0_sp_assign_output_slots(info);
+ return ret;
+}
- if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
- dst = &inst->Dst[0].Register;
+static INLINE void
+nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
+{
+ uint8_t min = (vp->hdr[4] >> 12) & 0xff;
+ uint8_t max = (vp->hdr[4] >> 24);
- for (c = 0; c < 4; ++c)
- if (dst->WriteMask & (1 << c))
- subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
- }
+ min = MIN2(min, slot);
+ max = MAX2(max, slot);
+
+ vp->hdr[4] = (max << 24) | (min << 12);
}
+/* Common part of header generation for VP, TCP, TEP and GP. */
static int
-nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
{
- int i, c;
- unsigned a;
+ unsigned i, c, a;
- for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
- for (c = 0; c < 4; ++c, ++a)
- if (ti->input_access[i][c])
- vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
+ for (i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ a = info->in[i].slot[c];
+ if (info->in[i].mask & (1 << c))
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
+ }
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
- a = (ti->output_loc[i][0] - 0x40) / 4;
- if (ti->output_loc[i][0] >= 0xf00)
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].patch)
continue;
- for (c = 0; c < 4; ++c, ++a) {
- if (!ti->output_access[i][c])
+ for (c = 0; c < 4; ++c) {
+ if (!(info->out[i].mask & (1 << c)))
continue;
- vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
+ assert(info->out[i].slot[c] >= 0x40 / 4);
+ a = info->out[i].slot[c] - 0x40 / 4;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32);
+ if (info->out[i].oread)
+ nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
}
}
- for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
- a = ti->sysval_loc[i] / 4;
- if (a > 0 && a < (0xf00 / 4))
- vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32);
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_PRIMID:
+ vp->hdr[5] |= 1 << 24;
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ vp->hdr[10] |= 1 << 30;
+ break;
+ /*
+ case TGSI_SEMANTIC_VERTEXID:
+ vp->hdr[10] |= 1 << 31;
+ break;
+ */
+ default:
+ break;
+ }
}
return 0;
}
static int
-nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
{
- vp->hdr[0] = 0x20461;
+ vp->hdr[0] = 0x20061 | (1 << 10);
vp->hdr[4] = 0xff000;
- vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+ vp->hdr[18] = (1 << info->io.clipDistanceCount) - 1;
+
+ return nvc0_vtgp_gen_header(vp, info);
+}
+
+#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
+static void
+nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
+{
+ switch (info->prop.tp.domain) {
+ case PIPE_PRIM_LINES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
+ if (info->prop.tp.winding > 0)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+ break;
+ case PIPE_PRIM_QUADS:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
+ break;
+ default:
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
- return nvc0_vp_gp_gen_header(vp, ti);
+ switch (info->prop.tp.partitioning) {
+ case PIPE_TESS_PART_INTEGER:
+ case PIPE_TESS_PART_POW2:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
+ break;
+ case PIPE_TESS_PART_FRACT_ODD:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
+ break;
+ case PIPE_TESS_PART_FRACT_EVEN:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
+ break;
+ default:
+ assert(!"invalid tessellator partitioning");
+ break;
+ }
}
+#endif
+#ifdef PIPE_SHADER_HULL
static int
-nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
{
- unsigned invocations = 1;
- unsigned max_output_verts, output_prim;
- unsigned i;
+ unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
- gp->hdr[0] = 0x21061;
+ tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
- for (i = 0; i < ti->scan.num_properties; ++i) {
- switch (ti->scan.properties[i].name) {
- case TGSI_PROPERTY_GS_OUTPUT_PRIM:
- output_prim = ti->scan.properties[i].data[0];
- break;
- case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
- max_output_verts = ti->scan.properties[i].data[0];
- assert(max_output_verts < 512);
- break;
- /*
- case TGSI_PROPERTY_GS_INVOCATIONS:
- invocations = ti->scan.properties[i].data[0];
- assert(invocations <= 32);
- break;
- */
- default:
- break;
- }
- }
+ if (info->numPatchConstants)
+ opcs = 8 + info->numPatchConstants * 4;
+
+ tcp->hdr[0] = 0x20061 | (2 << 10);
+
+ tcp->hdr[1] = opcs << 24;
+ tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
+
+ tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
+
+ nvc0_vtgp_gen_header(tcp, info);
- gp->hdr[2] = MIN2(invocations, 32) << 24;
+ nvc0_tp_get_tess_mode(tcp, info);
- switch (output_prim) {
+ return 0;
+}
+#endif
+
+#ifdef PIPE_SHADER_DOMAIN
+static int
+nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
+{
+ tep->hdr[0] = 0x20061 | (3 << 10);
+ tep->hdr[4] = 0xff000;
+
+ nvc0_vtgp_gen_header(tep, info);
+
+ nvc0_tp_get_tess_mode(tep, info);
+
+ tep->hdr[18] |= 0x3 << 12; /* ? */
+
+ return 0;
+}
+#endif
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
+{
+ gp->hdr[0] = 0x20061 | (4 << 10);
+
+ gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
+
+ switch (info->prop.gp.outputPrim) {
case PIPE_PRIM_POINTS:
gp->hdr[3] = 0x01000000;
gp->hdr[0] |= 0xf0000000;
@@ -510,206 +374,263 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
break;
}
- gp->hdr[4] = max_output_verts & 0x1ff;
+ gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+
+ return nvc0_vtgp_gen_header(gp, info);
+}
+
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
- return nvc0_vp_gp_gen_header(gp, ti);
+static uint8_t
+nvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
+{
+ if (var->linear)
+ return NVC0_INTERP_LINEAR;
+ if (var->flat)
+ return NVC0_INTERP_FLAT;
+ return NVC0_INTERP_PERSPECTIVE;
}
static int
-nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
{
- int i, c;
- unsigned a, m;
-
- fp->hdr[0] = 0x21462;
+ unsigned i, c, a, m;
+
+ fp->hdr[0] = 0x20062 | (5 << 10);
fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
- if (ti->scan.uses_kill)
+ if (info->prop.fp.usesDiscard)
fp->hdr[0] |= 0x8000;
- if (ti->scan.writes_z) {
+ if (info->prop.fp.numColourResults > 1)
+ fp->hdr[0] |= 0x4000;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ fp->hdr[19] |= 0x1;
+ if (info->prop.fp.writesDepth) {
fp->hdr[19] |= 0x2;
- if (ti->scan.num_outputs > 2)
- fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
- } else {
- if (ti->scan.num_outputs > 1)
- fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ fp->flags[0] = 0x11; /* deactivate ZCULL */
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
- m = ti->interp_mode[i] & 3;
+ for (i = 0; i < info->numInputs; ++i) {
+ m = nvc0_hdr_interp_mode(&info->in[i]);
for (c = 0; c < 4; ++c) {
- if (!ti->input_access[i][c])
+ if (!(info->in[i].mask & (1 << c)))
continue;
- a = ti->input_loc[i][c] / 2;
- if (ti->input_loc[i][c] >= 0x2c0)
- a -= 32;
- if (ti->input_loc[i][0] == 0x70)
- fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
- else
- if (ti->input_loc[i][0] == 0x2e0)
- fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
- else
+ if (info->in[i].slot[0] == (0x070 / 4)) {
+ fp->hdr[5] |= 1 << (28 + c);
+ } else
+ if (info->in[i].slot[0] == (0x2e0 / 4)) {
+ if (c <= 1)
+ fp->hdr[14] |= 1 << (24 + c);
+ } else {
+ if (info->in[i].slot[c] < (0x040 / 4) ||
+ info->in[i].slot[c] > (0x380 / 4))
+ continue;
+ a = info->in[i].slot[c] * 2;
+ if (info->in[i].slot[0] >= (0x2c0 / 4))
+ a -= 32;
fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
}
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
- if (i != ti->fp_depth_output)
- fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
}
- for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
- a = ti->sysval_loc[i] / 2;
- if ((a > 0) && (a < 0xf00 / 2))
- fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
- }
+ fp->fp.early_z = info->prop.fp.earlyFragTests;
+ if (fp->fp.early_z == FALSE && fp->code_size >= 0x400)
+ fp->fp.early_z = !(info->prop.fp.writesDepth ||
+ info->prop.fp.usesDiscard ||
+ (info->io.globalAccess & 2));
return 0;
}
-static boolean
-nvc0_prog_scan(struct nvc0_translation_info *ti)
+#ifdef DEBUG
+static void
+nvc0_program_dump(struct nvc0_program *prog)
{
- struct nvc0_program *prog = ti->prog;
- struct tgsi_parse_context parse;
- int ret;
- unsigned i;
+ unsigned pos;
-#if NV50_DEBUG & NV50_DEBUG_SHADER
- tgsi_dump(prog->pipe.tokens, 0);
+ for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+
+ debug_printf("shader binary code (0x%x bytes):", prog->code_size);
+ for (pos = 0; pos < prog->code_size / 4; ++pos) {
+ if ((pos % 8) == 0)
+ debug_printf("\n");
+ debug_printf("%08x ", prog->code[pos]);
+ }
+ debug_printf("\n");
+}
#endif
- tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+ struct nv50_ir_prog_info *info;
+ int ret;
- if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
- ti->fp_depth_output = 255;
- for (i = 0; i < ti->scan.num_outputs; ++i)
- if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
- ti->fp_depth_output = i;
- }
+ info = CALLOC_STRUCT(nv50_ir_prog_info);
+ if (!info)
+ return FALSE;
- ti->subr =
- CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+ info->type = prog->type;
+ info->target = 0xc0;
+ info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ info->bin.source = (void *)prog->pipe.tokens;
- ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
- ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+ info->io.clipDistanceCount = prog->vp.num_ucps;
- ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+ info->assignSlots = nvc0_program_assign_varying_slots;
- tgsi_parse_init(&parse, prog->pipe.tokens);
- while (!tgsi_parse_end_of_tokens(&parse)) {
- tgsi_parse_token(&parse);
+#ifdef DEBUG
+ info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+ info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+ info->optLevel = 3;
+#endif
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- prog_immediate(ti, &parse.FullToken.FullImmediate);
- break;
- case TGSI_TOKEN_TYPE_DECLARATION:
- prog_decl(ti, &parse.FullToken.FullDeclaration);
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
- prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
- break;
- default:
- break;
- }
+ ret = nv50_ir_generate_code(info);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
}
- for (i = 0; i < ti->num_subrs; ++i) {
- unsigned pc = ti->subr[i].id;
- while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
- prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
- }
+ prog->code = info->bin.code;
+ prog->code_size = info->bin.codeSize;
+ prog->immd_data = info->immd.buf;
+ prog->immd_size = info->immd.bufSize;
+ prog->relocs = info->bin.relocData;
+ prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
+
+ prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
switch (prog->type) {
case PIPE_SHADER_VERTEX:
- ti->input_file = NV_FILE_MEM_A;
- ti->output_file = NV_FILE_MEM_V;
- ret = nvc0_vp_gen_header(prog, ti);
+ ret = nvc0_vp_gen_header(prog, info);
break;
- /*
- case PIPE_SHADER_TESSELLATION_CONTROL:
- ret = nvc0_tcp_gen_header(ti);
+#ifdef PIPE_SHADER_HULL
+ case PIPE_SHADER_HULL:
+ ret = nvc0_tcp_gen_header(prog, info);
break;
- case PIPE_SHADER_TESSELLATION_EVALUATION:
- ret = nvc0_tep_gen_header(ti);
+#endif
+#ifdef PIPE_SHADER_DOMAIN
+ case PIPE_SHADER_DOMAIN:
+ ret = nvc0_tep_gen_header(prog, info);
break;
+#endif
case PIPE_SHADER_GEOMETRY:
- ret = nvc0_gp_gen_header(ti);
+ ret = nvc0_gp_gen_header(prog, info);
break;
- */
case PIPE_SHADER_FRAGMENT:
- ti->input_file = NV_FILE_MEM_V;
- ti->output_file = NV_FILE_GPR;
-
- if (ti->scan.writes_z)
- prog->flags[0] = 0x11; /* ? */
- else
- if (!ti->scan.uses_kill && !ti->global_stores)
- prog->fp.early_z = 1;
-
- ret = nvc0_fp_gen_header(prog, ti);
+ ret = nvc0_fp_gen_header(prog, info);
break;
default:
- assert(!"unsupported program type");
ret = -1;
+ NOUVEAU_ERR("unknown program type: %u\n", prog->type);
break;
}
+ if (ret)
+ goto out;
- if (ti->require_stores) {
+ if (info->bin.tlsSpace) {
+ assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
- prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+ prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
}
+ if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 16;
- assert(!ret);
- return ret;
+out:
+ FREE(info);
+ return !ret;
}
boolean
-nvc0_program_translate(struct nvc0_program *prog)
+nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
- struct nvc0_translation_info *ti;
+ struct nvc0_screen *screen = nvc0->screen;
int ret;
+ uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
+ uint32_t lib_pos = screen->lib_code->start;
+ uint32_t code_pos;
+
+ /* c[] bindings need to be aligned to 0x100, but we could use relocations
+ * to save space. */
+ if (prog->immd_size) {
+ prog->immd_base = size;
+ size = align(size, 0x40);
+ size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
+ }
+ size = align(size, 0x40); /* required by SP_START_ID */
- ti = CALLOC_STRUCT(nvc0_translation_info);
- ti->prog = prog;
+ ret = nouveau_resource_alloc(screen->text_heap, size, prog, &prog->res);
+ if (ret) {
+ NOUVEAU_ERR("out of code space\n");
+ return FALSE;
+ }
+ prog->code_base = prog->res->start;
+ prog->immd_base = align(prog->res->start + prog->immd_base, 0x100);
+ assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <
+ prog->res->start + prog->res->size));
- ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+ code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
- prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
+ if (prog->relocs)
+ nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
- if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
- ti->append_ucp = TRUE;
+#ifdef DEBUG
+ if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ nvc0_program_dump(prog);
+#endif
- ret = nvc0_prog_scan(ti);
- if (ret) {
- NOUVEAU_ERR("unsupported shader program\n");
- goto out;
- }
+ nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0_m2mf_push_linear(&nvc0->base, screen->text,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ if (prog->immd_size)
+ nvc0_m2mf_push_linear(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
- ret = nvc0_generate_code(ti);
- if (ret)
- NOUVEAU_ERR("shader translation failed\n");
+ BEGIN_RING(screen->base.channel, RING_3D(MEM_BARRIER), 1);
+ OUT_RING (screen->base.channel, 0x1111);
-#if NV50_DEBUG & NV50_DEBUG_SHADER
- unsigned i;
- for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
- debug_printf("HDR[%02lx] = 0x%08x\n",
- i * sizeof(prog->hdr[0]), prog->hdr[i]);
-#endif
+ return TRUE;
+}
-out:
- if (ti->immd32)
- FREE(ti->immd32);
- if (ti->immd32_ty)
- FREE(ti->immd32_ty);
- if (ti->insns)
- FREE(ti->insns);
- if (ti->subr)
- FREE(ti->subr);
- FREE(ti);
- return ret ? FALSE : TRUE;
+/* Upload code for builtin functions like integer division emulation. */
+void
+nvc0_program_library_upload(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+ uint32_t size;
+ const uint32_t *code;
+
+ if (screen->lib_code)
+ return;
+
+ nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
+ if (!size)
+ return;
+
+ ret = nouveau_resource_alloc(screen->text_heap, align(size, 0x100), NULL,
+ &screen->lib_code);
+ if (ret)
+ return;
+
+ nvc0_m2mf_push_linear(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
+ /* no need for a memory barrier here; one will be emitted with the first program upload */
}
void
@@ -720,6 +641,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (prog->code)
FREE(prog->code);
+ if (prog->immd_data)
+ FREE(prog->immd_data);
if (prog->relocs)
FREE(prog->relocs);