aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nvc0')
-rw-r--r--src/gallium/drivers/nvc0/Makefile.sources6
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.c3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_context.h3
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.c973
-rw-r--r--src/gallium/drivers/nvc0/nvc0_program.h66
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.c7
-rw-r--r--src/gallium/drivers/nvc0/nvc0_screen.h1
-rw-r--r--src/gallium/drivers/nvc0/nvc0_shader_state.c47
8 files changed, 494 insertions, 612 deletions
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index 9b1fb97f0cb..95f796fc065 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -13,12 +13,6 @@ C_SOURCES := \
nvc0_vbo.c \
nvc0_program.c \
nvc0_shader_state.c \
- nvc0_pc.c \
- nvc0_pc_print.c \
- nvc0_pc_emit.c \
- nvc0_tgsi_to_nc.c \
- nvc0_pc_optimize.c \
- nvc0_pc_regalloc.c \
nvc0_push.c \
nvc0_push2.c \
nvc0_query.c
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 360afbb943e..8fa1675902e 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -152,6 +152,9 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
nouveau_context_init_vdec(&nvc0->base);
+ /* shader builtin library is per-screen, but we need a context for m2mf */
+ nvc0_program_library_upload(nvc0);
+
return pipe;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 353a5418dd5..c11d1c32efe 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -79,6 +79,7 @@ struct nvc0_context {
uint8_t num_textures[5];
uint8_t num_samplers[5];
uint8_t tls_required; /* bitmask of shader types using l[] */
+ uint8_t c14_bound; /* whether immediate array constbuf is bound */
uint16_t scissor;
uint32_t uniform_buffer_bound[5];
} state;
@@ -161,7 +162,9 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
/* nvc0_program.c */
boolean nvc0_program_translate(struct nvc0_program *);
+boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_library_upload(struct nvc0_context *);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index bcee027917e..eaad0805909 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -20,479 +20,343 @@
* SOFTWARE.
*/
-#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
-
#include "nvc0_context.h"
-#include "nvc0_pc.h"
-
-static unsigned
-nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
-{
- unsigned mask = inst->Dst[0].Register.WriteMask;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_COS:
- case TGSI_OPCODE_SIN:
- return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
- case TGSI_OPCODE_DP3:
- return 0x7;
- case TGSI_OPCODE_DP4:
- case TGSI_OPCODE_DPH:
- case TGSI_OPCODE_KIL: /* WriteMask ignored */
- return 0xf;
- case TGSI_OPCODE_DST:
- return mask & (c ? 0xa : 0x6);
- case TGSI_OPCODE_EX2:
- case TGSI_OPCODE_EXP:
- case TGSI_OPCODE_LG2:
- case TGSI_OPCODE_LOG:
- case TGSI_OPCODE_POW:
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- case TGSI_OPCODE_SCS:
- return 0x1;
- case TGSI_OPCODE_IF:
- return 0x1;
- case TGSI_OPCODE_LIT:
- return 0xb;
- case TGSI_OPCODE_TEX:
- case TGSI_OPCODE_TXB:
- case TGSI_OPCODE_TXL:
- case TGSI_OPCODE_TXP:
- {
- const struct tgsi_instruction_texture *tex;
-
- assert(inst->Instruction.Texture);
- tex = &inst->Texture;
-
- mask = 0x7;
- if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXD)
- mask |= 0x8; /* bias, lod or proj */
-
- switch (tex->Texture) {
- case TGSI_TEXTURE_1D:
- mask &= 0x9;
- break;
- case TGSI_TEXTURE_SHADOW1D:
- mask &= 0x5;
- break;
- case TGSI_TEXTURE_2D:
- mask &= 0xb;
- break;
- default:
- break;
- }
- }
- return mask;
- case TGSI_OPCODE_XPD:
- {
- unsigned x = 0;
- if (mask & 1) x |= 0x6;
- if (mask & 2) x |= 0x5;
- if (mask & 4) x |= 0x3;
- return x;
- }
- default:
- break;
- }
- return mask;
-}
+#include "nv50/codegen/nv50_ir_driver.h"
+/* If only they told us the actual semantic instead of just GENERIC ... */
static void
-nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+nvc0_mesa_varying_hack(struct nv50_ir_varying *var)
{
- int i, c;
-
- for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
- for (c = 0; c < 4; ++c)
- ti->input_access[i][c] = id;
+ unsigned c;
- ti->indirect_inputs = TRUE;
-}
-
-static void
-nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
-{
- int i, c;
+ if (var->sn != TGSI_SEMANTIC_GENERIC)
+ return;
- for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+ if (var->si <= 7) /* gl_TexCoord */
for (c = 0; c < 4; ++c)
- ti->output_access[i][c] = id;
-
- ti->indirect_outputs = TRUE;
+ var->slot[c] = (0x300 + var->si * 0x10 + c * 0x4) / 4;
+ else
+ if (var->si == 9) /* gl_PointCoord */
+ for (c = 0; c < 4; ++c)
+ var->slot[c] = (0x2e0 + c * 0x4) / 4;
+ else
+ for (c = 0; c < 4; ++c) /* move down user varyings (first has index 8) */
+ var->slot[c] -= 0x80 / 4;
}
-static INLINE unsigned
-nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
+static uint32_t
+nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
{
- /* NOTE: locations 0xfxx indicate special regs */
switch (sn) {
- /*
- case TGSI_SEMANTIC_VERTEXID:
- *is_input = TRUE;
- return 0x2fc;
- */
- case TGSI_SEMANTIC_PRIMID:
- *is_input = TRUE;
- return 0x60;
- /*
- case TGSI_SEMANTIC_LAYER_INDEX:
- return 0x64;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- return 0x68;
- */
- case TGSI_SEMANTIC_INSTANCEID:
- *is_input = TRUE;
- return 0x2f8;
- case TGSI_SEMANTIC_FACE:
- *is_input = TRUE;
- return 0x3fc;
- /*
- case TGSI_SEMANTIC_INVOCATIONID:
- return 0xf11;
- */
+/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x270;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_POINTCOORD: return 0x2e0; */
+/* case TGSI_SEMANTIC_TESSCOORD: return ~0; */ /* 0x2f0, but special load */
+ case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
+/* case TGSI_SEMANTIC_VERTEXID: return 0x2fc; */
+/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */
+ case TGSI_SEMANTIC_FACE: return 0x3fc;
+/* case TGSI_SEMANTIC_INVOCATIONID: return ~0; */
default:
- assert(0);
- return 0x000;
+ assert(!"invalid TGSI input semantic");
+ return ~0;
}
}
-static INLINE unsigned
-nvc0_varying_location(unsigned sn, unsigned si)
+static uint32_t
+nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
{
switch (sn) {
- case TGSI_SEMANTIC_POSITION:
- return 0x70;
- case TGSI_SEMANTIC_COLOR:
- return 0x280 + (si * 16); /* are these hard-wired ? */
- case TGSI_SEMANTIC_BCOLOR:
- return 0x2a0 + (si * 16);
- case TGSI_SEMANTIC_FOG:
- return 0x270;
- case TGSI_SEMANTIC_PSIZE:
- return 0x6c;
- /*
- case TGSI_SEMANTIC_PNTC:
- return 0x2e0;
- */
- case TGSI_SEMANTIC_GENERIC:
- /* We'd really like to distinguish between TEXCOORD and GENERIC here,
- * since only 0x300 to 0x37c can be replaced by sprite coordinates.
- * Also, gl_PointCoord should be a system value and must be assigned to
- * address 0x2e0. For now, let's cheat:
- */
- assert(si < 31);
- if (si <= 7)
- return 0x300 + si * 16;
- if (si == 9)
- return 0x2e0;
- return 0x80 + ((si - 8) * 16);
- case TGSI_SEMANTIC_NORMAL:
- return 0x360;
- case TGSI_SEMANTIC_PRIMID:
- return 0x40;
- case TGSI_SEMANTIC_FACE:
- return 0x3fc;
- case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */
- return 0xf00;
- /*
- case TGSI_SEMANTIC_CLIP_DISTANCE:
- return 0x2c0 + (si * 4);
- */
+/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */
+ case TGSI_SEMANTIC_PRIMID: return 0x040;
+/* case TGSI_SEMANTIC_LAYER: return 0x064; */
+/* case TGSI_SEMANTIC_VIEWPORTINDEX: return 0x068; */
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x270;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */
+ case TGSI_SEMANTIC_EDGEFLAG: return ~0;
default:
- assert(0);
- return 0x000;
+ assert(!"invalid TGSI output semantic");
+ return ~0;
}
}
-static INLINE unsigned
-nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+static int
+nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- unsigned mode;
-
- if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
- mode = NVC0_INTERP_FLAT;
- else
- if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
- mode = NVC0_INTERP_PERSPECTIVE;
- else
- if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR)
- mode = NVC0_INTERP_PERSPECTIVE;
- else
- mode = NVC0_INTERP_LINEAR;
+ unsigned i, c;
- if (decl->Declaration.Centroid)
- mode |= NVC0_INTERP_CENTROID;
+ for (i = 0; i < info->numInputs; ++i)
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (0x80 + i * 0x10 + c * 0x4) / 4;
- return mode;
+ return 0;
}
-static void
-prog_immediate(struct nvc0_translation_info *ti,
- const struct tgsi_full_immediate *imm)
+static int
+nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- int c;
- unsigned n = ti->immd32_nr++;
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
- assert(ti->immd32_nr <= ti->scan.immediate_count);
+ for (i = 0; i < info->numInputs; ++i) {
+ offset = nvc0_shader_input_address(info->in[i].sn,
+ info->in[i].si, ubase);
+ if (info->in[i].patch && offset >= 0x20)
+ offset = 0x20 + info->in[i].si * 0x10;
- for (c = 0; c < 4; ++c)
- ti->immd32[n * 4 + c] = imm->u[c].Uint;
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (offset + c * 0x4) / 4;
- ti->immd32_ty[n] = imm->Immediate.DataType;
+ nvc0_mesa_varying_hack(&info->in[i]);
+ }
+
+ return 0;
}
-static boolean
-prog_decl(struct nvc0_translation_info *ti,
- const struct tgsi_full_declaration *decl)
+static int
+nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
{
+ unsigned last = info->prop.fp.numColourResults * 4;
unsigned i, c;
- unsigned sn = TGSI_SEMANTIC_GENERIC;
- unsigned si = 0;
- const unsigned first = decl->Range.First;
- const unsigned last = decl->Range.Last;
-
- if (decl->Declaration.Semantic) {
- sn = decl->Semantic.Name;
- si = decl->Semantic.Index;
- }
-
- switch (decl->Declaration.File) {
- case TGSI_FILE_INPUT:
- for (i = first; i <= last; ++i) {
- if (ti->prog->type == PIPE_SHADER_VERTEX) {
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
- } else {
- for (c = 0; c < 4; ++c)
- ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
- /* for sprite coordinates: */
- ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
- }
- if (ti->prog->type == PIPE_SHADER_FRAGMENT)
- ti->interp_mode[i] = nvc0_interp_mode(decl);
- }
- break;
- case TGSI_FILE_OUTPUT:
- for (i = first; i <= last; ++i, ++si) {
- if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
- si = i;
- if (i == ti->fp_depth_output) {
- ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
- } else {
- if (i > ti->fp_depth_output)
- si -= 1;
- for (c = 0; c < 4; ++c)
- ti->output_loc[i][c] = si * 4 + c;
- }
- } else {
- if (sn == TGSI_SEMANTIC_EDGEFLAG)
- ti->edgeflag_out = i;
- for (c = 0; c < 4; ++c)
- ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
- /* for TFB_VARYING_LOCS: */
- ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
- }
- }
- break;
- case TGSI_FILE_SYSTEM_VALUE:
- i = first;
- ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
- assert(first == last);
- break;
- case TGSI_FILE_TEMPORARY:
- ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
- break;
- case TGSI_FILE_NULL:
- case TGSI_FILE_CONSTANT:
- case TGSI_FILE_SAMPLER:
- case TGSI_FILE_ADDRESS:
- case TGSI_FILE_IMMEDIATE:
- case TGSI_FILE_PREDICATE:
- break;
- default:
- NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
- return FALSE;
- }
- return TRUE;
-}
-static void
-prog_inst(struct nvc0_translation_info *ti,
- const struct tgsi_full_instruction *inst, int id)
-{
- const struct tgsi_dst_register *dst;
- const struct tgsi_src_register *src;
- int s, c, k;
- unsigned mask;
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
- ti->subr[ti->num_subrs].first_insn = id - 1;
- ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
- ++ti->num_subrs;
- }
+ for (i = 0; i < info->numOutputs; ++i)
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = info->out[i].si * 4 + c;
- if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
- dst = &inst->Dst[0].Register;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = last++;
- for (c = 0; c < 4; ++c) {
- if (dst->Indirect)
- nvc0_indirect_outputs(ti, id);
- if (!(dst->WriteMask & (1 << c)))
- continue;
- ti->output_access[dst->Index][c] = id;
- }
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = last;
- if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
- inst->Src[0].Register.File == TGSI_FILE_INPUT &&
- dst->Index == ti->edgeflag_out)
- ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
- } else
- if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
- if (inst->Dst[0].Register.Indirect)
- ti->require_stores = TRUE;
- }
+ return 0;
+}
- for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
- src = &inst->Src[s].Register;
- if (src->File == TGSI_FILE_TEMPORARY)
- if (inst->Src[s].Register.Indirect)
- ti->require_stores = TRUE;
- if (src->File != TGSI_FILE_INPUT)
- continue;
- mask = nvc0_tgsi_src_mask(inst, s);
+static int
+nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
- if (inst->Src[s].Register.Indirect)
- nvc0_indirect_inputs(ti, id);
+ for (i = 0; i < info->numOutputs; ++i) {
+ offset = nvc0_shader_output_address(info->out[i].sn,
+ info->out[i].si, ubase);
+ if (info->out[i].patch && offset >= 0x20)
+ offset = 0x20 + info->out[i].si * 0x10;
- for (c = 0; c < 4; ++c) {
- if (!(mask & (1 << c)))
- continue;
- k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
- if (k <= TGSI_SWIZZLE_W)
- ti->input_access[src->Index][k] = id;
- }
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = (offset + c * 0x4) / 4;
+
+ nvc0_mesa_varying_hack(&info->out[i]);
}
+
+ return 0;
}
-/* Probably should introduce something like struct tgsi_function_declaration
- * instead of trying to guess inputs/outputs.
- */
-static void
-prog_subroutine_inst(struct nvc0_subroutine *subr,
- const struct tgsi_full_instruction *inst)
+static int
+nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
{
- const struct tgsi_dst_register *dst;
- const struct tgsi_src_register *src;
- int s, c, k;
- unsigned mask;
-
- for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
- src = &inst->Src[s].Register;
- if (src->File != TGSI_FILE_TEMPORARY)
- continue;
- mask = nvc0_tgsi_src_mask(inst, s);
+ int ret;
- for (c = 0; c < 4; ++c) {
- k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+ if (info->type == PIPE_SHADER_VERTEX)
+ ret = nvc0_vp_assign_input_slots(info);
+ else
+ ret = nvc0_sp_assign_input_slots(info);
+ if (ret)
+ return ret;
- if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
- if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
- subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
- }
- }
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ ret = nvc0_fp_assign_output_slots(info);
+ else
+ ret = nvc0_sp_assign_output_slots(info);
+ return ret;
+}
- if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
- dst = &inst->Dst[0].Register;
+static INLINE void
+nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
+{
+ uint8_t min = (vp->hdr[4] >> 12) & 0xff;
+ uint8_t max = (vp->hdr[4] >> 24);
- for (c = 0; c < 4; ++c)
- if (dst->WriteMask & (1 << c))
- subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
- }
+ min = MIN2(min, slot);
+ max = MAX2(max, slot);
+
+ vp->hdr[4] = (max << 24) | (min << 12);
}
+/* Common part of header generation for VP, TCP, TEP and GP. */
static int
-nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
{
- int i, c;
- unsigned a;
+ unsigned i, c, a;
- for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
- for (c = 0; c < 4; ++c, ++a)
- if (ti->input_access[i][c])
- vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
+ for (i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ a = info->in[i].slot[c];
+ if (info->in[i].mask & (1 << c))
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
+ }
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
- a = (ti->output_loc[i][0] - 0x40) / 4;
- if (ti->output_loc[i][0] >= 0xf00)
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].patch)
continue;
- for (c = 0; c < 4; ++c, ++a) {
- if (!ti->output_access[i][c])
+ for (c = 0; c < 4; ++c) {
+ if (!(info->out[i].mask & (1 << c)))
continue;
- vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
+ assert(info->out[i].slot[c] >= 0x40 / 4);
+ a = info->out[i].slot[c] - 0x40 / 4;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32);
+ if (info->out[i].oread)
+ nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
}
}
- for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
- a = ti->sysval_loc[i] / 4;
- if (a > 0 && a < (0xf00 / 4))
- vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32);
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_PRIMID:
+ vp->hdr[5] |= 1 << 24;
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ vp->hdr[10] |= 1 << 30;
+ break;
+ /*
+ case TGSI_SEMANTIC_VERTEXID:
+ vp->hdr[10] |= 1 << 31;
+ break;
+ */
+ default:
+ break;
+ }
}
return 0;
}
static int
-nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
{
- vp->hdr[0] = 0x20461;
+ vp->hdr[0] = 0x20061 | (1 << 10);
vp->hdr[4] = 0xff000;
- vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+ vp->hdr[18] = (1 << info->io.clipDistanceCount) - 1;
+
+ return nvc0_vtgp_gen_header(vp, info);
+}
+
+#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
+static void
+nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
+{
+ switch (info->prop.tp.domain) {
+ case PIPE_PRIM_LINES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
+ if (info->prop.tp.winding > 0)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+ break;
+ case PIPE_PRIM_QUADS:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
+ break;
+ default:
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
- return nvc0_vp_gp_gen_header(vp, ti);
+ switch (info->prop.tp.partitioning) {
+ case PIPE_TESS_PART_INTEGER:
+ case PIPE_TESS_PART_POW2:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
+ break;
+ case PIPE_TESS_PART_FRACT_ODD:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
+ break;
+ case PIPE_TESS_PART_FRACT_EVEN:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
+ break;
+ default:
+ assert(!"invalid tessellator partitioning");
+ break;
+ }
}
+#endif
+#ifdef PIPE_SHADER_HULL
static int
-nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
{
- unsigned invocations = 1;
- unsigned max_output_verts, output_prim;
- unsigned i;
+ unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
- gp->hdr[0] = 0x21061;
+ tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
- for (i = 0; i < ti->scan.num_properties; ++i) {
- switch (ti->scan.properties[i].name) {
- case TGSI_PROPERTY_GS_OUTPUT_PRIM:
- output_prim = ti->scan.properties[i].data[0];
- break;
- case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
- max_output_verts = ti->scan.properties[i].data[0];
- assert(max_output_verts < 512);
- break;
- /*
- case TGSI_PROPERTY_GS_INVOCATIONS:
- invocations = ti->scan.properties[i].data[0];
- assert(invocations <= 32);
- break;
- */
- default:
- break;
- }
- }
+ if (info->numPatchConstants)
+ opcs = 8 + info->numPatchConstants * 4;
+
+ tcp->hdr[0] = 0x20061 | (2 << 10);
+
+ tcp->hdr[1] = opcs << 24;
+ tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
+
+ tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
+
+ nvc0_vtgp_gen_header(tcp, info);
- gp->hdr[2] = MIN2(invocations, 32) << 24;
+ nvc0_tp_get_tess_mode(tcp, info);
- switch (output_prim) {
+ return 0;
+}
+#endif
+
+#ifdef PIPE_SHADER_DOMAIN
+static int
+nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
+{
+ tep->hdr[0] = 0x20061 | (3 << 10);
+ tep->hdr[4] = 0xff000;
+
+ nvc0_vtgp_gen_header(tep, info);
+
+ nvc0_tp_get_tess_mode(tep, info);
+
+ tep->hdr[18] |= 0x3 << 12; /* ? */
+
+ return 0;
+}
+#endif
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
+{
+ gp->hdr[0] = 0x20061 | (4 << 10);
+
+ gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
+
+ switch (info->prop.gp.outputPrim) {
case PIPE_PRIM_POINTS:
gp->hdr[3] = 0x01000000;
gp->hdr[0] |= 0xf0000000;
@@ -510,206 +374,263 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
break;
}
- gp->hdr[4] = max_output_verts & 0x1ff;
+ gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+
+ return nvc0_vtgp_gen_header(gp, info);
+}
+
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
- return nvc0_vp_gp_gen_header(gp, ti);
+static uint8_t
+nvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
+{
+ if (var->linear)
+ return NVC0_INTERP_LINEAR;
+ if (var->flat)
+ return NVC0_INTERP_FLAT;
+ return NVC0_INTERP_PERSPECTIVE;
}
static int
-nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
{
- int i, c;
- unsigned a, m;
-
- fp->hdr[0] = 0x21462;
+ unsigned i, c, a, m;
+
+ fp->hdr[0] = 0x20062 | (5 << 10);
fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
- if (ti->scan.uses_kill)
+ if (info->prop.fp.usesDiscard)
fp->hdr[0] |= 0x8000;
- if (ti->scan.writes_z) {
+ if (info->prop.fp.numColourResults > 1)
+ fp->hdr[0] |= 0x4000;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ fp->hdr[19] |= 0x1;
+ if (info->prop.fp.writesDepth) {
fp->hdr[19] |= 0x2;
- if (ti->scan.num_outputs > 2)
- fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
- } else {
- if (ti->scan.num_outputs > 1)
- fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+ fp->flags[0] = 0x11; /* deactivate ZCULL */
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
- m = ti->interp_mode[i] & 3;
+ for (i = 0; i < info->numInputs; ++i) {
+ m = nvc0_hdr_interp_mode(&info->in[i]);
for (c = 0; c < 4; ++c) {
- if (!ti->input_access[i][c])
+ if (!(info->in[i].mask & (1 << c)))
continue;
- a = ti->input_loc[i][c] / 2;
- if (ti->input_loc[i][c] >= 0x2c0)
- a -= 32;
- if (ti->input_loc[i][0] == 0x70)
- fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
- else
- if (ti->input_loc[i][0] == 0x2e0)
- fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
- else
+ if (info->in[i].slot[0] == (0x070 / 4)) {
+ fp->hdr[5] |= 1 << (28 + c);
+ } else
+ if (info->in[i].slot[0] == (0x2e0 / 4)) {
+ if (c <= 1)
+ fp->hdr[14] |= 1 << (24 + c);
+ } else {
+ if (info->in[i].slot[c] < (0x040 / 4) ||
+ info->in[i].slot[c] > (0x380 / 4))
+ continue;
+ a = info->in[i].slot[c] * 2;
+ if (info->in[i].slot[0] >= (0x2c0 / 4))
+ a -= 32;
fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
}
}
- for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
- if (i != ti->fp_depth_output)
- fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
}
- for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
- a = ti->sysval_loc[i] / 2;
- if ((a > 0) && (a < 0xf00 / 2))
- fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
- }
+ fp->fp.early_z = info->prop.fp.earlyFragTests;
+ if (fp->fp.early_z == FALSE && fp->code_size >= 0x400)
+ fp->fp.early_z = !(info->prop.fp.writesDepth ||
+ info->prop.fp.usesDiscard ||
+ (info->io.globalAccess & 2));
return 0;
}
-static boolean
-nvc0_prog_scan(struct nvc0_translation_info *ti)
+#ifdef DEBUG
+static void
+nvc0_program_dump(struct nvc0_program *prog)
{
- struct nvc0_program *prog = ti->prog;
- struct tgsi_parse_context parse;
- int ret;
- unsigned i;
+ unsigned pos;
-#if NV50_DEBUG & NV50_DEBUG_SHADER
- tgsi_dump(prog->pipe.tokens, 0);
+ for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+
+ debug_printf("shader binary code (0x%x bytes):", prog->code_size);
+ for (pos = 0; pos < prog->code_size / 4; ++pos) {
+ if ((pos % 8) == 0)
+ debug_printf("\n");
+ debug_printf("%08x ", prog->code[pos]);
+ }
+ debug_printf("\n");
+}
#endif
- tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+ struct nv50_ir_prog_info *info;
+ int ret;
- if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
- ti->fp_depth_output = 255;
- for (i = 0; i < ti->scan.num_outputs; ++i)
- if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
- ti->fp_depth_output = i;
- }
+ info = CALLOC_STRUCT(nv50_ir_prog_info);
+ if (!info)
+ return FALSE;
- ti->subr =
- CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+ info->type = prog->type;
+ info->target = 0xc0;
+ info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ info->bin.source = (void *)prog->pipe.tokens;
- ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
- ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+ info->io.clipDistanceCount = prog->vp.num_ucps;
- ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+ info->assignSlots = nvc0_program_assign_varying_slots;
- tgsi_parse_init(&parse, prog->pipe.tokens);
- while (!tgsi_parse_end_of_tokens(&parse)) {
- tgsi_parse_token(&parse);
+#ifdef DEBUG
+ info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+ info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+ info->optLevel = 3;
+#endif
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- prog_immediate(ti, &parse.FullToken.FullImmediate);
- break;
- case TGSI_TOKEN_TYPE_DECLARATION:
- prog_decl(ti, &parse.FullToken.FullDeclaration);
- break;
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
- prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
- break;
- default:
- break;
- }
+ ret = nv50_ir_generate_code(info);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
}
- for (i = 0; i < ti->num_subrs; ++i) {
- unsigned pc = ti->subr[i].id;
- while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
- prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
- }
+ prog->code = info->bin.code;
+ prog->code_size = info->bin.codeSize;
+ prog->immd_data = info->immd.buf;
+ prog->immd_size = info->immd.bufSize;
+ prog->relocs = info->bin.relocData;
+ prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
+
+ prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
switch (prog->type) {
case PIPE_SHADER_VERTEX:
- ti->input_file = NV_FILE_MEM_A;
- ti->output_file = NV_FILE_MEM_V;
- ret = nvc0_vp_gen_header(prog, ti);
+ ret = nvc0_vp_gen_header(prog, info);
break;
- /*
- case PIPE_SHADER_TESSELLATION_CONTROL:
- ret = nvc0_tcp_gen_header(ti);
+#ifdef PIPE_SHADER_HULL
+ case PIPE_SHADER_HULL:
+ ret = nvc0_tcp_gen_header(prog, info);
break;
- case PIPE_SHADER_TESSELLATION_EVALUATION:
- ret = nvc0_tep_gen_header(ti);
+#endif
+#ifdef PIPE_SHADER_DOMAIN
+ case PIPE_SHADER_DOMAIN:
+ ret = nvc0_tep_gen_header(prog, info);
break;
+#endif
case PIPE_SHADER_GEOMETRY:
- ret = nvc0_gp_gen_header(ti);
+ ret = nvc0_gp_gen_header(prog, info);
break;
- */
case PIPE_SHADER_FRAGMENT:
- ti->input_file = NV_FILE_MEM_V;
- ti->output_file = NV_FILE_GPR;
-
- if (ti->scan.writes_z)
- prog->flags[0] = 0x11; /* ? */
- else
- if (!ti->scan.uses_kill && !ti->global_stores)
- prog->fp.early_z = 1;
-
- ret = nvc0_fp_gen_header(prog, ti);
+ ret = nvc0_fp_gen_header(prog, info);
break;
default:
- assert(!"unsupported program type");
ret = -1;
+ NOUVEAU_ERR("unknown program type: %u\n", prog->type);
break;
}
+ if (ret)
+ goto out;
- if (ti->require_stores) {
+ if (info->bin.tlsSpace) {
+ assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
- prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+ prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
}
+ if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 16;
- assert(!ret);
- return ret;
+out:
+ FREE(info);
+ return !ret;
}
+/* Allocate space in the screen's code heap for a translated program and
+ * upload its header, code and immediate array data.
+ *
+ * nvc0: context whose m2mf path is used for the uploads
+ * prog: translated program; code_base, immd_base and res are filled in here
+ *
+ * Returns TRUE on success, FALSE when the code heap has no room left.
+ */
boolean
-nvc0_program_translate(struct nvc0_program *prog)
+nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
- struct nvc0_translation_info *ti;
+ struct nvc0_screen *screen = nvc0->screen;
int ret;
+ uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
+ /* lib_code is still NULL if the builtin library was empty or could not be
+ * allocated, so it must not be dereferenced unconditionally */
+ uint32_t lib_pos = screen->lib_code ? screen->lib_code->start : 0;
+ uint32_t code_pos;
+
+ /* c[] bindings need to be aligned to 0x100, but we could use relocations
+ * to save space. */
+ if (prog->immd_size) {
+ /* offset relative to the allocation; made absolute once res is known */
+ prog->immd_base = size;
+ size = align(size, 0x40);
+ size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
+ }
+ size = align(size, 0x40); /* required by SP_START_ID */
- ti = CALLOC_STRUCT(nvc0_translation_info);
- ti->prog = prog;
+ ret = nouveau_resource_alloc(screen->text_heap, size, prog, &prog->res);
+ if (ret) {
+ NOUVEAU_ERR("out of code space\n");
+ return FALSE;
+ }
+ prog->code_base = prog->res->start;
+ prog->immd_base = align(prog->res->start + prog->immd_base, 0x100);
+ /* the immediates may end exactly at the end of the allocation, so the
+ * bound is inclusive */
+ assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
+ prog->res->start + prog->res->size));
- ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+ code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
- prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
+ /* relocations can only be resolved now that the final positions are known */
+ if (prog->relocs)
+ nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
- if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
- ti->append_ucp = TRUE;
+#ifdef DEBUG
+ if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ nvc0_program_dump(prog);
+#endif
- ret = nvc0_prog_scan(ti);
- if (ret) {
- NOUVEAU_ERR("unsupported shader program\n");
- goto out;
- }
+ nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0_m2mf_push_linear(&nvc0->base, screen->text,
+ prog->code_base + NVC0_SHADER_HEADER_SIZE,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ if (prog->immd_size)
+ nvc0_m2mf_push_linear(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
- ret = nvc0_generate_code(ti);
- if (ret)
- NOUVEAU_ERR("shader translation failed\n");
+ BEGIN_RING(screen->base.channel, RING_3D(MEM_BARRIER), 1);
+ OUT_RING (screen->base.channel, 0x1111);
-#if NV50_DEBUG & NV50_DEBUG_SHADER
- unsigned i;
- for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
- debug_printf("HDR[%02lx] = 0x%08x\n",
- i * sizeof(prog->hdr[0]), prog->hdr[i]);
-#endif
+ return TRUE;
+}
-out:
- if (ti->immd32)
- FREE(ti->immd32);
- if (ti->immd32_ty)
- FREE(ti->immd32_ty);
- if (ti->insns)
- FREE(ti->insns);
- if (ti->subr)
- FREE(ti->subr);
- FREE(ti);
- return ret ? FALSE : TRUE;
+/* Upload code for builtin functions like integer division emulation.
+ *
+ * The library is stored once per screen (screen->lib_code), so this is a
+ * no-op when it has already been uploaded. The space is taken from the
+ * screen's text heap, with the size rounded up to a multiple of 0x100.
+ *
+ * Nothing is reported to the caller: if the target provides no library code
+ * or the heap allocation fails, the function returns with screen->lib_code
+ * left unset.
+ */
+void
+nvc0_program_library_upload(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+ uint32_t size;
+ const uint32_t *code;
+
+ if (screen->lib_code)
+ return;
+
+ nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
+ if (!size)
+ return;
+
+ ret = nouveau_resource_alloc(screen->text_heap, align(size, 0x100), NULL,
+ &screen->lib_code);
+ if (ret)
+ return;
+
+ nvc0_m2mf_push_linear(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
+ /* no need for a memory barrier here, one will be emitted with the upload
+ * of the first program */
}
void
@@ -720,6 +641,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (prog->code)
FREE(prog->code);
+ if (prog->immd_data)
+ FREE(prog->immd_data);
if (prog->relocs)
FREE(prog->relocs);
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index f6fea29780b..239890bd89a 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -3,9 +3,8 @@
#define __NVC0_PROGRAM_H__
#include "pipe/p_state.h"
-#include "tgsi/tgsi_scan.h"
-#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
#define NVC0_SHADER_HEADER_SIZE (20 * 4)
@@ -14,15 +13,17 @@ struct nvc0_program {
ubyte type;
boolean translated;
- ubyte max_gpr;
+ uint8_t max_gpr;
uint32_t *code;
+ uint32_t *immd_data;
unsigned code_base;
unsigned code_size;
- unsigned parm_size;
-
- uint32_t hdr[20]; /* TODO: move this into code to save space */
+ unsigned immd_base;
+ unsigned immd_size; /* size of immediate array data */
+ unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
+ uint32_t hdr[20];
uint32_t flags[2];
struct {
@@ -34,59 +35,14 @@ struct nvc0_program {
uint8_t early_z;
uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
} fp;
+ struct {
+ uint32_t tess_mode; /* ~0 if defined by the other stage */
+ uint32_t input_patch_size;
+ } tp;
void *relocs;
- unsigned num_relocs;
struct nouveau_resource *res;
};
-/* first 2 bits are written into the program header, for each input */
-#define NVC0_INTERP_FLAT (1 << 0)
-#define NVC0_INTERP_PERSPECTIVE (2 << 0)
-#define NVC0_INTERP_LINEAR (3 << 0)
-#define NVC0_INTERP_CENTROID (1 << 2)
-
-/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
-struct nvc0_subroutine {
- unsigned id;
- unsigned first_insn;
- uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
- uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
-};
-
-struct nvc0_translation_info {
- struct nvc0_program *prog;
- struct tgsi_full_instruction *insns;
- unsigned num_insns;
- ubyte input_file;
- ubyte output_file;
- ubyte fp_depth_output;
- uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
- uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
- uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
- boolean sysval_in[TGSI_SEMANTIC_COUNT];
- int input_access[PIPE_MAX_SHADER_INPUTS][4];
- int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
- ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
- boolean indirect_inputs;
- boolean indirect_outputs;
- boolean require_stores;
- boolean global_stores;
- uint32_t *immd32;
- ubyte *immd32_ty;
- unsigned immd32_nr;
- unsigned temp128_nr;
- ubyte edgeflag_out;
- struct nvc0_subroutine *subr;
- unsigned num_subrs;
- boolean append_ucp;
- struct tgsi_shader_info scan;
-};
-
-int nvc0_generate_code(struct nvc0_translation_info *);
-
-void nvc0_relocate_program(struct nvc0_program *,
- uint32_t code_base, uint32_t data_base);
-
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index a8bd09234c2..596a1efc610 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -155,7 +155,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
return 16384;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
- return 4;
+ return 16;
case PIPE_SHADER_CAP_MAX_INPUTS:
if (shader == PIPE_SHADER_VERTEX)
return 32;
@@ -179,9 +179,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_SUBROUTINES:
- return 0; /* please inline, or provide function declarations */
+ return 1; /* but inlining everything, we need function declarations */
case PIPE_SHADER_CAP_INTEGERS:
- return 0;
+ return 1;
default:
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
return 0;
@@ -225,6 +225,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_bo_ref(NULL, &screen->fence.bo);
nouveau_bo_ref(NULL, &screen->vfetch_cache);
+ nouveau_resource_destroy(&screen->lib_code);
nouveau_resource_destroy(&screen->text_heap);
if (screen->tic.entries)
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index a3133b28876..6780e32e302 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -34,6 +34,7 @@ struct nvc0_screen {
uint64_t tls_size;
struct nouveau_resource *text_heap;
+ struct nouveau_resource *lib_code; /* allocated from text_heap */
struct {
struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 287160e0b2a..0a5581241d7 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -31,18 +31,37 @@ static INLINE void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
struct nvc0_program *prog, int stage)
{
+ /* Bring the per-stage bits in nvc0->state (tls_required, c14_bound) in
+ * line with the program being bound to 'stage', and (un)bind the
+ * program's immediate array as a constant buffer as needed. */
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+
if (prog->hdr[1])
nvc0->state.tls_required |= 1 << stage;
else
nvc0->state.tls_required &= ~(1 << stage);
+
+ if (prog->immd_size) {
+ const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+ /* NOTE: may overlap code of a different shader, because the size is
+ * rounded up to a multiple of 0x100 here */
+ OUT_RING (chan, align(prog->immd_size, 0x100));
+ OUT_RELOCh(chan, nvc0->screen->text, prog->immd_base, rl);
+ OUT_RELOCl(chan, nvc0->screen->text, prog->immd_base, rl);
+ BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1);
+ /* presumably: slot 14 in bits 4+, bit 0 = valid -- TODO confirm */
+ OUT_RING (chan, (14 << 4) | 1);
+
+ nvc0->state.c14_bound |= 1 << stage;
+ } else
+ if (nvc0->state.c14_bound & (1 << stage)) {
+ /* this stage had a binding from an earlier program that the new
+ * program does not need; same slot as above with bit 0 cleared */
+ BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1);
+ OUT_RING (chan, (14 << 4) | 0);
+
+ nvc0->state.c14_bound &= ~(1 << stage);
+ }
}
-static boolean
+static INLINE boolean
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
- int ret;
- unsigned size;
-
if (prog->translated)
return TRUE;
@@ -50,25 +69,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (!prog->translated)
return FALSE;
- size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
-
- ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
- &prog->res);
- if (ret)
- return FALSE;
-
- prog->code_base = prog->res->start;
-
- nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, prog->code_base,
- NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
- nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text,
- prog->code_base + NVC0_SHADER_HEADER_SIZE,
- NOUVEAU_BO_VRAM, prog->code_size, prog->code);
-
- BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1);
- OUT_RING (nvc0->screen->base.channel, 0x1111);
-
- return TRUE;
+ return nvc0_program_upload_code(nvc0, prog);
}
void