diff options
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/Makefile.sources | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_context.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_context.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_program.c | 973 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_program.h | 66 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_screen.c | 7 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_screen.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_shader_state.c | 47 |
10 files changed, 508 insertions, 612 deletions
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 77dbc823c7a..ecffbbf86e6 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -153,6 +153,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); /* nv50_program.c */ boolean nv50_program_translate(struct nv50_program *); +boolean nv50_program_translate_new(struct nv50_program *); void nv50_program_destroy(struct nv50_context *, struct nv50_program *); /* nv50_query.c */ diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 4def93d6b84..b61400bf81e 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -29,6 +29,8 @@ #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" +#include "codegen/nv50_ir_driver.h" + static INLINE unsigned bitcount4(const uint32_t val) { @@ -625,6 +627,17 @@ nv50_prog_scan(struct nv50_translation_info *ti) return ret; } +/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or + * it "gets disappeared" and cannot be used in libnvc0 ... + */ +boolean +nv50_program_translate_new(struct nv50_program *p) +{ + struct nv50_ir_prog_info info; + + return nv50_ir_generate_code(&info); +} + boolean nv50_program_translate(struct nv50_program *p) { diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources index 9b1fb97f0cb..95f796fc065 100644 --- a/src/gallium/drivers/nvc0/Makefile.sources +++ b/src/gallium/drivers/nvc0/Makefile.sources @@ -13,12 +13,6 @@ C_SOURCES := \ nvc0_vbo.c \ nvc0_program.c \ nvc0_shader_state.c \ - nvc0_pc.c \ - nvc0_pc_print.c \ - nvc0_pc_emit.c \ - nvc0_tgsi_to_nc.c \ - nvc0_pc_optimize.c \ - nvc0_pc_regalloc.c \ nvc0_push.c \ nvc0_push2.c \ nvc0_query.c diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 360afbb943e..8fa1675902e 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -152,6 +152,9 @@ nvc0_create(struct pipe_screen *pscreen, void *priv) nouveau_context_init_vdec(&nvc0->base); + /* shader builtin library is per-screen, but we need a context for m2mf */ + nvc0_program_library_upload(nvc0); + return pipe; } diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 353a5418dd5..c11d1c32efe 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -79,6 +79,7 @@ struct nvc0_context { uint8_t num_textures[5]; uint8_t num_samplers[5]; uint8_t tls_required; /* bitmask of shader types using l[] */ + uint8_t c14_bound; /* whether immediate array constbuf is bound */ uint16_t scissor; uint32_t uniform_buffer_bound[5]; } state; @@ -161,7 +162,9 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *); /* nvc0_program.c */ boolean nvc0_program_translate(struct nvc0_program *); +boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *); void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *); +void nvc0_program_library_upload(struct nvc0_context *); /* nvc0_query.c */ void nvc0_init_query_functions(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index bcee027917e..eaad0805909 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -20,479 +20,343 @@ * SOFTWARE. */ -#include "pipe/p_shader_tokens.h" #include "pipe/p_defines.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" - #include "nvc0_context.h" -#include "nvc0_pc.h" - -static unsigned -nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c) -{ - unsigned mask = inst->Dst[0].Register.WriteMask; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_COS: - case TGSI_OPCODE_SIN: - return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); - case TGSI_OPCODE_DP3: - return 0x7; - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - case TGSI_OPCODE_KIL: /* WriteMask ignored */ - return 0xf; - case TGSI_OPCODE_DST: - return mask & (c ? 0xa : 0x6); - case TGSI_OPCODE_EX2: - case TGSI_OPCODE_EXP: - case TGSI_OPCODE_LG2: - case TGSI_OPCODE_LOG: - case TGSI_OPCODE_POW: - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_SCS: - return 0x1; - case TGSI_OPCODE_IF: - return 0x1; - case TGSI_OPCODE_LIT: - return 0xb; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXP: - { - const struct tgsi_instruction_texture *tex; - - assert(inst->Instruction.Texture); - tex = &inst->Texture; - - mask = 0x7; - if (inst->Instruction.Opcode != TGSI_OPCODE_TEX && - inst->Instruction.Opcode != TGSI_OPCODE_TXD) - mask |= 0x8; /* bias, lod or proj */ - - switch (tex->Texture) { - case TGSI_TEXTURE_1D: - mask &= 0x9; - break; - case TGSI_TEXTURE_SHADOW1D: - mask &= 0x5; - break; - case TGSI_TEXTURE_2D: - mask &= 0xb; - break; - default: - break; - } - } - return mask; - case TGSI_OPCODE_XPD: - { - unsigned x = 0; - if (mask & 1) x |= 0x6; - if (mask & 2) x |= 0x5; - if (mask & 4) x |= 0x3; - return x; - } - default: - break; - } - return mask; -} +#include "nv50/codegen/nv50_ir_driver.h" +/* If only they told use the actual semantic instead of just GENERIC ... */ static void -nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id) +nvc0_mesa_varying_hack(struct nv50_ir_varying *var) { - int i, c; - - for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) - for (c = 0; c < 4; ++c) - ti->input_access[i][c] = id; + unsigned c; - ti->indirect_inputs = TRUE; -} - -static void -nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id) -{ - int i, c; + if (var->sn != TGSI_SEMANTIC_GENERIC) + return; - for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) + if (var->si <= 7) /* gl_TexCoord */ for (c = 0; c < 4; ++c) - ti->output_access[i][c] = id; - - ti->indirect_outputs = TRUE; + var->slot[c] = (0x300 + var->si * 0x10 + c * 0x4) / 4; + else + if (var->si == 9) /* gl_PointCoord */ + for (c = 0; c < 4; ++c) + var->slot[c] = (0x2e0 + c * 0x4) / 4; + else + for (c = 0; c < 4; ++c) /* move down user varyings (first has index 8) */ + var->slot[c] -= 0x80 / 4; } -static INLINE unsigned -nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input) +static uint32_t +nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) { - /* NOTE: locations 0xfxx indicate special regs */ switch (sn) { - /* - case TGSI_SEMANTIC_VERTEXID: - *is_input = TRUE; - return 0x2fc; - */ - case TGSI_SEMANTIC_PRIMID: - *is_input = TRUE; - return 0x60; - /* - case TGSI_SEMANTIC_LAYER_INDEX: - return 0x64; - case TGSI_SEMANTIC_VIEWPORT_INDEX: - return 0x68; - */ - case TGSI_SEMANTIC_INSTANCEID: - *is_input = TRUE; - return 0x2f8; - case TGSI_SEMANTIC_FACE: - *is_input = TRUE; - return 0x3fc; - /* - case TGSI_SEMANTIC_INVOCATIONID: - return 0xf11; - */ +/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */ + case TGSI_SEMANTIC_PRIMID: return 0x060; + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x270; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; +/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */ +/* case TGSI_SEMANTIC_POINTCOORD: return 0x2e0; */ +/* case TGSI_SEMANTIC_TESSCOORD: return ~0; */ /* 0x2f0, but special load */ + case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; +/* case TGSI_SEMANTIC_VERTEXID: return 0x2fc; */ +/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */ + case TGSI_SEMANTIC_FACE: return 0x3fc; +/* case TGSI_SEMANTIC_INVOCATIONID: return ~0; */ default: - assert(0); - return 0x000; + assert(!"invalid TGSI input semantic"); + return ~0; } } -static INLINE unsigned -nvc0_varying_location(unsigned sn, unsigned si) +static uint32_t +nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) { switch (sn) { - case TGSI_SEMANTIC_POSITION: - return 0x70; - case TGSI_SEMANTIC_COLOR: - return 0x280 + (si * 16); /* are these hard-wired ? */ - case TGSI_SEMANTIC_BCOLOR: - return 0x2a0 + (si * 16); - case TGSI_SEMANTIC_FOG: - return 0x270; - case TGSI_SEMANTIC_PSIZE: - return 0x6c; - /* - case TGSI_SEMANTIC_PNTC: - return 0x2e0; - */ - case TGSI_SEMANTIC_GENERIC: - /* We'd really like to distinguish between TEXCOORD and GENERIC here, - * since only 0x300 to 0x37c can be replaced by sprite coordinates. - * Also, gl_PointCoord should be a system value and must be assigned to - * address 0x2e0. For now, let's cheat: - */ - assert(si < 31); - if (si <= 7) - return 0x300 + si * 16; - if (si == 9) - return 0x2e0; - return 0x80 + ((si - 8) * 16); - case TGSI_SEMANTIC_NORMAL: - return 0x360; - case TGSI_SEMANTIC_PRIMID: - return 0x40; - case TGSI_SEMANTIC_FACE: - return 0x3fc; - case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */ - return 0xf00; - /* - case TGSI_SEMANTIC_CLIP_DISTANCE: - return 0x2c0 + (si * 4); - */ +/* case TGSI_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; */ + case TGSI_SEMANTIC_PRIMID: return 0x040; +/* case TGSI_SEMANTIC_LAYER: return 0x064; */ +/* case TGSI_SEMANTIC_VIEWPORTINDEX: return 0x068; */ + case TGSI_SEMANTIC_PSIZE: return 0x06c; + case TGSI_SEMANTIC_POSITION: return 0x070; + case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; + case TGSI_SEMANTIC_FOG: return 0x270; + case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; + case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; +/* case TGSI_SEMANTIC_CLIP: return 0x2c0 + si * 0x10; */ +/* case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; */ + case TGSI_SEMANTIC_EDGEFLAG: return ~0; default: - assert(0); - return 0x000; + assert(!"invalid TGSI output semantic"); + return ~0; } } -static INLINE unsigned -nvc0_interp_mode(const struct tgsi_full_declaration *decl) +static int +nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) { - unsigned mode; - - if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT) - mode = NVC0_INTERP_FLAT; - else - if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE) - mode = NVC0_INTERP_PERSPECTIVE; - else - if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR) - mode = NVC0_INTERP_PERSPECTIVE; - else - mode = NVC0_INTERP_LINEAR; + unsigned i, c; - if (decl->Declaration.Centroid) - mode |= NVC0_INTERP_CENTROID; + for (i = 0; i < info->numInputs; ++i) + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (0x80 + i * 0x10 + c * 0x4) / 4; - return mode; + return 0; } -static void -prog_immediate(struct nvc0_translation_info *ti, - const struct tgsi_full_immediate *imm) +static int +nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) { - int c; - unsigned n = ti->immd32_nr++; + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; - assert(ti->immd32_nr <= ti->scan.immediate_count); + for (i = 0; i < info->numInputs; ++i) { + offset = nvc0_shader_input_address(info->in[i].sn, + info->in[i].si, ubase); + if (info->in[i].patch && offset >= 0x20) + offset = 0x20 + info->in[i].si * 0x10; - for (c = 0; c < 4; ++c) - ti->immd32[n * 4 + c] = imm->u[c].Uint; + for (c = 0; c < 4; ++c) + info->in[i].slot[c] = (offset + c * 0x4) / 4; - ti->immd32_ty[n] = imm->Immediate.DataType; + nvc0_mesa_varying_hack(&info->in[i]); + } + + return 0; } -static boolean -prog_decl(struct nvc0_translation_info *ti, - const struct tgsi_full_declaration *decl) +static int +nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) { + unsigned last = info->prop.fp.numColourResults * 4; unsigned i, c; - unsigned sn = TGSI_SEMANTIC_GENERIC; - unsigned si = 0; - const unsigned first = decl->Range.First; - const unsigned last = decl->Range.Last; - - if (decl->Declaration.Semantic) { - sn = decl->Semantic.Name; - si = decl->Semantic.Index; - } - - switch (decl->Declaration.File) { - case TGSI_FILE_INPUT: - for (i = first; i <= last; ++i) { - if (ti->prog->type == PIPE_SHADER_VERTEX) { - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = 0x80 + i * 16 + c * 4; - } else { - for (c = 0; c < 4; ++c) - ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - /* for sprite coordinates: */ - ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4; - } - if (ti->prog->type == PIPE_SHADER_FRAGMENT) - ti->interp_mode[i] = nvc0_interp_mode(decl); - } - break; - case TGSI_FILE_OUTPUT: - for (i = first; i <= last; ++i, ++si) { - if (ti->prog->type == PIPE_SHADER_FRAGMENT) { - si = i; - if (i == ti->fp_depth_output) { - ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4; - } else { - if (i > ti->fp_depth_output) - si -= 1; - for (c = 0; c < 4; ++c) - ti->output_loc[i][c] = si * 4 + c; - } - } else { - if (sn == TGSI_SEMANTIC_EDGEFLAG) - ti->edgeflag_out = i; - for (c = 0; c < 4; ++c) - ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4; - /* for TFB_VARYING_LOCS: */ - ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4; - } - } - break; - case TGSI_FILE_SYSTEM_VALUE: - i = first; - ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]); - assert(first == last); - break; - case TGSI_FILE_TEMPORARY: - ti->temp128_nr = MAX2(ti->temp128_nr, last + 1); - break; - case TGSI_FILE_NULL: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_SAMPLER: - case TGSI_FILE_ADDRESS: - case TGSI_FILE_IMMEDIATE: - case TGSI_FILE_PREDICATE: - break; - default: - NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File); - return FALSE; - } - return TRUE; -} -static void -prog_inst(struct nvc0_translation_info *ti, - const struct tgsi_full_instruction *inst, int id) -{ - const struct tgsi_dst_register *dst; - const struct tgsi_src_register *src; - int s, c, k; - unsigned mask; - - if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { - ti->subr[ti->num_subrs].first_insn = id - 1; - ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */ - ++ti->num_subrs; - } + for (i = 0; i < info->numOutputs; ++i) + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = info->out[i].si * 4 + c; - if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { - dst = &inst->Dst[0].Register; + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.sampleMask].slot[0] = last++; - for (c = 0; c < 4; ++c) { - if (dst->Indirect) - nvc0_indirect_outputs(ti, id); - if (!(dst->WriteMask & (1 << c))) - continue; - ti->output_access[dst->Index][c] = id; - } + if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) + info->out[info->io.fragDepth].slot[2] = last; - if (inst->Instruction.Opcode == TGSI_OPCODE_MOV && - inst->Src[0].Register.File == TGSI_FILE_INPUT && - dst->Index == ti->edgeflag_out) - ti->prog->vp.edgeflag = inst->Src[0].Register.Index; - } else - if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { - if (inst->Dst[0].Register.Indirect) - ti->require_stores = TRUE; - } + return 0; +} - for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { - src = &inst->Src[s].Register; - if (src->File == TGSI_FILE_TEMPORARY) - if (inst->Src[s].Register.Indirect) - ti->require_stores = TRUE; - if (src->File != TGSI_FILE_INPUT) - continue; - mask = nvc0_tgsi_src_mask(inst, s); +static int +nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) +{ + unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); + unsigned offset; + unsigned i, c; - if (inst->Src[s].Register.Indirect) - nvc0_indirect_inputs(ti, id); + for (i = 0; i < info->numOutputs; ++i) { + offset = nvc0_shader_output_address(info->out[i].sn, + info->out[i].si, ubase); + if (info->out[i].patch && offset >= 0x20) + offset = 0x20 + info->out[i].si * 0x10; - for (c = 0; c < 4; ++c) { - if (!(mask & (1 << c))) - continue; - k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); - if (k <= TGSI_SWIZZLE_W) - ti->input_access[src->Index][k] = id; - } + for (c = 0; c < 4; ++c) + info->out[i].slot[c] = (offset + c * 0x4) / 4; + + nvc0_mesa_varying_hack(&info->out[i]); } + + return 0; } -/* Probably should introduce something like struct tgsi_function_declaration - * instead of trying to guess inputs/outputs. - */ -static void -prog_subroutine_inst(struct nvc0_subroutine *subr, - const struct tgsi_full_instruction *inst) +static int +nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) { - const struct tgsi_dst_register *dst; - const struct tgsi_src_register *src; - int s, c, k; - unsigned mask; - - for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { - src = &inst->Src[s].Register; - if (src->File != TGSI_FILE_TEMPORARY) - continue; - mask = nvc0_tgsi_src_mask(inst, s); + int ret; - for (c = 0; c < 4; ++c) { - k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + if (info->type == PIPE_SHADER_VERTEX) + ret = nvc0_vp_assign_input_slots(info); + else + ret = nvc0_sp_assign_input_slots(info); + if (ret) + return ret; - if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) - if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) - subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); - } - } + if (info->type == PIPE_SHADER_FRAGMENT) + ret = nvc0_fp_assign_output_slots(info); + else + ret = nvc0_sp_assign_output_slots(info); + return ret; +} - if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { - dst = &inst->Dst[0].Register; +static INLINE void +nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) +{ + uint8_t min = (vp->hdr[4] >> 12) & 0xff; + uint8_t max = (vp->hdr[4] >> 24); - for (c = 0; c < 4; ++c) - if (dst->WriteMask & (1 << c)) - subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); - } + min = MIN2(min, slot); + max = MAX2(max, slot); + + vp->hdr[4] = (max << 24) | (min << 12); } +/* Common part of header generation for VP, TCP, TEP and GP. */ static int -nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) { - int i, c; - unsigned a; + unsigned i, c, a; - for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { - for (c = 0; c < 4; ++c, ++a) - if (ti->input_access[i][c]) - vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */ + for (i = 0; i < info->numInputs; ++i) { + if (info->in[i].patch) + continue; + for (c = 0; c < 4; ++c) { + a = info->in[i].slot[c]; + if (info->in[i].mask & (1 << c)) + vp->hdr[5 + a / 32] |= 1 << (a % 32); + } } - for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { - a = (ti->output_loc[i][0] - 0x40) / 4; - if (ti->output_loc[i][0] >= 0xf00) + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].patch) continue; - for (c = 0; c < 4; ++c, ++a) { - if (!ti->output_access[i][c]) + for (c = 0; c < 4; ++c) { + if (!(info->out[i].mask & (1 << c))) continue; - vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */ + assert(info->out[i].slot[c] >= 0x40 / 4); + a = info->out[i].slot[c] - 0x40 / 4; + vp->hdr[13 + a / 32] |= 1 << (a % 32); + if (info->out[i].oread) + nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]); } } - for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { - a = ti->sysval_loc[i] / 4; - if (a > 0 && a < (0xf00 / 4)) - vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32); + for (i = 0; i < info->numSysVals; ++i) { + switch (info->sv[i].sn) { + case TGSI_SEMANTIC_PRIMID: + vp->hdr[5] |= 1 << 24; + break; + case TGSI_SEMANTIC_INSTANCEID: + vp->hdr[10] |= 1 << 30; + break; + /* + case TGSI_SEMANTIC_VERTEXID: + vp->hdr[10] |= 1 << 31; + break; + */ + default: + break; + } } return 0; } static int -nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti) +nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) { - vp->hdr[0] = 0x20461; + vp->hdr[0] = 0x20061 | (1 << 10); vp->hdr[4] = 0xff000; - vp->hdr[18] = (1 << vp->vp.num_ucps) - 1; + vp->hdr[18] = (1 << info->io.clipDistanceCount) - 1; + + return nvc0_vtgp_gen_header(vp, info); +} + +#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN) +static void +nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) +{ + switch (info->prop.tp.domain) { + case PIPE_PRIM_LINES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES; + break; + case PIPE_PRIM_TRIANGLES: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES; + if (info->prop.tp.winding > 0) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; + break; + case PIPE_PRIM_QUADS: + tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS; + break; + default: + tp->tp.tess_mode = ~0; + return; + } + if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; - return nvc0_vp_gp_gen_header(vp, ti); + switch (info->prop.tp.partitioning) { + case PIPE_TESS_PART_INTEGER: + case PIPE_TESS_PART_POW2: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL; + break; + case PIPE_TESS_PART_FRACT_ODD: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD; + break; + case PIPE_TESS_PART_FRACT_EVEN: + tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN; + break; + default: + assert(!"invalid tessellator partitioning"); + break; + } } +#endif +#ifdef PIPE_SHADER_HULL static int -nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) +nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) { - unsigned invocations = 1; - unsigned max_output_verts, output_prim; - unsigned i; + unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ - gp->hdr[0] = 0x21061; + tcp->tp.input_patch_size = info->prop.tp.inputPatchSize; - for (i = 0; i < ti->scan.num_properties; ++i) { - switch (ti->scan.properties[i].name) { - case TGSI_PROPERTY_GS_OUTPUT_PRIM: - output_prim = ti->scan.properties[i].data[0]; - break; - case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: - max_output_verts = ti->scan.properties[i].data[0]; - assert(max_output_verts < 512); - break; - /* - case TGSI_PROPERTY_GS_INVOCATIONS: - invocations = ti->scan.properties[i].data[0]; - assert(invocations <= 32); - break; - */ - default: - break; - } - } + if (info->numPatchConstants) + opcs = 8 + info->numPatchConstants * 4; + + tcp->hdr[0] = 0x20061 | (2 << 10); + + tcp->hdr[1] = opcs << 24; + tcp->hdr[2] = info->prop.tp.outputPatchSize << 24; + + tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */ + + nvc0_vtgp_gen_header(tcp, info); - gp->hdr[2] = MIN2(invocations, 32) << 24; + nvc0_tp_get_tess_mode(tcp, info); - switch (output_prim) { + return 0; +} +#endif + +#ifdef PIPE_SHADER_DOMAIN +static int +nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) +{ + tep->hdr[0] = 0x20061 | (3 << 10); + tep->hdr[4] = 0xff000; + + nvc0_vtgp_gen_header(tep, info); + + nvc0_tp_get_tess_mode(tep, info); + + tep->hdr[18] |= 0x3 << 12; /* ? */ + + return 0; +} +#endif + +static int +nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) +{ + gp->hdr[0] = 0x20061 | (4 << 10); + + gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; + + switch (info->prop.gp.outputPrim) { case PIPE_PRIM_POINTS: gp->hdr[3] = 0x01000000; gp->hdr[0] |= 0xf0000000; @@ -510,206 +374,263 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti) break; } - gp->hdr[4] = max_output_verts & 0x1ff; + gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff; + + return nvc0_vtgp_gen_header(gp, info); +} + +#define NVC0_INTERP_FLAT (1 << 0) +#define NVC0_INTERP_PERSPECTIVE (2 << 0) +#define NVC0_INTERP_LINEAR (3 << 0) +#define NVC0_INTERP_CENTROID (1 << 2) - return nvc0_vp_gp_gen_header(gp, ti); +static uint8_t +nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) +{ + if (var->linear) + return NVC0_INTERP_LINEAR; + if (var->flat) + return NVC0_INTERP_FLAT; + return NVC0_INTERP_PERSPECTIVE; } static int -nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti) +nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) { - int i, c; - unsigned a, m; - - fp->hdr[0] = 0x21462; + unsigned i, c, a, m; + + fp->hdr[0] = 0x20062 | (5 << 10); fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ - if (ti->scan.uses_kill) + if (info->prop.fp.usesDiscard) fp->hdr[0] |= 0x8000; - if (ti->scan.writes_z) { + if (info->prop.fp.numColourResults > 1) + fp->hdr[0] |= 0x4000; + if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) + fp->hdr[19] |= 0x1; + if (info->prop.fp.writesDepth) { fp->hdr[19] |= 0x2; - if (ti->scan.num_outputs > 2) - fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ - } else { - if (ti->scan.num_outputs > 1) - fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */ + fp->flags[0] = 0x11; /* deactivate ZCULL */ } - for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) { - m = ti->interp_mode[i] & 3; + for (i = 0; i < info->numInputs; ++i) { + m = nvc0_hdr_interp_mode(&info->in[i]); for (c = 0; c < 4; ++c) { - if (!ti->input_access[i][c]) + if (!(info->in[i].mask & (1 << c))) continue; - a = ti->input_loc[i][c] / 2; - if (ti->input_loc[i][c] >= 0x2c0) - a -= 32; - if (ti->input_loc[i][0] == 0x70) - fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */ - else - if (ti->input_loc[i][0] == 0x2e0) - fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */ - else + if (info->in[i].slot[0] == (0x070 / 4)) { + fp->hdr[5] |= 1 << (28 + c); + } else + if (info->in[i].slot[0] == (0x2e0 / 4)) { + if (c <= 1) + fp->hdr[14] |= 1 << (24 + c); + } else { + if (info->in[i].slot[c] < (0x040 / 4) || + info->in[i].slot[c] > (0x380 / 4)) + continue; + a = info->in[i].slot[c] * 2; + if (info->in[i].slot[0] >= (0x2c0 / 4)) + a -= 32; fp->hdr[4 + a / 32] |= m << (a % 32); + } } } - for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) { - if (i != ti->fp_depth_output) - fp->hdr[18] |= 0xf << ti->output_loc[i][0]; + for (i = 0; i < info->numOutputs; ++i) { + if (info->out[i].sn == TGSI_SEMANTIC_COLOR) + fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; } - for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) { - a = ti->sysval_loc[i] / 2; - if ((a > 0) && (a < 0xf00 / 2)) - fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32); - } + fp->fp.early_z = info->prop.fp.earlyFragTests; + if (fp->fp.early_z == FALSE && fp->code_size >= 0x400) + fp->fp.early_z = !(info->prop.fp.writesDepth || + info->prop.fp.usesDiscard || + (info->io.globalAccess & 2)); return 0; } -static boolean -nvc0_prog_scan(struct nvc0_translation_info *ti) +#ifdef DEBUG +static void +nvc0_program_dump(struct nvc0_program *prog) { - struct nvc0_program *prog = ti->prog; - struct tgsi_parse_context parse; - int ret; - unsigned i; + unsigned pos; -#if NV50_DEBUG & NV50_DEBUG_SHADER - tgsi_dump(prog->pipe.tokens, 0); + for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos) + debug_printf("HDR[%02lx] = 0x%08x\n", + pos * sizeof(prog->hdr[0]), prog->hdr[pos]); + + debug_printf("shader binary code (0x%x bytes):", prog->code_size); + for (pos = 0; pos < prog->code_size / 4; ++pos) { + if ((pos % 8) == 0) + debug_printf("\n"); + debug_printf("%08x ", prog->code[pos]); + } + debug_printf("\n"); +} #endif - tgsi_scan_shader(prog->pipe.tokens, &ti->scan); +boolean +nvc0_program_translate(struct nvc0_program *prog) +{ + struct nv50_ir_prog_info *info; + int ret; - if (ti->prog->type == PIPE_SHADER_FRAGMENT) { - ti->fp_depth_output = 255; - for (i = 0; i < ti->scan.num_outputs; ++i) - if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION) - ti->fp_depth_output = i; - } + info = CALLOC_STRUCT(nv50_ir_prog_info); + if (!info) + return FALSE; - ti->subr = - CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + info->type = prog->type; + info->target = 0xc0; + info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; + info->bin.source = (void *)prog->pipe.tokens; - ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); - ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); + info->io.clipDistanceCount = prog->vp.num_ucps; - ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0])); + info->assignSlots = nvc0_program_assign_varying_slots; - tgsi_parse_init(&parse, prog->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); +#ifdef DEBUG + info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); + info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); +#else + info->optLevel = 3; +#endif - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - prog_immediate(ti, &parse.FullToken.FullImmediate); - break; - case TGSI_TOKEN_TYPE_DECLARATION: - prog_decl(ti, &parse.FullToken.FullDeclaration); - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - ti->insns[ti->num_insns] = parse.FullToken.FullInstruction; - prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns); - break; - default: - break; - } + ret = nv50_ir_generate_code(info); + if (ret) { + NOUVEAU_ERR("shader translation failed: %i\n", ret); + goto out; } - for (i = 0; i < ti->num_subrs; ++i) { - unsigned pc = ti->subr[i].id; - while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) - prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); - } + prog->code = info->bin.code; + prog->code_size = info->bin.codeSize; + prog->immd_data = info->immd.buf; + prog->immd_size = info->immd.bufSize; + prog->relocs = info->bin.relocData; + prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1)); + + prog->vp.edgeflag = PIPE_MAX_ATTRIBS; switch (prog->type) { case PIPE_SHADER_VERTEX: - ti->input_file = NV_FILE_MEM_A; - ti->output_file = NV_FILE_MEM_V; - ret = nvc0_vp_gen_header(prog, ti); + ret = nvc0_vp_gen_header(prog, info); break; - /* - case PIPE_SHADER_TESSELLATION_CONTROL: - ret = nvc0_tcp_gen_header(ti); +#ifdef PIPE_SHADER_HULL + case PIPE_SHADER_HULL: + ret = nvc0_tcp_gen_header(prog, info); break; - case PIPE_SHADER_TESSELLATION_EVALUATION: - ret = nvc0_tep_gen_header(ti); +#endif +#ifdef PIPE_SHADER_DOMAIN + case PIPE_SHADER_DOMAIN: + ret = nvc0_tep_gen_header(prog, info); break; +#endif case PIPE_SHADER_GEOMETRY: - ret = nvc0_gp_gen_header(ti); + ret = nvc0_gp_gen_header(prog, info); break; - */ case PIPE_SHADER_FRAGMENT: - ti->input_file = NV_FILE_MEM_V; - ti->output_file = NV_FILE_GPR; - - if (ti->scan.writes_z) - prog->flags[0] = 0x11; /* ? */ - else - if (!ti->scan.uses_kill && !ti->global_stores) - prog->fp.early_z = 1; - - ret = nvc0_fp_gen_header(prog, ti); + ret = nvc0_fp_gen_header(prog, info); break; default: - assert(!"unsupported program type"); ret = -1; + NOUVEAU_ERR("unknown program type: %u\n", prog->type); break; } + if (ret) + goto out; - if (ti->require_stores) { + if (info->bin.tlsSpace) { + assert(info->bin.tlsSpace < (1 << 24)); prog->hdr[0] |= 1 << 26; - prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */ + prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */ } + if (info->io.globalAccess) + prog->hdr[0] |= 1 << 16; - assert(!ret); - return ret; +out: + FREE(info); + return !ret; } boolean -nvc0_program_translate(struct nvc0_program *prog) +nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) { - struct nvc0_translation_info *ti; + struct nvc0_screen *screen = nvc0->screen; int ret; + uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE; + uint32_t lib_pos = screen->lib_code->start; + uint32_t code_pos; + + /* c[] bindings need to be aligned to 0x100, but we could use relocations + * to save space. */ + if (prog->immd_size) { + prog->immd_base = size; + size = align(size, 0x40); + size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ + } + size = align(size, 0x40); /* required by SP_START_ID */ - ti = CALLOC_STRUCT(nvc0_translation_info); - ti->prog = prog; + ret = nouveau_resource_alloc(screen->text_heap, size, prog, &prog->res); + if (ret) { + NOUVEAU_ERR("out of code space\n"); + return FALSE; + } + prog->code_base = prog->res->start; + prog->immd_base = align(prog->res->start + prog->immd_base, 0x100); + assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size < + prog->res->start + prog->res->size)); - ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS; + code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; - prog->vp.edgeflag = PIPE_MAX_ATTRIBS; + if (prog->relocs) + nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); - if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps) - ti->append_ucp = TRUE; +#ifdef DEBUG + if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE)) + nvc0_program_dump(prog); +#endif - ret = nvc0_prog_scan(ti); - if (ret) { - NOUVEAU_ERR("unsupported shader program\n"); - goto out; - } + nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base, + NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); + nvc0_m2mf_push_linear(&nvc0->base, screen->text, + prog->code_base + NVC0_SHADER_HEADER_SIZE, + NOUVEAU_BO_VRAM, prog->code_size, prog->code); + if (prog->immd_size) + nvc0_m2mf_push_linear(&nvc0->base, + screen->text, prog->immd_base, NOUVEAU_BO_VRAM, + prog->immd_size, prog->immd_data); - ret = nvc0_generate_code(ti); - if (ret) - NOUVEAU_ERR("shader translation failed\n"); + BEGIN_RING(screen->base.channel, RING_3D(MEM_BARRIER), 1); + OUT_RING (screen->base.channel, 0x1111); -#if NV50_DEBUG & NV50_DEBUG_SHADER - unsigned i; - for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i) - debug_printf("HDR[%02lx] = 0x%08x\n", - i * sizeof(prog->hdr[0]), prog->hdr[i]); -#endif + return TRUE; +} -out: - if (ti->immd32) - FREE(ti->immd32); - if (ti->immd32_ty) - FREE(ti->immd32_ty); - if (ti->insns) - FREE(ti->insns); - if (ti->subr) - FREE(ti->subr); - FREE(ti); - return ret ? FALSE : TRUE; +/* Upload code for builtin functions like integer division emulation. */ +void +nvc0_program_library_upload(struct nvc0_context *nvc0) +{ + struct nvc0_screen *screen = nvc0->screen; + int ret; + uint32_t size; + const uint32_t *code; + + if (screen->lib_code) + return; + + nv50_ir_get_target_library(screen->base.device->chipset, &code, &size); + if (!size) + return; + + ret = nouveau_resource_alloc(screen->text_heap, align(size, 0x100), NULL, + &screen->lib_code); + if (ret) + return; + + nvc0_m2mf_push_linear(&nvc0->base, + screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, + size, code); + /* no need for a memory barrier, will be emitted with first program */ } void @@ -720,6 +641,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) if (prog->code) FREE(prog->code); + if (prog->immd_data) + FREE(prog->immd_data); if (prog->relocs) FREE(prog->relocs); diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index f6fea29780b..239890bd89a 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -3,9 +3,8 @@ #define __NVC0_PROGRAM_H__ #include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" -#define NVC0_CAP_MAX_PROGRAM_TEMPS 64 +#define NVC0_CAP_MAX_PROGRAM_TEMPS 128 #define NVC0_SHADER_HEADER_SIZE (20 * 4) @@ -14,15 +13,17 @@ struct nvc0_program { ubyte type; boolean translated; - ubyte max_gpr; + uint8_t max_gpr; uint32_t *code; + uint32_t *immd_data; unsigned code_base; unsigned code_size; - unsigned parm_size; - - uint32_t hdr[20]; /* TODO: move this into code to save space */ + unsigned immd_base; + unsigned immd_size; /* size of immediate array data */ + unsigned parm_size; /* size of non-bindable uniforms (c0[]) */ + uint32_t hdr[20]; uint32_t flags[2]; struct { @@ -34,59 +35,14 @@ struct nvc0_program { uint8_t early_z; uint8_t in_pos[PIPE_MAX_SHADER_INPUTS]; } fp; + struct { + uint32_t tess_mode; /* ~0 if defined by the other stage */ + uint32_t input_patch_size; + } tp; void *relocs; - unsigned num_relocs; struct nouveau_resource *res; }; -/* first 2 bits are written into the program header, for each input */ -#define NVC0_INTERP_FLAT (1 << 0) -#define NVC0_INTERP_PERSPECTIVE (2 << 0) -#define NVC0_INTERP_LINEAR (3 << 0) -#define NVC0_INTERP_CENTROID (1 << 2) - -/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ -struct nvc0_subroutine { - unsigned id; - unsigned first_insn; - uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; - uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4]; -}; - -struct nvc0_translation_info { - struct nvc0_program *prog; - struct tgsi_full_instruction *insns; - unsigned num_insns; - ubyte input_file; - ubyte output_file; - ubyte fp_depth_output; - uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4]; - uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4]; - uint16_t sysval_loc[TGSI_SEMANTIC_COUNT]; - boolean sysval_in[TGSI_SEMANTIC_COUNT]; - int input_access[PIPE_MAX_SHADER_INPUTS][4]; - int output_access[PIPE_MAX_SHADER_OUTPUTS][4]; - ubyte interp_mode[PIPE_MAX_SHADER_INPUTS]; - boolean indirect_inputs; - boolean indirect_outputs; - boolean require_stores; - boolean global_stores; - uint32_t *immd32; - ubyte *immd32_ty; - unsigned immd32_nr; - unsigned temp128_nr; - ubyte edgeflag_out; - struct nvc0_subroutine *subr; - unsigned num_subrs; - boolean append_ucp; - struct tgsi_shader_info scan; -}; - -int nvc0_generate_code(struct nvc0_translation_info *); - -void nvc0_relocate_program(struct nvc0_program *, - uint32_t code_base, uint32_t data_base); - #endif diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index a8bd09234c2..596a1efc610 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -155,7 +155,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: return 16384; case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 4; + return 16; case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_VERTEX) return 32; @@ -179,9 +179,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_SHADER_CAP_SUBROUTINES: - return 0; /* please inline, or provide function declarations */ + return 1; /* but inlining everything, we need function declarations */ case PIPE_SHADER_CAP_INTEGERS: - return 0; + return 1; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -225,6 +225,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_bo_ref(NULL, &screen->fence.bo); nouveau_bo_ref(NULL, &screen->vfetch_cache); + nouveau_resource_destroy(&screen->lib_code); nouveau_resource_destroy(&screen->text_heap); if (screen->tic.entries) diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h index a3133b28876..6780e32e302 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nvc0/nvc0_screen.h @@ -34,6 +34,7 @@ struct nvc0_screen { uint64_t tls_size; struct nouveau_resource *text_heap; + struct nouveau_resource *lib_code; /* allocated from text_heap */ struct { struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS]; diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index 287160e0b2a..0a5581241d7 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -31,18 +31,37 @@ static INLINE void nvc0_program_update_context_state(struct nvc0_context *nvc0, struct nvc0_program *prog, int stage) { + struct nouveau_channel *chan = nvc0->screen->base.channel; + if (prog->hdr[1]) nvc0->state.tls_required |= 1 << stage; else nvc0->state.tls_required &= ~(1 << stage); + + if (prog->immd_size) { + const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; + + BEGIN_RING(chan, RING_3D(CB_SIZE), 3); + /* NOTE: may overlap code of a different shader */ + OUT_RING (chan, align(prog->immd_size, 0x100)); + OUT_RELOCh(chan, nvc0->screen->text, prog->immd_base, rl); + OUT_RELOCl(chan, nvc0->screen->text, prog->immd_base, rl); + BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1); + OUT_RING (chan, (14 << 4) | 1); + + nvc0->state.c14_bound |= 1 << stage; + } else + if (nvc0->state.c14_bound & (1 << stage)) { + BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1); + OUT_RING (chan, (14 << 4) | 0); + + nvc0->state.c14_bound &= ~(1 << stage); + } } -static boolean +static INLINE boolean nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) { - int ret; - unsigned size; - if (prog->translated) return TRUE; @@ -50,25 +69,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) if (!prog->translated) return FALSE; - size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100); - - ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog, - &prog->res); - if (ret) - return FALSE; - - prog->code_base = prog->res->start; - - nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, prog->code_base, - NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); - nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, - prog->code_base + NVC0_SHADER_HEADER_SIZE, - NOUVEAU_BO_VRAM, prog->code_size, prog->code); - - BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1); - OUT_RING (nvc0->screen->base.channel, 0x1111); - - return TRUE; + return nvc0_program_upload_code(nvc0, prog); } void |