diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 85 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.h | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c | 32 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 43 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 169 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.h | 36 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_util.h | 18 |
10 files changed, 283 insertions, 196 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index cf75760eb90..f52003a47ee 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -53,7 +53,7 @@ struct fd3_compile_context { const struct tgsi_token *tokens; bool free_tokens; struct ir3_shader *ir; - struct fd3_shader_stateobj *so; + struct fd3_shader_variant *so; struct ir3_block *block; struct ir3_instruction *current_instr; @@ -90,16 +90,6 @@ struct fd3_compile_context { unsigned num_internal_temps; struct tgsi_src_register internal_temps[6]; - /* inputs start at r0, temporaries start after last input, and - * outputs start after last temporary. - * - * We could be more clever, because this is not a hw restriction, - * but probably best just to implement an optimizing pass to - * reduce the # of registers used and get rid of redundant mov's - * (to output register). - */ - unsigned base_reg[TGSI_FILE_COUNT]; - /* idx/slot for last compiler generated immediate */ unsigned immediate_idx; @@ -133,10 +123,10 @@ static void create_mov(struct fd3_compile_context *ctx, static type_t get_ftype(struct fd3_compile_context *ctx); static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, +compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, const struct tgsi_token *tokens) { - unsigned ret, base = 0; + unsigned ret; struct tgsi_shader_info *info = &ctx->info; const struct fd_lowering_config lconfig = { .lower_DST = true, @@ -172,8 +162,6 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, ctx->num_output_updates = 0; ctx->atomic = false; - memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); - #define FM(x) (1 << TGSI_FILE_##x) /* optimize can't deal with relative addressing: */ if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | @@ -181,25 +169,7 @@ compile_init(struct fd3_compile_context *ctx, 
struct fd3_shader_stateobj *so, return TGSI_PARSE_ERROR; /* Immediates go after constants: */ - ctx->base_reg[TGSI_FILE_CONSTANT] = 0; - ctx->base_reg[TGSI_FILE_IMMEDIATE] = - info->file_max[TGSI_FILE_CONSTANT] + 1; - - /* if full precision and fragment shader, don't clobber - * r0.xy w/ bary fetch: - */ - if ((so->type == SHADER_FRAGMENT) && !so->half_precision) - base = 1; - - /* Temporaries after outputs after inputs: */ - ctx->base_reg[TGSI_FILE_INPUT] = base; - ctx->base_reg[TGSI_FILE_OUTPUT] = base + - info->file_max[TGSI_FILE_INPUT] + 1; - ctx->base_reg[TGSI_FILE_TEMPORARY] = base + - info->file_max[TGSI_FILE_INPUT] + 1 + - info->file_max[TGSI_FILE_OUTPUT] + 1; - - so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; + so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1; ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1); ret = tgsi_parse_init(&ctx->parser, ctx->tokens); @@ -520,7 +490,7 @@ add_dst_reg_wrmask(struct fd3_compile_context *ctx, switch (dst->File) { case TGSI_FILE_OUTPUT: case TGSI_FILE_TEMPORARY: - num = dst->Index + ctx->base_reg[dst->File]; + /* uses SSA */ break; case TGSI_FILE_ADDRESS: num = REG_A0; @@ -533,8 +503,6 @@ add_dst_reg_wrmask(struct fd3_compile_context *ctx, if (dst->Indirect) flags |= IR3_REG_RELATIV; - if (ctx->so->half_precision) - flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); @@ -602,9 +570,12 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx, * TGSI has vec4 immediates, we can only embed scalar (of limited * size, depending on instruction..) 
*/ + flags |= IR3_REG_CONST; + num = src->Index + ctx->so->first_immediate; + break; case TGSI_FILE_CONSTANT: flags |= IR3_REG_CONST; - num = src->Index + ctx->base_reg[src->File]; + num = src->Index; break; case TGSI_FILE_OUTPUT: /* NOTE: we should only end up w/ OUTPUT file for things like @@ -612,7 +583,7 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx, */ case TGSI_FILE_INPUT: case TGSI_FILE_TEMPORARY: - num = src->Index + ctx->base_reg[src->File]; + /* uses SSA */ break; default: compile_error(ctx, "unsupported src register file: %s\n", @@ -626,8 +597,6 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx, flags |= IR3_REG_NEGATE; if (src->Indirect) flags |= IR3_REG_RELATIV; - if (ctx->so->half_precision) - flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); @@ -726,9 +695,6 @@ get_internal_temp_hr(struct fd3_compile_context *ctx, struct tgsi_src_register *tmp_src; int n; - if (ctx->so->half_precision) - return get_internal_temp(ctx, tmp_dst); - tmp_dst->File = TGSI_FILE_TEMPORARY; tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; tmp_dst->Indirect = 0; @@ -771,13 +737,13 @@ is_rel_or_const(struct tgsi_src_register *src) static type_t get_ftype(struct fd3_compile_context *ctx) { - return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; + return TYPE_F32; } static type_t get_utype(struct fd3_compile_context *ctx) { - return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; + return TYPE_U32; } static unsigned @@ -1268,8 +1234,7 @@ trans_cmp(const struct instr_translater *t, a1 = &inst->Src[1].Register; a2 = &inst->Src[2].Register; /* sel.{b32,b16} dst, src2, tmp, src1 */ - instr = instr_create(ctx, 3, - ctx->so->half_precision ? OPC_SEL_B16 : OPC_SEL_B32); + instr = instr_create(ctx, 3, OPC_SEL_B32); vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); break; @@ -1691,8 +1656,7 @@ instr_cat3(const struct instr_translater *t, } } - instr = instr_create(ctx, 3, - ctx->so->half_precision ? 
t->hopc : t->opc); + instr = instr_create(ctx, 3, t->opc); vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, &inst->Src[2].Register, 0); put_dst(ctx, inst, dst); @@ -1773,8 +1737,7 @@ decl_semantic(const struct tgsi_declaration_semantic *sem) static void decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_stateobj *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; + struct fd3_shader_variant *so = ctx->so; unsigned i, flags = 0; /* I don't think we should get frag shader input without @@ -1784,18 +1747,15 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || decl->Declaration.Semantic); - if (ctx->so->half_precision) - flags |= IR3_REG_HALF; - for (i = decl->Range.First; i <= decl->Range.Last; i++) { unsigned n = so->inputs_count++; - unsigned r = regid(i + base, 0); + unsigned r = regid(i, 0); unsigned ncomp, j; /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ ncomp = 4; - DBG("decl in -> r%d", i + base); + DBG("decl in -> r%d", i); so->inputs[n].semantic = decl_semantic(&decl->Semantic); so->inputs[n].compmask = (1 << ncomp) - 1; @@ -1837,15 +1797,14 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) static void decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_stateobj *so = ctx->so; - unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; + struct fd3_shader_variant *so = ctx->so; unsigned comp = 0; unsigned name = decl->Semantic.Name; unsigned i; compile_assert(ctx, decl->Declaration.Semantic); - DBG("decl out[%d] -> r%d", name, decl->Range.First + base); + DBG("decl out[%d] -> r%d", name, decl->Range.First); if (ctx->type == TGSI_PROCESSOR_VERTEX) { switch (name) { @@ -1883,7 +1842,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) ncomp = 4; so->outputs[n].semantic = 
decl_semantic(&decl->Semantic); - so->outputs[n].regid = regid(i + base, comp); + so->outputs[n].regid = regid(i, comp); /* avoid undefined outputs, stick a dummy mov from imm{0.0}, * which if the output is actually assigned will be over- @@ -2013,8 +1972,8 @@ compile_dump(struct fd3_compile_context *ctx) } int -fd3_compile_shader(struct fd3_shader_stateobj *so, - const struct tgsi_token *tokens) +fd3_compile_shader(struct fd3_shader_variant *so, + const struct tgsi_token *tokens, struct fd3_shader_key key) { struct fd3_compile_context ctx; unsigned i, actual_in; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h index f4d55cf68ee..a53bb3ee9a5 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h @@ -33,9 +33,11 @@ #include "fd3_util.h" -int fd3_compile_shader(struct fd3_shader_stateobj *so, - const struct tgsi_token *tokens); -int fd3_compile_shader_old(struct fd3_shader_stateobj *so, - const struct tgsi_token *tokens); +int fd3_compile_shader(struct fd3_shader_variant *so, + const struct tgsi_token *tokens, + struct fd3_shader_key key); +int fd3_compile_shader_old(struct fd3_shader_variant *so, + const struct tgsi_token *tokens, + struct fd3_shader_key key); #endif /* FD3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index f38c158693f..31e415c6a70 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -54,7 +54,7 @@ struct fd3_compile_context { bool free_tokens; struct ir3_shader *ir; struct ir3_block *block; - struct fd3_shader_stateobj *so; + struct fd3_shader_variant *so; struct tgsi_parse_context parser; unsigned type; @@ -120,7 +120,7 @@ static void create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src); static 
unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, +compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, const struct tgsi_token *tokens) { unsigned ret, base = 0; @@ -169,7 +169,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, /* if full precision and fragment shader, don't clobber * r0.x w/ bary fetch: */ - if ((so->type == SHADER_FRAGMENT) && !so->half_precision) + if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision) base = 1; /* Temporaries after outputs after inputs: */ @@ -291,7 +291,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, if (dst->Indirect) flags |= IR3_REG_RELATIV; - if (ctx->so->half_precision) + if (ctx->so->key.half_precision) flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); @@ -344,7 +344,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, flags |= IR3_REG_NEGATE; if (src->Indirect) flags |= IR3_REG_RELATIV; - if (ctx->so->half_precision) + if (ctx->so->key.half_precision) flags |= IR3_REG_HALF; reg = ir3_reg_create(instr, regid(num, chan), flags); @@ -409,7 +409,7 @@ get_internal_temp_hr(struct fd3_compile_context *ctx, struct tgsi_src_register *tmp_src; int n; - if (ctx->so->half_precision) + if (ctx->so->key.half_precision) return get_internal_temp(ctx, tmp_dst); tmp_dst->File = TGSI_FILE_TEMPORARY; @@ -454,13 +454,13 @@ is_rel_or_const(struct tgsi_src_register *src) static type_t get_ftype(struct fd3_compile_context *ctx) { - return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; + return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32; } static type_t get_utype(struct fd3_compile_context *ctx) { - return ctx->so->half_precision ? TYPE_U16 : TYPE_U32; + return ctx->so->key.half_precision ? 
TYPE_U16 : TYPE_U32; } static unsigned @@ -980,7 +980,7 @@ trans_cmp(const struct instr_translater *t, if (t->tgsi_opc == TGSI_OPCODE_CMP) { /* sel.{f32,f16} dst, src2, tmp, src1 */ instr = instr_create(ctx, 3, - ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); + ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &inst->Src[2].Register, 0, tmp_src, 0, @@ -990,7 +990,7 @@ trans_cmp(const struct instr_translater *t, get_immediate(ctx, &constval1, fui(1.0)); /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ instr = instr_create(ctx, 3, - ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); + ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &constval0, 0, tmp_src, 0, &constval1, 0); } @@ -1210,7 +1210,7 @@ instr_cat3(const struct instr_translater *t, } instr = instr_create(ctx, 3, - ctx->so->half_precision ? t->hopc : t->opc); + ctx->so->key.half_precision ? t->hopc : t->opc); vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, &inst->Src[2].Register, 0); put_dst(ctx, inst, dst); @@ -1297,7 +1297,7 @@ decl_semantic(const struct tgsi_declaration_semantic *sem) static int decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_stateobj *so = ctx->so; + struct fd3_shader_variant *so = ctx->so; unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; unsigned i, flags = 0; int nop = 0; @@ -1309,7 +1309,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || decl->Declaration.Semantic); - if (ctx->so->half_precision) + if (ctx->so->key.half_precision) flags |= IR3_REG_HALF; for (i = decl->Range.First; i <= decl->Range.Last; i++) { @@ -1362,7 +1362,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) static void decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_stateobj *so = ctx->so; + struct 
fd3_shader_variant *so = ctx->so; unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; unsigned comp = 0; unsigned name = decl->Semantic.Name; @@ -1492,8 +1492,8 @@ compile_instructions(struct fd3_compile_context *ctx) } int -fd3_compile_shader_old(struct fd3_shader_stateobj *so, - const struct tgsi_token *tokens) +fd3_compile_shader_old(struct fd3_shader_variant *so, + const struct tgsi_token *tokens, struct fd3_shader_key key) { struct fd3_compile_context ctx; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 83024c11a8b..7b071b2cd5d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -43,7 +43,8 @@ static void -emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring) +emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd3_shader_key key) { struct fd_vertex_stateobj *vtx = ctx->vtx; struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; @@ -63,20 +64,21 @@ emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring) bufs[i].format = elem->src_format; } - fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key), + bufs, vtx->num_elements); } static void draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, - struct fd_ringbuffer *ring, unsigned dirty, bool binning) + struct fd_ringbuffer *ring, unsigned dirty, struct fd3_shader_key key) { - fd3_emit_state(ctx, ring, &ctx->prog, dirty, binning); + fd3_emit_state(ctx, ring, &ctx->prog, dirty, key); if (dirty & FD_DIRTY_VTXBUF) - emit_vertexbufs(ctx, ring); + emit_vertexbufs(ctx, ring, key); OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); - OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ + OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN 
*/ @@ -88,16 +90,25 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ info->restart_index : 0xffffffff); - fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info); + fd_draw_emit(ctx, ring, + key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, + info); } static void fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) { unsigned dirty = ctx->dirty; + struct fd3_shader_key key = { + /* do binning pass first: */ + .binning_pass = true, + .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, + }; draw_impl(ctx, info, ctx->binning_ring, - dirty & ~(FD_DIRTY_BLEND), true); - draw_impl(ctx, info, ctx->ring, dirty, false); + dirty & ~(FD_DIRTY_BLEND), key); + /* and now regular (non-binning) pass: */ + key.binning_pass = false; + draw_impl(ctx, info, ctx->ring, dirty, key); } /* binning pass cmds for a clear: @@ -113,12 +124,18 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->binning_ring; + struct fd3_shader_key key = { + .binning_pass = true, + }; - fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, true); + fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key); - fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { - { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, - }, 1); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), + (struct fd3_vertex_buf[]) {{ + .prsc = fd3_ctx->solid_vbuf, + .stride = 12, + .format = PIPE_FORMAT_R32G32B32_FLOAT, + }}, 1); OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | @@ -148,6 +165,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; unsigned ce, i; + struct fd3_shader_key key = { + }; dirty &= 
FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; dirty |= FD_DIRTY_PROG; @@ -155,7 +174,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, fd3_clear_binning(ctx, dirty); /* emit generic state now: */ - fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, false); + fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key); OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1); OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) | @@ -245,9 +264,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); - fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { - { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, - }, 1); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), + (struct fd3_vertex_buf[]) {{ + .prsc = fd3_ctx->solid_vbuf, + .stride = 12, + .format = PIPE_FORMAT_R32G32B32_FLOAT, + }}, 1); fd_wfi(ctx, ring); fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index a364fbfcd15..0d8d3c5e52c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -87,7 +87,7 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct fd3_shader_stateobj *shader) + struct fd3_shader_variant *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t base = 0; @@ -293,10 +293,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, + struct fd3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n) { - struct fd3_shader_stateobj *vp = prog->vp; uint32_t i; n = MIN2(n, vp->inputs_count); @@ -329,8 +328,15 @@ 
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, uint32_t dirty, bool binning) + struct fd_program_stateobj *prog, uint32_t dirty, + struct fd3_shader_key key) { + struct fd3_shader_variant *vp; + struct fd3_shader_variant *fp; + + fp = fd3_shader_variant(prog->fp, key); + vp = fd3_shader_variant(prog->vp, key); + emit_marker(ring, 5); if (dirty & FD_DIRTY_SAMPLE_MASK) { @@ -344,7 +350,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); struct pipe_stencil_ref *sr = &ctx->stencil_ref; - if (!binning) { + if (!key.binning_pass) { struct fd3_context *fd3_ctx = fd3_context(ctx); /* I suppose if we needed to (which I don't *think* we need @@ -372,7 +378,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { - struct fd3_shader_stateobj *fp = prog->fp; uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control; if (fp->writes_pos) { val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z; @@ -399,7 +404,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, } if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { - struct fd3_shader_stateobj *fp = prog->fp; uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer) ->gras_cl_clip_cntl; if (fp->writes_pos) { @@ -412,7 +416,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { struct fd3_rasterizer_stateobj *rasterizer = fd3_rasterizer_stateobj(ctx->rasterizer); - struct fd3_shader_stateobj *fp = prog->fp; uint32_t stride_in_vpc; stride_in_vpc = align(fp->total_in, 4) / 4; @@ -451,7 +454,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & FD_DIRTY_PROG) { fd_wfi(ctx, ring); - fd3_program_emit(ring, prog, binning); + fd3_program_emit(ring, prog, key); } OUT_PKT3(ring, 
CP_EVENT_WRITE, 1); @@ -463,10 +466,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_wfi(ctx, ring); emit_constants(ring, SB_VERT_SHADER, &ctx->constbuf[PIPE_SHADER_VERTEX], - (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL); + (prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL); emit_constants(ring, SB_FRAG_SHADER, &ctx->constbuf[PIPE_SHADER_FRAGMENT], - (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL); + (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL); } if ((dirty & FD_DIRTY_BLEND) && ctx->blend) { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index 8584eb5b59b..f2ae4dc295e 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -56,10 +56,11 @@ struct fd3_vertex_buf { }; void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, + struct fd3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n); void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, uint32_t dirty, bool binning); + struct fd_program_stateobj *prog, uint32_t dirty, + struct fd3_shader_key key); void fd3_emit_restore(struct fd_context *ctx); #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index d3ba6e6812b..d1aa8cf1208 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -43,6 +43,8 @@ #include "fd3_util.h" #include "fd3_zsa.h" +static const struct fd3_shader_key key = { +}; static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, @@ -147,11 +149,13 @@ emit_binning_workaround(struct fd_context *ctx) A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); fd_wfi(ctx, ring); - fd3_program_emit(ring, &ctx->solid_prog, false); - - fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { - { .prsc = 
fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, - }, 1); + fd3_program_emit(ring, &ctx->solid_prog, key); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), + (struct fd3_vertex_buf[]) {{ + .prsc = fd3_ctx->solid_vbuf, + .stride = 12, + .format = PIPE_FORMAT_R32G32B32_FLOAT, + }}, 1); OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | @@ -365,11 +369,13 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ fd_wfi(ctx, ring); - fd3_program_emit(ring, &ctx->solid_prog, false); - - fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { - { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, - }, 1); + fd3_program_emit(ring, &ctx->solid_prog, key); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key), + (struct fd3_vertex_buf[]) {{ + .prsc = fd3_ctx->solid_vbuf, + .stride = 12, + .format = PIPE_FORMAT_R32G32B32_FLOAT, + }}, 1); if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { uint32_t base = 0; @@ -512,12 +518,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ fd_wfi(ctx, ring); - fd3_program_emit(ring, &ctx->blit_prog, false); - - fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) { - { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT }, - { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, - }, 2); + fd3_program_emit(ring, &ctx->blit_prog, key); + fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key), + (struct fd3_vertex_buf[]) {{ + .prsc = fd3_ctx->blit_texcoord_vbuf, + .stride = 8, + .format = PIPE_FORMAT_R32G32_FLOAT, + }, { + .prsc = fd3_ctx->solid_vbuf, + .stride = 12, + .format = PIPE_FORMAT_R32G32B32_FLOAT, + }}, 2); /* for gmem pitch/base 
calculations, we need to use the non- * truncated tile sizes: diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index c6fb8633cd0..0a7500f1611 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -44,17 +44,17 @@ #include "fd3_util.h" static void -delete_shader(struct fd3_shader_stateobj *so) +delete_variant(struct fd3_shader_variant *v) { - ir3_shader_destroy(so->ir); - fd_bo_del(so->bo); - free(so); + ir3_shader_destroy(v->ir); + fd_bo_del(v->bo); + free(v); } static void -assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so) +assemble_variant(struct fd3_shader_variant *so) { - struct fd_context *ctx = fd_context(pctx); + struct fd_context *ctx = fd_context(so->so->pctx); uint32_t sz, *bin; bin = ir3_shader_assemble(so->ir, &so->info); @@ -77,7 +77,7 @@ assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so) * reflect the # of registers actually used: */ static void -fixup_vp_regfootprint(struct fd3_shader_stateobj *so) +fixup_vp_regfootprint(struct fd3_shader_variant *so) { unsigned i; for (i = 0; i < so->inputs_count; i++) @@ -86,72 +86,123 @@ fixup_vp_regfootprint(struct fd3_shader_stateobj *so) so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2); } -static struct fd3_shader_stateobj * -create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) +static struct fd3_shader_variant * +create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) { - struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); - const struct tgsi_token *tokens = cso->tokens; + struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant); + const struct tgsi_token *tokens = so->tokens; int ret; - if (!so) + if (!v) return NULL; - so->type = type; + v->so = so; + v->key = key; + v->type = so->type; if (fd_mesa_debug & 
FD_DBG_DISASM) { DBG("dump tgsi: type=%d", so->type); tgsi_dump(tokens, 0); } - if ((type == SHADER_FRAGMENT) && (fd_mesa_debug & FD_DBG_FRAGHALF)) - so->half_precision = true; - - if (!(fd_mesa_debug & FD_DBG_NOOPT)) { - ret = fd3_compile_shader(so, tokens); + ret = fd3_compile_shader(v, tokens, key); if (ret) { debug_error("new compiler failed, trying fallback!"); - so->inputs_count = 0; - so->outputs_count = 0; - so->total_in = 0; - so->samplers_count = 0; - so->immediates_count = 0; + v->inputs_count = 0; + v->outputs_count = 0; + v->total_in = 0; + v->samplers_count = 0; + v->immediates_count = 0; } } else { ret = -1; /* force fallback to old compiler */ } if (ret) - ret = fd3_compile_shader_old(so, tokens); + ret = fd3_compile_shader_old(v, tokens, key); if (ret) { debug_error("compile failed!"); goto fail; } - assemble_shader(pctx, so); - if (!so->bo) { + assemble_variant(v); + if (!v->bo) { debug_error("assemble failed!"); goto fail; } - if (type == SHADER_VERTEX) - fixup_vp_regfootprint(so); + if (so->type == SHADER_VERTEX) + fixup_vp_regfootprint(v); if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("disassemble: type=%d", so->type); - disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type); + DBG("disassemble: type=%d", v->type); + disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type); } - return so; + return v; fail: - delete_shader(so); + delete_variant(v); return NULL; } +struct fd3_shader_variant * +fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) +{ + struct fd3_shader_variant *v; + + /* some shader key values only apply to vertex or frag shader, + * so normalize the key to avoid constructing multiple identical + * variants: + */ + if (so->type == SHADER_FRAGMENT) { + key.binning_pass = false; + } + if (so->type == SHADER_VERTEX) { + key.color_two_side = false; + key.half_precision = false; + } + + for (v = so->variants; v; v = v->next) + if (!memcmp(&key, &v->key, sizeof(key))) + return v; + + /* compile new 
variant if it doesn't exist already: */ + v = create_variant(so, key); + v->next = so->variants; + so->variants = v; + + return v; +} + + +static void +delete_shader(struct fd3_shader_stateobj *so) +{ + struct fd3_shader_variant *v, *t; + for (v = so->variants; v; ) { + t = v; + v = v->next; + delete_variant(t); + } + free((void *)so->tokens); + free(so); +} + +static struct fd3_shader_stateobj * +create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso, + enum shader_t type) +{ + struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); + so->pctx = pctx; + so->type = type; + so->tokens = tgsi_dup_tokens(cso->tokens); + return so; +} + static void * fd3_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) @@ -181,7 +232,7 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) } static void -emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so) +emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so) { const struct ir3_shader_info *si = &so->info; enum adreno_state_block sb; @@ -222,7 +273,7 @@ emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so) } static int -find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic) +find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -232,7 +283,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic) } static uint32_t -find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) +find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -243,22 +294,26 @@ find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, bool binning) + struct fd_program_stateobj *prog, struct fd3_shader_key key) { - 
const struct fd3_shader_stateobj *vp = prog->vp; - const struct fd3_shader_stateobj *fp = prog->fp; - const struct ir3_shader_info *vsi = &vp->info; - const struct ir3_shader_info *fsi = &fp->info; + const struct fd3_shader_variant *vp, *fp; + const struct ir3_shader_info *vsi, *fsi; uint32_t pos_regid, posz_regid, psize_regid, color_regid; int i; - if (binning) { + vp = fd3_shader_variant(prog->vp, key); + + if (key.binning_pass) { /* use dummy stateobj to simplify binning vs non-binning: */ - static const struct fd3_shader_stateobj binning_fp = {}; + static const struct fd3_shader_variant binning_fp = {}; fp = &binning_fp; - fsi = &fp->info; + } else { + fp = fd3_shader_variant(prog->fp, key); } + vsi = &vp->info; + fsi = &fp->info; + pos_regid = find_output_regid(vp, fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); posz_regid = find_output_regid(fp, @@ -293,7 +348,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) | - COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) | + COND(key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | A3XX_SP_SP_CTRL_REG_L0MODE(0)); @@ -355,7 +410,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ - if (binning) { + if (key.binning_pass) { OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, 0x00000000); @@ -402,12 +457,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) | - COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION)); + COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION)); OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); - if (binning) { + if (key.binning_pass) { OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); 
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1)); @@ -421,16 +476,16 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); - OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ - OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ - OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ - OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); - OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ - OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ - OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ - OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ + OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ + OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ + OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ } OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); @@ -442,7 +497,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ - if (!binning) { + if (!key.binning_pass) { emit_shader(ring, fp); OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index a0a0de8b1ca..26aa9f34de7 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -32,7 +32,7 @@ #include "pipe/p_context.h" #include "freedreno_context.h" - +#include "fd3_util.h" #include "ir3.h" #include "disasm.h" @@ -53,19 +53,14 @@ static inline uint16_t sem2idx(fd3_semantic sem) return sem & 0xff; } -struct fd3_shader_stateobj { - enum shader_t type; - +struct fd3_shader_variant { struct fd_bo *bo; + struct fd3_shader_key key; + struct ir3_shader_info info; struct ir3_shader *ir; - /* is shader using (or more precisely, is color_regid) half- - * precision register? - */ - bool half_precision; - /* the instructions length is in units of instruction groups * (4 instructions, 8 dwords): */ @@ -118,14 +113,35 @@ struct fd3_shader_stateobj { uint32_t val[4]; } immediates[64]; + /* shader variants form a linked list: */ + struct fd3_shader_variant *next; + + /* replicated here to avoid passing extra ptrs everywhere: */ + enum shader_t type; + struct fd3_shader_stateobj *so; +}; + +struct fd3_shader_stateobj { + enum shader_t type; + + struct pipe_context *pctx; + const struct tgsi_token *tokens; + + struct fd3_shader_variant *variants; + /* so far, only used for blit_prog shader.. values for * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE + * + * Possibly should be in fd3_program_variant? 
*/ uint32_t vinterp[4], vpsrepl[4]; }; +struct fd3_shader_variant * fd3_shader_variant(struct fd3_shader_stateobj *so, + struct fd3_shader_key key); + void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, bool binning); + struct fd_program_stateobj *prog, struct fd3_shader_key key); void fd3_prog_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h index 4681840b173..6462d18f913 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_util.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h @@ -43,4 +43,22 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); +/* Configuration key used to identify a shader variant.. different + * shader variants can be used to implement features not supported + * in hw (two sided color), binning-pass vertex shader, etc. + * + * NOTE: this is declared here (rather than fd3_program.h) as it is + * passed around through a lot of the emit code in various parts + * which would otherwise not necessarily need to incl fd3_program.h + */ +struct fd3_shader_key { + /* vertex shader variant parameters: */ + unsigned binning_pass : 1; + + /* fragment shader variant parameters: */ + unsigned color_two_side : 1; + unsigned half_precision : 1; +}; +struct fd3_shader_variant; + #endif /* FD3_UTIL_H_ */ |