summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c 85
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.h 10
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c 32
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_draw.c 56
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.c 25
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_emit.h 5
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_gmem.c 43
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 169
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 36
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_util.h 18
10 files changed, 283 insertions, 196 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index cf75760eb90..f52003a47ee 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -53,7 +53,7 @@ struct fd3_compile_context {
const struct tgsi_token *tokens;
bool free_tokens;
struct ir3_shader *ir;
- struct fd3_shader_stateobj *so;
+ struct fd3_shader_variant *so;
struct ir3_block *block;
struct ir3_instruction *current_instr;
@@ -90,16 +90,6 @@ struct fd3_compile_context {
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[6];
- /* inputs start at r0, temporaries start after last input, and
- * outputs start after last temporary.
- *
- * We could be more clever, because this is not a hw restriction,
- * but probably best just to implement an optimizing pass to
- * reduce the # of registers used and get rid of redundant mov's
- * (to output register).
- */
- unsigned base_reg[TGSI_FILE_COUNT];
-
/* idx/slot for last compiler generated immediate */
unsigned immediate_idx;
@@ -133,10 +123,10 @@ static void create_mov(struct fd3_compile_context *ctx,
static type_t get_ftype(struct fd3_compile_context *ctx);
static unsigned
-compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
const struct tgsi_token *tokens)
{
- unsigned ret, base = 0;
+ unsigned ret;
struct tgsi_shader_info *info = &ctx->info;
const struct fd_lowering_config lconfig = {
.lower_DST = true,
@@ -172,8 +162,6 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
ctx->num_output_updates = 0;
ctx->atomic = false;
- memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
-
#define FM(x) (1 << TGSI_FILE_##x)
/* optimize can't deal with relative addressing: */
if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) |
@@ -181,25 +169,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
return TGSI_PARSE_ERROR;
/* Immediates go after constants: */
- ctx->base_reg[TGSI_FILE_CONSTANT] = 0;
- ctx->base_reg[TGSI_FILE_IMMEDIATE] =
- info->file_max[TGSI_FILE_CONSTANT] + 1;
-
- /* if full precision and fragment shader, don't clobber
- * r0.xy w/ bary fetch:
- */
- if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
- base = 1;
-
- /* Temporaries after outputs after inputs: */
- ctx->base_reg[TGSI_FILE_INPUT] = base;
- ctx->base_reg[TGSI_FILE_OUTPUT] = base +
- info->file_max[TGSI_FILE_INPUT] + 1;
- ctx->base_reg[TGSI_FILE_TEMPORARY] = base +
- info->file_max[TGSI_FILE_INPUT] + 1 +
- info->file_max[TGSI_FILE_OUTPUT] + 1;
-
- so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+ so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
@@ -520,7 +490,7 @@ add_dst_reg_wrmask(struct fd3_compile_context *ctx,
switch (dst->File) {
case TGSI_FILE_OUTPUT:
case TGSI_FILE_TEMPORARY:
- num = dst->Index + ctx->base_reg[dst->File];
+ /* uses SSA */
break;
case TGSI_FILE_ADDRESS:
num = REG_A0;
@@ -533,8 +503,6 @@ add_dst_reg_wrmask(struct fd3_compile_context *ctx,
if (dst->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
@@ -602,9 +570,12 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx,
* TGSI has vec4 immediates, we can only embed scalar (of limited
* size, depending on instruction..)
*/
+ flags |= IR3_REG_CONST;
+ num = src->Index + ctx->so->first_immediate;
+ break;
case TGSI_FILE_CONSTANT:
flags |= IR3_REG_CONST;
- num = src->Index + ctx->base_reg[src->File];
+ num = src->Index;
break;
case TGSI_FILE_OUTPUT:
/* NOTE: we should only end up w/ OUTPUT file for things like
@@ -612,7 +583,7 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx,
*/
case TGSI_FILE_INPUT:
case TGSI_FILE_TEMPORARY:
- num = src->Index + ctx->base_reg[src->File];
+ /* uses SSA */
break;
default:
compile_error(ctx, "unsupported src register file: %s\n",
@@ -626,8 +597,6 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx,
flags |= IR3_REG_NEGATE;
if (src->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
@@ -726,9 +695,6 @@ get_internal_temp_hr(struct fd3_compile_context *ctx,
struct tgsi_src_register *tmp_src;
int n;
- if (ctx->so->half_precision)
- return get_internal_temp(ctx, tmp_dst);
-
tmp_dst->File = TGSI_FILE_TEMPORARY;
tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
tmp_dst->Indirect = 0;
@@ -771,13 +737,13 @@ is_rel_or_const(struct tgsi_src_register *src)
static type_t
get_ftype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ return TYPE_F32;
}
static type_t
get_utype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+ return TYPE_U32;
}
static unsigned
@@ -1268,8 +1234,7 @@ trans_cmp(const struct instr_translater *t,
a1 = &inst->Src[1].Register;
a2 = &inst->Src[2].Register;
/* sel.{b32,b16} dst, src2, tmp, src1 */
- instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_B16 : OPC_SEL_B32);
+ instr = instr_create(ctx, 3, OPC_SEL_B32);
vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
break;
@@ -1691,8 +1656,7 @@ instr_cat3(const struct instr_translater *t,
}
}
- instr = instr_create(ctx, 3,
- ctx->so->half_precision ? t->hopc : t->opc);
+ instr = instr_create(ctx, 3, t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
@@ -1773,8 +1737,7 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)
static void
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
- unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
+ struct fd3_shader_variant *so = ctx->so;
unsigned i, flags = 0;
/* I don't think we should get frag shader input without
@@ -1784,18 +1747,15 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
decl->Declaration.Semantic);
- if (ctx->so->half_precision)
- flags |= IR3_REG_HALF;
-
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
unsigned n = so->inputs_count++;
- unsigned r = regid(i + base, 0);
+ unsigned r = regid(i, 0);
unsigned ncomp, j;
/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
ncomp = 4;
- DBG("decl in -> r%d", i + base);
+ DBG("decl in -> r%d", i);
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
so->inputs[n].compmask = (1 << ncomp) - 1;
@@ -1837,15 +1797,14 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
- unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
+ struct fd3_shader_variant *so = ctx->so;
unsigned comp = 0;
unsigned name = decl->Semantic.Name;
unsigned i;
compile_assert(ctx, decl->Declaration.Semantic);
- DBG("decl out[%d] -> r%d", name, decl->Range.First + base);
+ DBG("decl out[%d] -> r%d", name, decl->Range.First);
if (ctx->type == TGSI_PROCESSOR_VERTEX) {
switch (name) {
@@ -1883,7 +1842,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
ncomp = 4;
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
- so->outputs[n].regid = regid(i + base, comp);
+ so->outputs[n].regid = regid(i, comp);
/* avoid undefined outputs, stick a dummy mov from imm{0.0},
* which if the output is actually assigned will be over-
@@ -2013,8 +1972,8 @@ compile_dump(struct fd3_compile_context *ctx)
}
int
-fd3_compile_shader(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens)
+fd3_compile_shader(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens, struct fd3_shader_key key)
{
struct fd3_compile_context ctx;
unsigned i, actual_in;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
index f4d55cf68ee..a53bb3ee9a5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
@@ -33,9 +33,11 @@
#include "fd3_util.h"
-int fd3_compile_shader(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens);
-int fd3_compile_shader_old(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens);
+int fd3_compile_shader(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens,
+ struct fd3_shader_key key);
+int fd3_compile_shader_old(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens,
+ struct fd3_shader_key key);
#endif /* FD3_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
index f38c158693f..31e415c6a70 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
@@ -54,7 +54,7 @@ struct fd3_compile_context {
bool free_tokens;
struct ir3_shader *ir;
struct ir3_block *block;
- struct fd3_shader_stateobj *so;
+ struct fd3_shader_variant *so;
struct tgsi_parse_context parser;
unsigned type;
@@ -120,7 +120,7 @@ static void create_mov(struct fd3_compile_context *ctx,
struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static unsigned
-compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
const struct tgsi_token *tokens)
{
unsigned ret, base = 0;
@@ -169,7 +169,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
/* if full precision and fragment shader, don't clobber
* r0.x w/ bary fetch:
*/
- if ((so->type == SHADER_FRAGMENT) && !so->half_precision)
+ if ((so->type == SHADER_FRAGMENT) && !so->key.half_precision)
base = 1;
/* Temporaries after outputs after inputs: */
@@ -291,7 +291,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
if (dst->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
@@ -344,7 +344,7 @@ add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
flags |= IR3_REG_NEGATE;
if (src->Indirect)
flags |= IR3_REG_RELATIV;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
reg = ir3_reg_create(instr, regid(num, chan), flags);
@@ -409,7 +409,7 @@ get_internal_temp_hr(struct fd3_compile_context *ctx,
struct tgsi_src_register *tmp_src;
int n;
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
return get_internal_temp(ctx, tmp_dst);
tmp_dst->File = TGSI_FILE_TEMPORARY;
@@ -454,13 +454,13 @@ is_rel_or_const(struct tgsi_src_register *src)
static type_t
get_ftype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
+ return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32;
}
static type_t
get_utype(struct fd3_compile_context *ctx)
{
- return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
+ return ctx->so->key.half_precision ? TYPE_U16 : TYPE_U32;
}
static unsigned
@@ -980,7 +980,7 @@ trans_cmp(const struct instr_translater *t,
if (t->tgsi_opc == TGSI_OPCODE_CMP) {
/* sel.{f32,f16} dst, src2, tmp, src1 */
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&inst->Src[2].Register, 0,
tmp_src, 0,
@@ -990,7 +990,7 @@ trans_cmp(const struct instr_translater *t,
get_immediate(ctx, &constval1, fui(1.0));
/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
+ ctx->so->key.half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&constval0, 0, tmp_src, 0, &constval1, 0);
}
@@ -1210,7 +1210,7 @@ instr_cat3(const struct instr_translater *t,
}
instr = instr_create(ctx, 3,
- ctx->so->half_precision ? t->hopc : t->opc);
+ ctx->so->key.half_precision ? t->hopc : t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
put_dst(ctx, inst, dst);
@@ -1297,7 +1297,7 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)
static int
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
+ struct fd3_shader_variant *so = ctx->so;
unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
unsigned i, flags = 0;
int nop = 0;
@@ -1309,7 +1309,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) ||
decl->Declaration.Semantic);
- if (ctx->so->half_precision)
+ if (ctx->so->key.half_precision)
flags |= IR3_REG_HALF;
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
@@ -1362,7 +1362,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
static void
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
- struct fd3_shader_stateobj *so = ctx->so;
+ struct fd3_shader_variant *so = ctx->so;
unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
unsigned comp = 0;
unsigned name = decl->Semantic.Name;
@@ -1492,8 +1492,8 @@ compile_instructions(struct fd3_compile_context *ctx)
}
int
-fd3_compile_shader_old(struct fd3_shader_stateobj *so,
- const struct tgsi_token *tokens)
+fd3_compile_shader_old(struct fd3_shader_variant *so,
+ const struct tgsi_token *tokens, struct fd3_shader_key key)
{
struct fd3_compile_context ctx;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index 83024c11a8b..7b071b2cd5d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -43,7 +43,8 @@
static void
-emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
+emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
+ struct fd3_shader_key key)
{
struct fd_vertex_stateobj *vtx = ctx->vtx;
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
@@ -63,20 +64,21 @@ emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring)
bufs[i].format = elem->src_format;
}
- fd3_emit_vertex_bufs(ring, &ctx->prog, bufs, vtx->num_elements);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
+ bufs, vtx->num_elements);
}
static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
- struct fd_ringbuffer *ring, unsigned dirty, bool binning)
+ struct fd_ringbuffer *ring, unsigned dirty, struct fd3_shader_key key)
{
- fd3_emit_state(ctx, ring, &ctx->prog, dirty, binning);
+ fd3_emit_state(ctx, ring, &ctx->prog, dirty, key);
if (dirty & FD_DIRTY_VTXBUF)
- emit_vertexbufs(ctx, ring);
+ emit_vertexbufs(ctx, ring, key);
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
- OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
+ OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */
@@ -88,16 +90,25 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);
- fd_draw_emit(ctx, ring, binning ? IGNORE_VISIBILITY : USE_VISIBILITY, info);
+ fd_draw_emit(ctx, ring,
+ key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+ info);
}
static void
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
{
unsigned dirty = ctx->dirty;
+ struct fd3_shader_key key = {
+ /* do binning pass first: */
+ .binning_pass = true,
+ .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+ };
draw_impl(ctx, info, ctx->binning_ring,
- dirty & ~(FD_DIRTY_BLEND), true);
- draw_impl(ctx, info, ctx->ring, dirty, false);
+ dirty & ~(FD_DIRTY_BLEND), key);
+ /* and now regular (non-binning) pass: */
+ key.binning_pass = false;
+ draw_impl(ctx, info, ctx->ring, dirty, key);
}
/* binning pass cmds for a clear:
@@ -113,12 +124,18 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
{
struct fd3_context *fd3_ctx = fd3_context(ctx);
struct fd_ringbuffer *ring = ctx->binning_ring;
+ struct fd3_shader_key key = {
+ .binning_pass = true,
+ };
- fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, true);
+ fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
@@ -148,6 +165,8 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
struct fd_ringbuffer *ring = ctx->ring;
unsigned dirty = ctx->dirty;
unsigned ce, i;
+ struct fd3_shader_key key = {
+ };
dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
@@ -155,7 +174,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
fd3_clear_binning(ctx, dirty);
/* emit generic state now: */
- fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, false);
+ fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
@@ -245,9 +264,12 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
fd_wfi(ctx, ring);
fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index a364fbfcd15..0d8d3c5e52c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -87,7 +87,7 @@ static void
emit_constants(struct fd_ringbuffer *ring,
enum adreno_state_block sb,
struct fd_constbuf_stateobj *constbuf,
- struct fd3_shader_stateobj *shader)
+ struct fd3_shader_variant *shader)
{
uint32_t enabled_mask = constbuf->enabled_mask;
uint32_t base = 0;
@@ -293,10 +293,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog,
+ struct fd3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n)
{
- struct fd3_shader_stateobj *vp = prog->vp;
uint32_t i;
n = MIN2(n, vp->inputs_count);
@@ -329,8 +328,15 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, uint32_t dirty, bool binning)
+ struct fd_program_stateobj *prog, uint32_t dirty,
+ struct fd3_shader_key key)
{
+ struct fd3_shader_variant *vp;
+ struct fd3_shader_variant *fp;
+
+ fp = fd3_shader_variant(prog->fp, key);
+ vp = fd3_shader_variant(prog->vp, key);
+
emit_marker(ring, 5);
if (dirty & FD_DIRTY_SAMPLE_MASK) {
@@ -344,7 +350,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
struct pipe_stencil_ref *sr = &ctx->stencil_ref;
- if (!binning) {
+ if (!key.binning_pass) {
struct fd3_context *fd3_ctx = fd3_context(ctx);
/* I suppose if we needed to (which I don't *think* we need
@@ -372,7 +378,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
if (fp->writes_pos) {
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
@@ -399,7 +404,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
}
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
if (fp->writes_pos) {
@@ -412,7 +416,6 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
struct fd3_rasterizer_stateobj *rasterizer =
fd3_rasterizer_stateobj(ctx->rasterizer);
- struct fd3_shader_stateobj *fp = prog->fp;
uint32_t stride_in_vpc;
stride_in_vpc = align(fp->total_in, 4) / 4;
@@ -451,7 +454,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & FD_DIRTY_PROG) {
fd_wfi(ctx, ring);
- fd3_program_emit(ring, prog, binning);
+ fd3_program_emit(ring, prog, key);
}
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
@@ -463,10 +466,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
fd_wfi(ctx, ring);
emit_constants(ring, SB_VERT_SHADER,
&ctx->constbuf[PIPE_SHADER_VERTEX],
- (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
+ (prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
emit_constants(ring, SB_FRAG_SHADER,
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
- (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
+ (prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 8584eb5b59b..f2ae4dc295e 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -56,10 +56,11 @@ struct fd3_vertex_buf {
};
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog,
+ struct fd3_shader_variant *vp,
struct fd3_vertex_buf *vbufs, uint32_t n);
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, uint32_t dirty, bool binning);
+ struct fd_program_stateobj *prog, uint32_t dirty,
+ struct fd3_shader_key key);
void fd3_emit_restore(struct fd_context *ctx);
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index d3ba6e6812b..d1aa8cf1208 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -43,6 +43,8 @@
#include "fd3_util.h"
#include "fd3_zsa.h"
+static const struct fd3_shader_key key = {
+};
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
@@ -147,11 +149,13 @@ emit_binning_workaround(struct fd_context *ctx)
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->solid_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_program_emit(ring, &ctx->solid_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
@@ -365,11 +369,13 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->solid_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 1);
+ fd3_program_emit(ring, &ctx->solid_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 1);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
uint32_t base = 0;
@@ -512,12 +518,17 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
fd_wfi(ctx, ring);
- fd3_program_emit(ring, &ctx->blit_prog, false);
-
- fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
- { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
- { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
- }, 2);
+ fd3_program_emit(ring, &ctx->blit_prog, key);
+ fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
+ (struct fd3_vertex_buf[]) {{
+ .prsc = fd3_ctx->blit_texcoord_vbuf,
+ .stride = 8,
+ .format = PIPE_FORMAT_R32G32_FLOAT,
+ }, {
+ .prsc = fd3_ctx->solid_vbuf,
+ .stride = 12,
+ .format = PIPE_FORMAT_R32G32B32_FLOAT,
+ }}, 2);
/* for gmem pitch/base calculations, we need to use the non-
* truncated tile sizes:
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index c6fb8633cd0..0a7500f1611 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -44,17 +44,17 @@
#include "fd3_util.h"
+/* Release everything owned by a single shader variant: its IR, its
+ * buffer object, and the variant struct itself.
+ *
+ * This is also the cleanup path for a partially constructed variant:
+ * create_variant() jumps here when assembly left v->bo NULL, and
+ * possibly before an IR was attached, so both members are checked
+ * rather than relying on the callees tolerating NULL.
+ */
static void
-delete_shader(struct fd3_shader_stateobj *so)
+delete_variant(struct fd3_shader_variant *v)
{
- ir3_shader_destroy(so->ir);
- fd_bo_del(so->bo);
- free(so);
+	if (v->ir)
+		ir3_shader_destroy(v->ir);
+	if (v->bo)
+		fd_bo_del(v->bo);
+	free(v);
}
static void
-assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
+assemble_variant(struct fd3_shader_variant *so)
{
- struct fd_context *ctx = fd_context(pctx);
+ struct fd_context *ctx = fd_context(so->so->pctx);
uint32_t sz, *bin;
bin = ir3_shader_assemble(so->ir, &so->info);
@@ -77,7 +77,7 @@ assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
* reflect the # of registers actually used:
*/
static void
-fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
+fixup_vp_regfootprint(struct fd3_shader_variant *so)
{
unsigned i;
for (i = 0; i < so->inputs_count; i++)
@@ -86,72 +86,123 @@ fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2);
}
-static struct fd3_shader_stateobj *
-create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
- enum shader_t type)
+static struct fd3_shader_variant *
+create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
{
- struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
- const struct tgsi_token *tokens = cso->tokens;
+ struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant);
+ const struct tgsi_token *tokens = so->tokens;
int ret;
- if (!so)
+ if (!v)
return NULL;
- so->type = type;
+ v->so = so;
+ v->key = key;
+ v->type = so->type;
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d", so->type);
tgsi_dump(tokens, 0);
}
- if ((type == SHADER_FRAGMENT) && (fd_mesa_debug & FD_DBG_FRAGHALF))
- so->half_precision = true;
-
-
if (!(fd_mesa_debug & FD_DBG_NOOPT)) {
- ret = fd3_compile_shader(so, tokens);
+ ret = fd3_compile_shader(v, tokens, key);
if (ret) {
debug_error("new compiler failed, trying fallback!");
- so->inputs_count = 0;
- so->outputs_count = 0;
- so->total_in = 0;
- so->samplers_count = 0;
- so->immediates_count = 0;
+ v->inputs_count = 0;
+ v->outputs_count = 0;
+ v->total_in = 0;
+ v->samplers_count = 0;
+ v->immediates_count = 0;
}
} else {
ret = -1; /* force fallback to old compiler */
}
if (ret)
- ret = fd3_compile_shader_old(so, tokens);
+ ret = fd3_compile_shader_old(v, tokens, key);
if (ret) {
debug_error("compile failed!");
goto fail;
}
- assemble_shader(pctx, so);
- if (!so->bo) {
+ assemble_variant(v);
+ if (!v->bo) {
debug_error("assemble failed!");
goto fail;
}
- if (type == SHADER_VERTEX)
- fixup_vp_regfootprint(so);
+ if (so->type == SHADER_VERTEX)
+ fixup_vp_regfootprint(v);
if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d", so->type);
- disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type);
+ DBG("disassemble: type=%d", v->type);
+ disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
}
- return so;
+ return v;
fail:
- delete_shader(so);
+ delete_variant(v);
return NULL;
}
+/**
+ * Look up, or compile on first use, the variant of shader 'so' that
+ * matches 'key'.
+ *
+ * 'key' is taken by value and normalized before the lookup: fields that
+ * do not apply to this shader stage are cleared, so keys differing only
+ * in those fields resolve to the same variant.  Variants are cached on
+ * the so->variants singly linked list, newest first.
+ *
+ * Returns the matching variant, or NULL when a new variant was needed
+ * and create_variant() failed.  A failed variant is never linked into
+ * the list.
+ */
+struct fd3_shader_variant *
+fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
+{
+	struct fd3_shader_variant *v;
+
+	/* some shader key values only apply to vertex or frag shader,
+	 * so normalize the key to avoid constructing multiple identical
+	 * variants:
+	 */
+	if (so->type == SHADER_FRAGMENT) {
+		key.binning_pass = false;
+	}
+	if (so->type == SHADER_VERTEX) {
+		key.color_two_side = false;
+		key.half_precision = false;
+	}
+
+	/* NOTE(review): memcmp() compares the whole key object, including
+	 * any padding; this presumably relies on every key being fully
+	 * zero-initialized by its creator -- confirm against the struct
+	 * definition.
+	 */
+	for (v = so->variants; v; v = v->next)
+		if (!memcmp(&key, &v->key, sizeof(key)))
+			return v;
+
+	/* compile new variant if it doesn't exist already: */
+	v = create_variant(so, key);
+	if (v) {
+		/* create_variant() returns NULL on compile/assemble failure;
+		 * only a successfully built variant is pushed on the list.
+		 */
+		v->next = so->variants;
+		so->variants = v;
+	}
+
+	return v;
+}
+
+
+/* Destroy a shader state object: every cached variant on its list,
+ * the token copy it holds, and finally the object itself.
+ */
+static void
+delete_shader(struct fd3_shader_stateobj *so)
+{
+	struct fd3_shader_variant *cur = so->variants;
+
+	while (cur) {
+		/* grab the link before the node is freed */
+		struct fd3_shader_variant *next = cur->next;
+
+		delete_variant(cur);
+		cur = next;
+	}
+
+	free((void *)so->tokens);
+	free(so);
+}
+
+/* Allocate a shader state object for the given stage.
+ *
+ * No compilation happens here: the object records the owning context,
+ * the shader type, and a private copy of the TGSI tokens.
+ *
+ * Returns the new object, or NULL if either allocation fails.
+ */
+static struct fd3_shader_stateobj *
+create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+		enum shader_t type)
+{
+	struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
+
+	if (!so)
+		return NULL;
+
+	so->pctx = pctx;
+	so->type = type;
+	so->tokens = tgsi_dup_tokens(cso->tokens);
+	if (!so->tokens) {
+		/* without tokens no variant can be compiled later; fail the
+		 * create instead of handing back a half-initialized object.
+		 */
+		free(so);
+		return NULL;
+	}
+
+	return so;
+}
+
static void *
fd3_fp_state_create(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
@@ -181,7 +232,7 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
}
static void
-emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
+emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so)
{
const struct ir3_shader_info *si = &so->info;
enum adreno_state_block sb;
@@ -222,7 +273,7 @@ emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_stateobj *so)
}
static int
-find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@@ -232,7 +283,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
}
static uint32_t
-find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@@ -243,22 +294,26 @@ find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
void
fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, bool binning)
+ struct fd_program_stateobj *prog, struct fd3_shader_key key)
{
- const struct fd3_shader_stateobj *vp = prog->vp;
- const struct fd3_shader_stateobj *fp = prog->fp;
- const struct ir3_shader_info *vsi = &vp->info;
- const struct ir3_shader_info *fsi = &fp->info;
+ const struct fd3_shader_variant *vp, *fp;
+ const struct ir3_shader_info *vsi, *fsi;
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
int i;
- if (binning) {
+ vp = fd3_shader_variant(prog->vp, key);
+
+ if (key.binning_pass) {
/* use dummy stateobj to simplify binning vs non-binning: */
- static const struct fd3_shader_stateobj binning_fp = {};
+ static const struct fd3_shader_variant binning_fp = {};
fp = &binning_fp;
- fsi = &fp->info;
+ } else {
+ fp = fd3_shader_variant(prog->fp, key);
}
+ vsi = &vp->info;
+ fsi = &fp->info;
+
pos_regid = find_output_regid(vp,
fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
posz_regid = find_output_regid(fp,
@@ -293,7 +348,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
- COND(binning, A3XX_SP_SP_CTRL_REG_BINNING) |
+ COND(key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
A3XX_SP_SP_CTRL_REG_L0MODE(0));
@@ -355,7 +410,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
- if (binning) {
+ if (key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
OUT_RING(ring, 0x00000000);
@@ -402,12 +457,12 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
- COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
+ COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
- if (binning) {
+ if (key.binning_pass) {
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
A3XX_VPC_ATTR_LMSIZE(1));
@@ -421,16 +476,16 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
- OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
- OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
- OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
- OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
+ OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
+ OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
+ OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
+ OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
- OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
- OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
- OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
- OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
+ OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
}
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
@@ -442,7 +497,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
- if (!binning) {
+ if (!key.binning_pass) {
emit_shader(ring, fp);
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index a0a0de8b1ca..26aa9f34de7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -32,7 +32,7 @@
#include "pipe/p_context.h"
#include "freedreno_context.h"
-
+#include "fd3_util.h"
#include "ir3.h"
#include "disasm.h"
@@ -53,19 +53,14 @@ static inline uint16_t sem2idx(fd3_semantic sem)
return sem & 0xff;
}
-struct fd3_shader_stateobj {
- enum shader_t type;
-
+struct fd3_shader_variant {
struct fd_bo *bo;
+ struct fd3_shader_key key;
+
struct ir3_shader_info info;
struct ir3_shader *ir;
- /* is shader using (or more precisely, is color_regid) half-
- * precision register?
- */
- bool half_precision;
-
/* the instructions length is in units of instruction groups
* (4 instructions, 8 dwords):
*/
@@ -118,14 +113,35 @@ struct fd3_shader_stateobj {
uint32_t val[4];
} immediates[64];
+ /* shader variants form a linked list: */
+ struct fd3_shader_variant *next;
+
+ /* replicated here to avoid passing extra ptrs everywhere: */
+ enum shader_t type;
+ struct fd3_shader_stateobj *so;
+};
+/* Per-shader state object: holds the TGSI tokens the shader was created
+ * from plus the list of variants compiled from them so far.
+ */
+struct fd3_shader_stateobj {
+ enum shader_t type; /* shader stage, set in create_shader() */
+
+ struct pipe_context *pctx; /* context passed to create_shader() */
+ const struct tgsi_token *tokens; /* from tgsi_dup_tokens(); freed in delete_shader() */
+
+ struct fd3_shader_variant *variants; /* list head, linked via ->next; new variants are pushed at the front */
+
/* so far, only used for blit_prog shader.. values for
* VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE
+ *
+ * Possibly should be in fd3_program_variant?
*/
uint32_t vinterp[4], vpsrepl[4];
};
+struct fd3_shader_variant * fd3_shader_variant(struct fd3_shader_stateobj *so,
+ struct fd3_shader_key key);
+
void fd3_program_emit(struct fd_ringbuffer *ring,
- struct fd_program_stateobj *prog, bool binning);
+ struct fd_program_stateobj *prog, struct fd3_shader_key key);
void fd3_prog_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h
index 4681840b173..6462d18f913 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_util.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h
@@ -43,4 +43,22 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+/* Configuration key used to identify a shader variant.  Different
+ * shader variants can be used to implement features not supported
+ * in hw (two sided color), the binning-pass vertex shader, etc.
+ *
+ * The key is passed by value, and the variant lookup in fd3_program.c
+ * matches keys with memcmp() over the whole struct.
+ * NOTE(review): that comparison also covers the unused bits of the
+ * bit-field storage unit -- confirm every key starts out
+ * zero-initialised before individual fields are set.
+ *
+ * NOTE: this is declared here (rather than fd3_program.h) as it is
+ * passed around through a lot of the emit code in various parts
+ * which would otherwise not necessarily need to include fd3_program.h
+ */
+struct fd3_shader_key {
+ /* vertex shader variant parameters: */
+ unsigned binning_pass : 1; /* cleared for SHADER_FRAGMENT before variant lookup */
+
+ /* fragment shader variant parameters: */
+ unsigned color_two_side : 1; /* cleared for SHADER_VERTEX before variant lookup */
+ unsigned half_precision : 1; /* cleared for SHADER_VERTEX before variant lookup */
+};
+struct fd3_shader_variant;
+
#endif /* FD3_UTIL_H_ */