summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler.c47
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c2
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c13
-rw-r--r--src/gallium/drivers/freedreno/freedreno_lowering.c245
-rw-r--r--src/gallium/drivers/freedreno/freedreno_lowering.h19
5 files changed, 269 insertions, 57 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index ae88d6ef54d..7450fac23c1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -152,6 +152,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
unsigned ret;
struct tgsi_shader_info *info = &ctx->info;
const struct fd_lowering_config lconfig = {
+ .color_two_side = so->key.color_two_side,
.lower_DST = true,
.lower_XPD = true,
.lower_SCS = true,
@@ -2003,6 +2004,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->writes_psize = true;
break;
case TGSI_SEMANTIC_COLOR:
+ case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
@@ -2059,24 +2061,31 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
{
struct fd3_shader_variant *so = ctx->so;
struct ir3_block *block = ctx->block;
+ struct ir3_instruction **inputs;
struct ir3_instruction *instr;
- int regid = 0;
+ int n, regid = 0;
block->ninputs = 0;
+ n = 4; /* always have frag_pos */
+ n += COND(so->frag_face, 4);
+ n += COND(so->frag_coord, 4);
+
+ inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
+
if (so->frag_face) {
/* this ultimately gets assigned to hr0.x so doesn't conflict
* with frag_coord/frag_pos..
*/
- block->inputs[block->ninputs++] = ctx->frag_face;
+ inputs[block->ninputs++] = ctx->frag_face;
ctx->frag_face->regs[0]->num = 0;
/* remaining channels not used, but let's avoid confusing
* other parts that expect inputs to come in groups of vec4
*/
- block->inputs[block->ninputs++] = NULL;
- block->inputs[block->ninputs++] = NULL;
- block->inputs[block->ninputs++] = NULL;
+ inputs[block->ninputs++] = NULL;
+ inputs[block->ninputs++] = NULL;
+ inputs[block->ninputs++] = NULL;
}
/* since we don't know where to set the regid for frag_coord,
@@ -2090,10 +2099,10 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
ctx->frag_coord[2]->regs[0]->num = regid++;
ctx->frag_coord[3]->regs[0]->num = regid++;
- block->inputs[block->ninputs++] = ctx->frag_coord[0];
- block->inputs[block->ninputs++] = ctx->frag_coord[1];
- block->inputs[block->ninputs++] = ctx->frag_coord[2];
- block->inputs[block->ninputs++] = ctx->frag_coord[3];
+ inputs[block->ninputs++] = ctx->frag_coord[0];
+ inputs[block->ninputs++] = ctx->frag_coord[1];
+ inputs[block->ninputs++] = ctx->frag_coord[2];
+ inputs[block->ninputs++] = ctx->frag_coord[3];
}
/* we always have frag_pos: */
@@ -2102,14 +2111,16 @@ fixup_frag_inputs(struct fd3_compile_context *ctx)
/* r0.x */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
- block->inputs[block->ninputs++] = instr;
+ inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
/* r0.y */
instr = create_input(block, NULL, block->ninputs);
instr->regs[0]->num = regid++;
- block->inputs[block->ninputs++] = instr;
+ inputs[block->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
+
+ block->inputs = inputs;
}
static void
@@ -2189,10 +2200,6 @@ compile_instructions(struct fd3_compile_context *ctx)
break;
}
}
-
- /* fixup actual inputs for frag shader: */
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT)
- fixup_frag_inputs(ctx);
}
static void
@@ -2217,6 +2224,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
{
struct fd3_compile_context ctx;
struct ir3_block *block;
+ struct ir3_instruction **inputs;
unsigned i, j, actual_in;
int ret = 0;
@@ -2235,6 +2243,13 @@ fd3_compile_shader(struct fd3_shader_variant *so,
block = ctx.block;
+ /* keep track of the inputs from TGSI perspective.. */
+ inputs = block->inputs;
+
+ /* but fixup actual inputs for frag shader: */
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+ fixup_frag_inputs(&ctx);
+
/* at this point, for binning pass, throw away unneeded outputs: */
if (key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
@@ -2320,7 +2335,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
for (i = 0; i < so->inputs_count; i++) {
unsigned j, regid = ~0, compmask = 0;
for (j = 0; j < 4; j++) {
- struct ir3_instruction *in = block->inputs[(i*4) + j];
+ struct ir3_instruction *in = inputs[(i*4) + j];
if (in) {
compmask |= (1 << j);
regid = in->regs[0]->num - j;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
index 9a0bbb5edff..76de287b163 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c
@@ -126,6 +126,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so,
unsigned ret, base = 0;
struct tgsi_shader_info *info = &ctx->info;
const struct fd_lowering_config lconfig = {
+ .color_two_side = so->key.color_two_side,
.lower_DST = true,
.lower_XPD = true,
.lower_SCS = true,
@@ -1383,6 +1384,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
so->writes_psize = true;
break;
case TGSI_SEMANTIC_COLOR:
+ case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_TEXCOORD:
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 4cdd9387f9d..a84351ae887 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -284,9 +284,22 @@ static int
find_output(const struct fd3_shader_variant *so, fd3_semantic semantic)
{
int j;
+
for (j = 0; j < so->outputs_count; j++)
if (so->outputs[j].semantic == semantic)
return j;
+
+ /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n]
+ * in the vertex shader.. but the fragment shader doesn't know this
+ * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So
+ * at link time if there is no matching OUT.BCOLOR[n], we must map
+ * OUT.COLOR[n] to IN.BCOLOR[n].
+ */
+ if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
+ unsigned idx = sem2idx(semantic);
+ return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx));
+ }
+
return 0;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.c b/src/gallium/drivers/freedreno/freedreno_lowering.c
index 607a5acbadb..ffc7eaea53f 100644
--- a/src/gallium/drivers/freedreno/freedreno_lowering.c
+++ b/src/gallium/drivers/freedreno/freedreno_lowering.c
@@ -39,6 +39,10 @@ struct fd_lowering_context {
struct tgsi_transform_context base;
const struct fd_lowering_config *config;
struct tgsi_shader_info *info;
+ unsigned two_side_colors;
+ unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
+ unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
+ int face_idx;
unsigned numtmp;
struct {
struct tgsi_full_src_register src;
@@ -977,56 +981,188 @@ transform_dotp(struct tgsi_transform_context *tctx,
}
}
+
+/* Two-sided color emulation:
+ * For each COLOR input, create a corresponding BCOLOR input, plus
+ * CMP instruction to select front or back color based on FACE
+ */
+#define TWOSIDE_GROW(n) ( \
+ 2 + /* FACE */ \
+ ((n) * 2) + /* IN[] BCOLOR[n] */ \
+ ((n) * 1) + /* TEMP[] */ \
+ ((n) * 5) /* CMP instr */ \
+ )
+
static void
-transform_instr(struct tgsi_transform_context *tctx,
- struct tgsi_full_instruction *inst)
+emit_twoside(struct tgsi_transform_context *tctx)
{
struct fd_lowering_context *ctx = fd_lowering_context(tctx);
+ struct tgsi_shader_info *info = ctx->info;
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_instruction new_inst;
+ unsigned inbase, tmpbase;
+ int i;
+
+ inbase = info->file_max[TGSI_FILE_INPUT] + 1;
+ tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+ /* additional inputs for BCOLOR's */
+ for (i = 0; i < ctx->two_side_colors; i++) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = true;
+ decl.Range.First = decl.Range.Last = inbase + i;
+ decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+ decl.Semantic.Index =
+ info->input_semantic_index[ctx->two_side_idx[i]];
+ tctx->emit_declaration(tctx, &decl);
+ }
- if (!ctx->emitted_decls) {
- struct tgsi_full_declaration decl;
- struct tgsi_full_immediate immed;
- unsigned tmpbase = ctx->info->file_max[TGSI_FILE_TEMPORARY] + 1;
- int i;
+ /* additional input for FACE */
+ if (ctx->two_side_colors && (ctx->face_idx == -1)) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_INPUT;
+ decl.Declaration.Semantic = true;
+ decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
+ decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+ decl.Semantic.Index = 0;
+ tctx->emit_declaration(tctx, &decl);
+
+ ctx->face_idx = decl.Range.First;
+ }
- /* declare immediate: */
- immed = tgsi_default_full_immediate();
- immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
- immed.u[0].Float = 0.0;
- immed.u[1].Float = 1.0;
- immed.u[2].Float = 128.0;
- immed.u[3].Float = 0.0;
- tctx->emit_immediate(tctx, &immed);
-
- ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
- ctx->imm.Register.Index = ctx->info->immediate_count;
- ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
- ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
- ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
- ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
-
- /* declare temp regs: */
- for (i = 0; i < ctx->numtmp; i++) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_TEMPORARY;
- decl.Range.First = decl.Range.Last = tmpbase + i;
- tctx->emit_declaration(tctx, &decl);
-
- ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
- ctx->tmp[i].src.Register.Index = tmpbase + i;
- ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
- ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
- ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
- ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
-
- ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
- ctx->tmp[i].dst.Register.Index = tmpbase + i;
- ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ /* additional temps for COLOR/BCOLOR selection: */
+ for (i = 0; i < ctx->two_side_colors; i++) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
+ tctx->emit_declaration(tctx, &decl);
+ }
+
+ /* and finally additional instructions to select COLOR/BCOLOR: */
+ for (i = 0; i < ctx->two_side_colors; i++) {
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+ new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+ new_inst.Instruction.NumSrcRegs = 3;
+ new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
+ new_inst.Src[0].Register.Index = ctx->face_idx;
+ new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+ new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+ new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
+ new_inst.Src[1].Register.Index = inbase + i;
+ new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
+ new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
+ new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
+ new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
+ new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
+ new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
+ new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
+
+ tctx->emit_instruction(tctx, &new_inst);
+ }
+}
+
+static void
+emit_decls(struct tgsi_transform_context *tctx)
+{
+ struct fd_lowering_context *ctx = fd_lowering_context(tctx);
+ struct tgsi_shader_info *info = ctx->info;
+ struct tgsi_full_declaration decl;
+ struct tgsi_full_immediate immed;
+ unsigned tmpbase;
+ int i;
+
+ tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
+
+ ctx->color_base = tmpbase + ctx->numtmp;
+
+ /* declare immediate: */
+ immed = tgsi_default_full_immediate();
+ immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
+ immed.u[0].Float = 0.0;
+ immed.u[1].Float = 1.0;
+ immed.u[2].Float = 128.0;
+ immed.u[3].Float = 0.0;
+ tctx->emit_immediate(tctx, &immed);
+
+ ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
+ ctx->imm.Register.Index = info->immediate_count;
+ ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
+ ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
+ ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+ /* declare temp regs: */
+ for (i = 0; i < ctx->numtmp; i++) {
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First = decl.Range.Last = tmpbase + i;
+ tctx->emit_declaration(tctx, &decl);
+
+ ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
+ ctx->tmp[i].src.Register.Index = tmpbase + i;
+ ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
+ ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
+ ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
+ ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
+
+ ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
+ ctx->tmp[i].dst.Register.Index = tmpbase + i;
+ ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ }
+
+ if (ctx->two_side_colors)
+ emit_twoside(tctx);
+}
+
+static void
+rename_color_inputs(struct fd_lowering_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ unsigned i, j;
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ struct tgsi_src_register *src = &inst->Src[i].Register;
+ if (src->File == TGSI_FILE_INPUT) {
+ for (j = 0; j < ctx->two_side_colors; j++) {
+ if (src->Index == ctx->two_side_idx[j]) {
+ src->File = TGSI_FILE_TEMPORARY;
+ src->Index = ctx->color_base + j;
+ break;
+ }
+ }
}
+ }
+
+}
+
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct fd_lowering_context *ctx = fd_lowering_context(tctx);
+ if (!ctx->emitted_decls) {
+ emit_decls(tctx);
ctx->emitted_decls = 1;
}
+ /* if emulating two-sided-color, we need to re-write some
+ * src registers:
+ */
+ if (ctx->two_side_colors)
+ rename_color_inputs(ctx, inst);
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_DST:
if (!ctx->config->lower_DST)
@@ -1125,6 +1261,22 @@ fd_transform_lowering(const struct fd_lowering_config *config,
tgsi_scan_shader(tokens, info);
+ /* if we are adding fragment shader support to emulate two-sided
+ * color, then figure out the number of additional inputs we need
+ * to create for BCOLOR's..
+ */
+ if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
+ config->color_two_side) {
+ int i;
+ ctx.face_idx = -1;
+ for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
+ if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
+ ctx.two_side_idx[ctx.two_side_colors++] = i;
+ if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
+ ctx.face_idx = i;
+ }
+ }
+
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
/* if there are no instructions to lower, then we are done: */
if (!(OPCS(DST) ||
@@ -1140,7 +1292,8 @@ fd_transform_lowering(const struct fd_lowering_config *config,
OPCS(DP3) ||
OPCS(DPH) ||
OPCS(DP2) ||
- OPCS(DP2A)))
+ OPCS(DP2A) ||
+ ctx.two_side_colors))
return NULL;
#if 0 /* debug */
@@ -1207,8 +1360,18 @@ fd_transform_lowering(const struct fd_lowering_config *config,
numtmp = MAX2(numtmp, DOTP_TMP);
}
+ /* specifically don't include two_side_colors temps in the count: */
ctx.numtmp = numtmp;
+ if (ctx.two_side_colors) {
+ newlen += TWOSIDE_GROW(ctx.two_side_colors);
+ /* note: we permanently consume temp regs, re-writing references
+ * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
+ * instruction that selects which varying to use):
+ */
+ numtmp += ctx.two_side_colors;
+ }
+
newlen += 2 * numtmp;
newlen += 5; /* immediate */
diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.h b/src/gallium/drivers/freedreno/freedreno_lowering.h
index 2862e5d3b6b..2d36d8faf81 100644
--- a/src/gallium/drivers/freedreno/freedreno_lowering.h
+++ b/src/gallium/drivers/freedreno/freedreno_lowering.h
@@ -33,6 +33,25 @@
#include "tgsi/tgsi_scan.h"
struct fd_lowering_config {
+ /* For fragment shaders, generate a shader that emulates two
+ * sided color by inserting a BGCOLOR input for each COLOR
+ * input, and insert a CMP instruction to select the correct
+ * color to use based on the TGSI_SEMANTIC_FACE input.
+ *
+ * Note that drivers which use this to emulate two sided color
+ * will:
+ * a) need to generate (on demand) alternate shaders to use
+ * depending on the rasterizer state (ie. whether two
+ * sided shading enabled)
+ * b) expect to see the BGCOLOR semantic name in fragment
+ * shaders. During linkage, the driver should simply
+ * map VS.OUT.BGCOLOR[n] to FS.IN.BGCOLOR[n] (in the
+ * same was as linking other outs/ins).
+ */
+ unsigned color_two_side : 1;
+
+ /* TODO support for alpha_to_one as well?? */
+
/* Individual OPC lowerings, if lower_<opc> is TRUE then
* enable lowering of TGSI_OPCODE_<opc>
*/