diff options
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_lowering.c | 245 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_lowering.h | 19 |
5 files changed, 269 insertions, 57 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index ae88d6ef54d..7450fac23c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -152,6 +152,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, unsigned ret; struct tgsi_shader_info *info = &ctx->info; const struct fd_lowering_config lconfig = { + .color_two_side = so->key.color_two_side, .lower_DST = true, .lower_XPD = true, .lower_SCS = true, @@ -2003,6 +2004,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->writes_psize = true; break; case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: @@ -2059,24 +2061,31 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) { struct fd3_shader_variant *so = ctx->so; struct ir3_block *block = ctx->block; + struct ir3_instruction **inputs; struct ir3_instruction *instr; - int regid = 0; + int n, regid = 0; block->ninputs = 0; + n = 4; /* always have frag_pos */ + n += COND(so->frag_face, 4); + n += COND(so->frag_coord, 4); + + inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); + if (so->frag_face) { /* this ultimately gets assigned to hr0.x so doesn't conflict * with frag_coord/frag_pos.. 
*/ - block->inputs[block->ninputs++] = ctx->frag_face; + inputs[block->ninputs++] = ctx->frag_face; ctx->frag_face->regs[0]->num = 0; /* remaining channels not used, but let's avoid confusing * other parts that expect inputs to come in groups of vec4 */ - block->inputs[block->ninputs++] = NULL; - block->inputs[block->ninputs++] = NULL; - block->inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; + inputs[block->ninputs++] = NULL; } /* since we don't know where to set the regid for frag_coord, @@ -2090,10 +2099,10 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) ctx->frag_coord[2]->regs[0]->num = regid++; ctx->frag_coord[3]->regs[0]->num = regid++; - block->inputs[block->ninputs++] = ctx->frag_coord[0]; - block->inputs[block->ninputs++] = ctx->frag_coord[1]; - block->inputs[block->ninputs++] = ctx->frag_coord[2]; - block->inputs[block->ninputs++] = ctx->frag_coord[3]; + inputs[block->ninputs++] = ctx->frag_coord[0]; + inputs[block->ninputs++] = ctx->frag_coord[1]; + inputs[block->ninputs++] = ctx->frag_coord[2]; + inputs[block->ninputs++] = ctx->frag_coord[3]; } /* we always have frag_pos: */ @@ -2102,14 +2111,16 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) /* r0.x */ instr = create_input(block, NULL, block->ninputs); instr->regs[0]->num = regid++; - block->inputs[block->ninputs++] = instr; + inputs[block->ninputs++] = instr; ctx->frag_pos->regs[1]->instr = instr; /* r0.y */ instr = create_input(block, NULL, block->ninputs); instr->regs[0]->num = regid++; - block->inputs[block->ninputs++] = instr; + inputs[block->ninputs++] = instr; ctx->frag_pos->regs[2]->instr = instr; + + block->inputs = inputs; } static void @@ -2189,10 +2200,6 @@ compile_instructions(struct fd3_compile_context *ctx) break; } } - - /* fixup actual inputs for frag shader: */ - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) - fixup_frag_inputs(ctx); } static void @@ -2217,6 +2224,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, { 
struct fd3_compile_context ctx; struct ir3_block *block; + struct ir3_instruction **inputs; unsigned i, j, actual_in; int ret = 0; @@ -2235,6 +2243,13 @@ fd3_compile_shader(struct fd3_shader_variant *so, block = ctx.block; + /* keep track of the inputs from TGSI perspective.. */ + inputs = block->inputs; + + /* but fixup actual inputs for frag shader: */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) + fixup_frag_inputs(&ctx); + /* at this point, for binning pass, throw away unneeded outputs: */ if (key.binning_pass) { for (i = 0, j = 0; i < so->outputs_count; i++) { @@ -2320,7 +2335,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, for (i = 0; i < so->inputs_count; i++) { unsigned j, regid = ~0, compmask = 0; for (j = 0; j < 4; j++) { - struct ir3_instruction *in = block->inputs[(i*4) + j]; + struct ir3_instruction *in = inputs[(i*4) + j]; if (in) { compmask |= (1 << j); regid = in->regs[0]->num - j; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index 9a0bbb5edff..76de287b163 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -126,6 +126,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, unsigned ret, base = 0; struct tgsi_shader_info *info = &ctx->info; const struct fd_lowering_config lconfig = { + .color_two_side = so->key.color_two_side, .lower_DST = true, .lower_XPD = true, .lower_SCS = true, @@ -1383,6 +1384,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) so->writes_psize = true; break; case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: case TGSI_SEMANTIC_GENERIC: case TGSI_SEMANTIC_FOG: case TGSI_SEMANTIC_TEXCOORD: diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 4cdd9387f9d..a84351ae887 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -284,9 +284,22 @@ static int find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) { int j; + for (j = 0; j < so->outputs_count; j++) if (so->outputs[j].semantic == semantic) return j; + + /* it seems optional to have a OUT.BCOLOR[n] for each OUT.COLOR[n] + * in the vertex shader.. but the fragment shader doesn't know this + * so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So + * at link time if there is no matching OUT.BCOLOR[n], we must map + * OUT.COLOR[n] to IN.BCOLOR[n]. + */ + if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) { + unsigned idx = sem2idx(semantic); + return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); + } + return 0; } diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.c b/src/gallium/drivers/freedreno/freedreno_lowering.c index 607a5acbadb..ffc7eaea53f 100644 --- a/src/gallium/drivers/freedreno/freedreno_lowering.c +++ b/src/gallium/drivers/freedreno/freedreno_lowering.c @@ -39,6 +39,10 @@ struct fd_lowering_context { struct tgsi_transform_context base; const struct fd_lowering_config *config; struct tgsi_shader_info *info; + unsigned two_side_colors; + unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; + unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ + int face_idx; unsigned numtmp; struct { struct tgsi_full_src_register src; @@ -977,56 +981,188 @@ transform_dotp(struct tgsi_transform_context *tctx, } } + +/* Two-sided color emulation: + * For each COLOR input, create a corresponding BCOLOR input, plus + * CMP instruction to select front or back color based on FACE + */ +#define TWOSIDE_GROW(n) ( \ + 2 + /* FACE */ \ + ((n) * 2) + /* IN[] BCOLOR[n] */ \ + ((n) * 1) + /* TEMP[] */ \ + ((n) * 5) /* CMP instr */ \ + ) + static void -transform_instr(struct tgsi_transform_context *tctx, - struct tgsi_full_instruction *inst) +emit_twoside(struct tgsi_transform_context *tctx) { struct fd_lowering_context *ctx = 
fd_lowering_context(tctx); + struct tgsi_shader_info *info = ctx->info; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction new_inst; + unsigned inbase, tmpbase; + int i; + + inbase = info->file_max[TGSI_FILE_INPUT] + 1; + tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; + + /* additional inputs for BCOLOR's */ + for (i = 0; i < ctx->two_side_colors; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = true; + decl.Range.First = decl.Range.Last = inbase + i; + decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; + decl.Semantic.Index = + info->input_semantic_index[ctx->two_side_idx[i]]; + tctx->emit_declaration(tctx, &decl); + } - if (!ctx->emitted_decls) { - struct tgsi_full_declaration decl; - struct tgsi_full_immediate immed; - unsigned tmpbase = ctx->info->file_max[TGSI_FILE_TEMPORARY] + 1; - int i; + /* additional input for FACE */ + if (ctx->two_side_colors && (ctx->face_idx == -1)) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = true; + decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; + decl.Semantic.Name = TGSI_SEMANTIC_FACE; + decl.Semantic.Index = 0; + tctx->emit_declaration(tctx, &decl); + + ctx->face_idx = decl.Range.First; + } - /* declare immediate: */ - immed = tgsi_default_full_immediate(); - immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ - immed.u[0].Float = 0.0; - immed.u[1].Float = 1.0; - immed.u[2].Float = 128.0; - immed.u[3].Float = 0.0; - tctx->emit_immediate(tctx, &immed); - - ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; - ctx->imm.Register.Index = ctx->info->immediate_count; - ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; - ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; - ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; - - /* declare temp regs: */ - for (i = 0; i < ctx->numtmp; i++) { - decl = 
tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.Range.First = decl.Range.Last = tmpbase + i; - tctx->emit_declaration(tctx, &decl); - - ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; - ctx->tmp[i].src.Register.Index = tmpbase + i; - ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; - ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; - ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; - - ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; - ctx->tmp[i].dst.Register.Index = tmpbase + i; - ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; + /* additional temps for COLOR/BCOLOR selection: */ + for (i = 0; i < ctx->two_side_colors; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; + tctx->emit_declaration(tctx, &decl); + } + + /* and finally additional instructions to select COLOR/BCOLOR: */ + for (i = 0; i < ctx->two_side_colors; i++) { + new_inst = tgsi_default_full_instruction(); + new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; + + new_inst.Instruction.NumDstRegs = 1; + new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; + new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; + new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; + + new_inst.Instruction.NumSrcRegs = 3; + new_inst.Src[0].Register.File = TGSI_FILE_INPUT; + new_inst.Src[0].Register.Index = ctx->face_idx; + new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; + new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; + new_inst.Src[1].Register.File = TGSI_FILE_INPUT; + new_inst.Src[1].Register.Index = inbase + i; + new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; + new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; + new_inst.Src[1].Register.SwizzleZ = 
TGSI_SWIZZLE_Z; + new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; + new_inst.Src[2].Register.File = TGSI_FILE_INPUT; + new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; + new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; + new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; + new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; + new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; + + tctx->emit_instruction(tctx, &new_inst); + } +} + +static void +emit_decls(struct tgsi_transform_context *tctx) +{ + struct fd_lowering_context *ctx = fd_lowering_context(tctx); + struct tgsi_shader_info *info = ctx->info; + struct tgsi_full_declaration decl; + struct tgsi_full_immediate immed; + unsigned tmpbase; + int i; + + tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; + + ctx->color_base = tmpbase + ctx->numtmp; + + /* declare immediate: */ + immed = tgsi_default_full_immediate(); + immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ + immed.u[0].Float = 0.0; + immed.u[1].Float = 1.0; + immed.u[2].Float = 128.0; + immed.u[3].Float = 0.0; + tctx->emit_immediate(tctx, &immed); + + ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; + ctx->imm.Register.Index = info->immediate_count; + ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; + ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; + ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; + + /* declare temp regs: */ + for (i = 0; i < ctx->numtmp; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.Range.First = decl.Range.Last = tmpbase + i; + tctx->emit_declaration(tctx, &decl); + + ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; + ctx->tmp[i].src.Register.Index = tmpbase + i; + ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; + ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; + ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; + + ctx->tmp[i].dst.Register.File = 
TGSI_FILE_TEMPORARY; + ctx->tmp[i].dst.Register.Index = tmpbase + i; + ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; + } + + if (ctx->two_side_colors) + emit_twoside(tctx); +} + +static void +rename_color_inputs(struct fd_lowering_context *ctx, + struct tgsi_full_instruction *inst) +{ + unsigned i, j; + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_src_register *src = &inst->Src[i].Register; + if (src->File == TGSI_FILE_INPUT) { + for (j = 0; j < ctx->two_side_colors; j++) { + if (src->Index == ctx->two_side_idx[j]) { + src->File = TGSI_FILE_TEMPORARY; + src->Index = ctx->color_base + j; + break; + } + } } + } + +} + +static void +transform_instr(struct tgsi_transform_context *tctx, + struct tgsi_full_instruction *inst) +{ + struct fd_lowering_context *ctx = fd_lowering_context(tctx); + if (!ctx->emitted_decls) { + emit_decls(tctx); ctx->emitted_decls = 1; } + /* if emulating two-sided-color, we need to re-write some + * src registers: + */ + if (ctx->two_side_colors) + rename_color_inputs(ctx, inst); + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_DST: if (!ctx->config->lower_DST) @@ -1125,6 +1261,22 @@ fd_transform_lowering(const struct fd_lowering_config *config, tgsi_scan_shader(tokens, info); + /* if we are adding fragment shader support to emulate two-sided + * color, then figure out the number of additional inputs we need + * to create for BCOLOR's.. + */ + if ((info->processor == TGSI_PROCESSOR_FRAGMENT) && + config->color_two_side) { + int i; + ctx.face_idx = -1; + for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { + if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) + ctx.two_side_idx[ctx.two_side_colors++] = i; + if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) + ctx.face_idx = i; + } + } + #define OPCS(x) ((config->lower_ ## x) ? 
info->opcode_count[TGSI_OPCODE_ ## x] : 0) /* if there are no instructions to lower, then we are done: */ if (!(OPCS(DST) || @@ -1140,7 +1292,8 @@ fd_transform_lowering(const struct fd_lowering_config *config, OPCS(DP3) || OPCS(DPH) || OPCS(DP2) || - OPCS(DP2A))) + OPCS(DP2A) || + ctx.two_side_colors)) return NULL; #if 0 /* debug */ @@ -1207,8 +1360,18 @@ fd_transform_lowering(const struct fd_lowering_config *config, numtmp = MAX2(numtmp, DOTP_TMP); } + /* specifically don't include two_side_colors temps in the count: */ ctx.numtmp = numtmp; + if (ctx.two_side_colors) { + newlen += TWOSIDE_GROW(ctx.two_side_colors); + /* note: we permanently consume temp regs, re-writing references + * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP + * instruction that selects which varying to use): + */ + numtmp += ctx.two_side_colors; + } + newlen += 2 * numtmp; newlen += 5; /* immediate */ diff --git a/src/gallium/drivers/freedreno/freedreno_lowering.h b/src/gallium/drivers/freedreno/freedreno_lowering.h index 2862e5d3b6b..2d36d8faf81 100644 --- a/src/gallium/drivers/freedreno/freedreno_lowering.h +++ b/src/gallium/drivers/freedreno/freedreno_lowering.h @@ -33,6 +33,25 @@ #include "tgsi/tgsi_scan.h" struct fd_lowering_config { + /* For fragment shaders, generate a shader that emulates two + * sided color by inserting a BCOLOR input for each COLOR + * input, and insert a CMP instruction to select the correct + * color to use based on the TGSI_SEMANTIC_FACE input. + * + * Note that drivers which use this to emulate two sided color + * will: + * a) need to generate (on demand) alternate shaders to use + * depending on the rasterizer state (i.e. whether two + * sided shading is enabled) + * b) expect to see the BCOLOR semantic name in fragment + * shaders. During linkage, the driver should simply + * map VS.OUT.BCOLOR[n] to FS.IN.BCOLOR[n] (in the + * same way as linking other outs/ins). + */ + unsigned color_two_side : 1; + + /* TODO support for alpha_to_one as well?? 
*/ + /* Individual OPC lowerings, if lower_<opc> is TRUE then * enable lowering of TGSI_OPCODE_<opc> */ |