diff options
author | Rob Clark <[email protected]> | 2018-08-10 11:57:26 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2018-08-14 17:59:02 -0400 |
commit | fdd35f497bd9fe840b5bcc4a21464967e7abf866 (patch) | |
tree | 22bf077794e6c8c63ca4ec2f46716e5b75642e40 | |
parent | e97b56172cd5f7f3ccc8a609713e556a8b9552ec (diff) |
freedreno/ir3: stop hard-coding FS input regs
We originally hard-coded these because at the time we didn't know all the
bitfields that configure where the various frag shader sysvals go. But
now we do.
So switch to using sysvals for all the frag shader inputs.
Signed-off-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_program.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_program.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 185 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 |
7 files changed, 103 insertions, 183 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 64eeb106e53..b6f8ff33c47 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -140,7 +140,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; - uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; + uint32_t pos_regid, posz_regid, psize_regid; + uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid; + uint32_t color_regid[4] = {0}; int constmode; int i, j; @@ -208,6 +210,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); } + face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_VARYING_COORD); + /* adjust regids for alpha output formats. 
there is no alpha render * format, so it's just treated like red */ @@ -230,10 +237,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | - COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) | - A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2)))); - OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); - OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid)); + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) | + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) | + A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid)); OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 7c399d99a1f..bfe2be6b07b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -202,7 +202,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; - uint32_t face_regid, coord_regid, zwcoord_regid; + uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid; enum a3xx_threadsize fssz; int constmode; int i, j; @@ -244,10 +244,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? 
regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -273,7 +273,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 286411ec44e..ba3339fb1d2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -350,13 +350,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); + samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); - vcoord_regid = (s[FS].v->total_in > 0) ? 
s[FS].v->pos_regid : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 44ee5b2c4a4..e4979a60a02 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -71,7 +71,7 @@ struct ir3_context { struct ir3_instruction *frag_vcoord; /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ - struct ir3_instruction *frag_face, *frag_coord[4]; + struct ir3_instruction *frag_face, *frag_coord; /* For vertex shaders, keep track of the system values sources */ struct ir3_instruction *vertex_id, *basevertex, *instance_id; @@ -781,43 +781,6 @@ create_frag_input(struct ir3_context *ctx, bool use_ldlv) } static struct ir3_instruction * -create_frag_coord(struct ir3_context *ctx, unsigned comp) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - - compile_assert(ctx, !ctx->frag_coord[comp]); - - ctx->frag_coord[comp] = create_input(ctx, 0); - - switch (comp) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. 
we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - * - * sub.s tmp, src, 8 - * shr.b tmp, tmp, 4 - * mov.u32f32 dst, tmp - * - */ - instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0, - create_immed(block, 8), 0); - instr = ir3_SHR_B(block, instr, 0, - create_immed(block, 4), 0); - instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); - - return instr; - case 2: /* .z */ - case 3: /* .w */ - default: - /* seems that we can use these as-is: */ - return ctx->frag_coord[comp]; - } -} - -static struct ir3_instruction * create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ @@ -2448,6 +2411,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) if (!ctx->frag_face) { ctx->so->frag_face = true; ctx->frag_face = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; } /* for fragface, we get -1 for back and 0 for front. However this is @@ -3229,6 +3193,46 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl) ir3_END(ctx->block); } +static struct ir3_instruction * +create_frag_coord(struct ir3_context *ctx, unsigned comp) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + + if (!ctx->frag_coord) { + ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); + /* defer add_sysval_input() until after all inputs created */ + } + + split_dest(block, &instr, ctx->frag_coord, comp, 1); + + switch (comp) { + case 0: /* .x */ + case 1: /* .y */ + /* for frag_coord, we get unsigned values.. 
we need + * to subtract (integer) 8 and divide by 16 (right- + * shift by 4) then convert to float: + * + * sub.s tmp, src, 8 + * shr.b tmp, tmp, 4 + * mov.u32f32 dst, tmp + * + */ + instr = ir3_SUB_S(block, instr, 0, + create_immed(block, 8), 0); + instr = ir3_SHR_B(block, instr, 0, + create_immed(block, 4), 0); + instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); + + return instr; + case 2: /* .z */ + case 3: /* .w */ + default: + /* seems that we can use these as-is: */ + return instr; + } +} + static void setup_input(struct ir3_context *ctx, nir_variable *in) { @@ -3406,7 +3410,7 @@ max_drvloc(struct exec_list *vars) } static const unsigned max_sysvals[SHADER_MAX] = { - [SHADER_FRAGMENT] = 8, + [SHADER_FRAGMENT] = 24, // TODO [SHADER_VERTEX] = 16, [SHADER_COMPUTE] = 16, // TODO how many do we actually need? }; @@ -3433,17 +3437,17 @@ emit_instructions(struct ir3_context *ctx) ninputs -= max_sysvals[ctx->so->type]; - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: + /* for fragment shader, the vcoord input register is used as the + * base for bary.f varying fetch instrs: */ + struct ir3_instruction *vcoord = NULL; if (ctx->so->type == SHADER_FRAGMENT) { - // TODO maybe a helper for fi since we need it a few places.. 
- struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_vcoord = instr; + struct ir3_instruction *xy[2]; + + vcoord = create_input_compmask(ctx, 0, 0x3); + split_dest(ctx->block, xy, vcoord, 0, 2); + + ctx->frag_vcoord = create_collect(ctx, xy, 2); } /* Setup inputs: */ @@ -3451,6 +3455,19 @@ emit_instructions(struct ir3_context *ctx) setup_input(ctx, var); } + /* Defer add_sysval_input() stuff until after setup_inputs(), + * because sysvals need to be appended after varyings: + */ + if (vcoord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, + 0x3, vcoord); + } + + if (ctx->frag_coord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, + 0xf, ctx->frag_coord); + } + /* Setup outputs: */ nir_foreach_variable(var, &ctx->s->outputs) { setup_output(ctx, var); @@ -3470,76 +3487,26 @@ emit_instructions(struct ir3_context *ctx) emit_function(ctx, fxn); } -/* from NIR perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_vcoord and optionally frag_coord and - * frag_face. +/* from NIR perspective, we actually have varying inputs. But the varying + * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The + * only actual inputs are the sysvals. 
*/ static void fixup_frag_inputs(struct ir3_context *ctx) { struct ir3_shader_variant *so = ctx->so; struct ir3 *ir = ctx->ir; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - ir->ninputs = 0; - - n = 4; /* always have frag_vcoord */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); + unsigned i = 0; - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); + /* sysvals should appear at the end of the inputs, drop everything else: */ + while ((i < so->inputs_count) && !so->inputs[i].sysval) + i++; - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_vcoord.. - */ - inputs[ir->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; + /* at IR level, inputs are always blocks of 4 scalars: */ + i *= 4; - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. 
But we don't want to *always* - * use r1.x for frag_vcoord as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[ir->ninputs++] = ctx->frag_coord[0]; - inputs[ir->ninputs++] = ctx->frag_coord[1]; - inputs[ir->ninputs++] = ctx->frag_coord[2]; - inputs[ir->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_vcoord: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[2]->instr = instr; - - ir->inputs = inputs; + ir->inputs = &ir->inputs[i]; + ir->ninputs -= i; } /* Fixup tex sampler state for astc/srgb workaround instructions. 
We diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 74d85eaf8e2..83bc375aeb5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -1047,49 +1047,10 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) static int ra_alloc(struct ir3_ra_ctx *ctx) { - unsigned n = 0; - - /* frag shader inputs get pre-assigned, since we have some - * constraints/unknowns about setup for some of these regs: - */ - if (ctx->type == SHADER_FRAGMENT) { - struct ir3 *ir = ctx->ir; - unsigned i = 0, j; - if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { - struct ir3_instruction *instr = ir->inputs[i]; - int cls = size_to_class(1, true, false); - unsigned name = __ra_name(ctx, cls, instr); - unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; - - /* if we have frag_face, it gets hr0.x */ - ra_set_node_reg(ctx->g, name, reg); - i += 4; - } - - j = 0; - for (; i < ir->ninputs; i++) { - struct ir3_instruction *instr = ir->inputs[i]; - if (instr) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - - if (id->defn == instr) { - unsigned name, reg; - - name = ra_name(ctx, id); - reg = ctx->set->gpr_to_ra_reg[id->cls][j]; - - ra_set_node_reg(ctx->g, name, reg); - j += id->sz; - } - } - } - n = j; - } - /* pre-assign array elements: */ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - unsigned base = n; + unsigned base = 0; if (arr->end_ip == 0) continue; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index b0663d5c5ca..7bb4263b177 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -97,18 +97,6 @@ fixup_regfootprint(struct ir3_shader_variant *v) int32_t regid = (v->outputs[i].regid + 3) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } - - if (v->type == SHADER_FRAGMENT) { - /* NOTE: not sure how to turn 
pos_regid off.. but this could - * be, for example, r1.x while max reg used by the shader is - * r0.*, in which case we need to fixup the reg footprint: - */ - v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2); - if (v->frag_coord) - debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */ - if (v->frag_face) - debug_assert(v->info.max_half_reg >= 0); /* hr0.x */ - } } /* wrapper for ir3_assemble() which does some info fixup based on @@ -518,7 +506,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); break; case SHADER_FRAGMENT: - dump_reg(out, "pos (bary)", so->pos_regid); + dump_reg(out, "pos (bary)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); if (so->color0_mrt) { dump_output(out, so, FRAG_RESULT_COLOR, "color"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 93182c710c2..507e89c4735 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -251,10 +251,6 @@ struct ir3_shader_variant { * + From the vert shader, we only need the output regid */ - /* for frag shader, pos_regid holds the frag_vcoord, ie. what is passed - * to bary.f instructions - */ - uint8_t pos_regid; bool frag_coord, frag_face, color0_mrt; /* NOTE: for input/outputs, slot is: |