diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_program.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_program.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 185 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 41 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 4 |
7 files changed, 103 insertions, 183 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 64eeb106e53..b6f8ff33c47 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -140,7 +140,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; - uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; + uint32_t pos_regid, posz_regid, psize_regid; + uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid; + uint32_t color_regid[4] = {0}; int constmode; int i, j; @@ -208,6 +210,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); } + face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_VARYING_COORD); + /* adjust regids for alpha output formats. there is no alpha render * format, so it's just treated like red */ @@ -230,10 +237,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | - COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) | - A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2)))); - OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); - OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid)); + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) | + A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) | + A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid)); + OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid)); OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 7c399d99a1f..bfe2be6b07b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -202,7 +202,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; - uint32_t face_regid, coord_regid, zwcoord_regid; + uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid; enum a3xx_threadsize fssz; int constmode; int i, j; @@ -244,10 +244,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -273,7 +273,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index 286411ec44e..ba3339fb1d2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -350,13 +350,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); + samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); - vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 44ee5b2c4a4..e4979a60a02 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -71,7 +71,7 @@ struct ir3_context { struct ir3_instruction *frag_vcoord; /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */ - struct ir3_instruction *frag_face, *frag_coord[4]; + struct ir3_instruction *frag_face, *frag_coord; /* For vertex shaders, keep track of the system values sources */ struct ir3_instruction *vertex_id, *basevertex, *instance_id; @@ -781,43 +781,6 @@ create_frag_input(struct ir3_context *ctx, bool use_ldlv) } static struct ir3_instruction * -create_frag_coord(struct ir3_context *ctx, unsigned comp) -{ - struct ir3_block *block = ctx->block; - struct ir3_instruction *instr; - - compile_assert(ctx, !ctx->frag_coord[comp]); - - ctx->frag_coord[comp] = create_input(ctx, 0); - - switch (comp) { - case 0: /* .x */ - case 1: /* .y */ - /* for frag_coord, we get unsigned values.. we need - * to subtract (integer) 8 and divide by 16 (right- - * shift by 4) then convert to float: - * - * sub.s tmp, src, 8 - * shr.b tmp, tmp, 4 - * mov.u32f32 dst, tmp - * - */ - instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0, - create_immed(block, 8), 0); - instr = ir3_SHR_B(block, instr, 0, - create_immed(block, 4), 0); - instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); - - return instr; - case 2: /* .z */ - case 3: /* .w */ - default: - /* seems that we can use these as-is: */ - return ctx->frag_coord[comp]; - } -} - -static struct ir3_instruction * create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ @@ -2448,6 +2411,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) if (!ctx->frag_face) { ctx->so->frag_face = true; ctx->frag_face = create_input(ctx, 0); + add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face); ctx->frag_face->regs[0]->flags |= IR3_REG_HALF; } /* for fragface, we get -1 for back and 0 for front. However this is @@ -3229,6 +3193,46 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl) ir3_END(ctx->block); } +static struct ir3_instruction * +create_frag_coord(struct ir3_context *ctx, unsigned comp) +{ + struct ir3_block *block = ctx->block; + struct ir3_instruction *instr; + + if (!ctx->frag_coord) { + ctx->frag_coord = create_input_compmask(ctx, 0, 0xf); + /* defer add_sysval_input() until after all inputs created */ + } + + split_dest(block, &instr, ctx->frag_coord, comp, 1); + + switch (comp) { + case 0: /* .x */ + case 1: /* .y */ + /* for frag_coord, we get unsigned values.. we need + * to subtract (integer) 8 and divide by 16 (right- + * shift by 4) then convert to float: + * + * sub.s tmp, src, 8 + * shr.b tmp, tmp, 4 + * mov.u32f32 dst, tmp + * + */ + instr = ir3_SUB_S(block, instr, 0, + create_immed(block, 8), 0); + instr = ir3_SHR_B(block, instr, 0, + create_immed(block, 4), 0); + instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32); + + return instr; + case 2: /* .z */ + case 3: /* .w */ + default: + /* seems that we can use these as-is: */ + return instr; + } +} + static void setup_input(struct ir3_context *ctx, nir_variable *in) { @@ -3406,7 +3410,7 @@ max_drvloc(struct exec_list *vars) } static const unsigned max_sysvals[SHADER_MAX] = { - [SHADER_FRAGMENT] = 8, + [SHADER_FRAGMENT] = 24, // TODO [SHADER_VERTEX] = 16, [SHADER_COMPUTE] = 16, // TODO how many do we actually need? }; @@ -3433,17 +3437,17 @@ emit_instructions(struct ir3_context *ctx) ninputs -= max_sysvals[ctx->so->type]; - /* for fragment shader, we have a single input register (usually - * r0.xy) which is used as the base for bary.f varying fetch instrs: + /* for fragment shader, the vcoord input register is used as the + * base for bary.f varying fetch instrs: */ + struct ir3_instruction *vcoord = NULL; if (ctx->so->type == SHADER_FRAGMENT) { - // TODO maybe a helper for fi since we need it a few places.. - struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, OPC_META_FI); - ir3_reg_create(instr, 0, 0); - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ - ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ - ctx->frag_vcoord = instr; + struct ir3_instruction *xy[2]; + + vcoord = create_input_compmask(ctx, 0, 0x3); + split_dest(ctx->block, xy, vcoord, 0, 2); + + ctx->frag_vcoord = create_collect(ctx, xy, 2); } /* Setup inputs: */ @@ -3451,6 +3455,19 @@ emit_instructions(struct ir3_context *ctx) setup_input(ctx, var); } + /* Defer add_sysval_input() stuff until after setup_inputs(), + * because sysvals need to be appended after varyings: + */ + if (vcoord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD, + 0x3, vcoord); + } + + if (ctx->frag_coord) { + add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD, + 0xf, ctx->frag_coord); + } + /* Setup outputs: */ nir_foreach_variable(var, &ctx->s->outputs) { setup_output(ctx, var); @@ -3470,76 +3487,26 @@ emit_instructions(struct ir3_context *ctx) emit_function(ctx, fxn); } -/* from NIR perspective, we actually have inputs. But most of the "inputs" - * for a fragment shader are just bary.f instructions. The *actual* inputs - * from the hw perspective are the frag_vcoord and optionally frag_coord and - * frag_face. +/* from NIR perspective, we actually have varying inputs. But the varying + * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The + * only actual inputs are the sysvals. */ static void fixup_frag_inputs(struct ir3_context *ctx) { struct ir3_shader_variant *so = ctx->so; struct ir3 *ir = ctx->ir; - struct ir3_instruction **inputs; - struct ir3_instruction *instr; - int n, regid = 0; - - ir->ninputs = 0; - - n = 4; /* always have frag_vcoord */ - n += COND(so->frag_face, 4); - n += COND(so->frag_coord, 4); + unsigned i = 0; - inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); + /* sysvals should appear at the end of the inputs, drop everything else: */ + while ((i < so->inputs_count) && !so->inputs[i].sysval) + i++; - if (so->frag_face) { - /* this ultimately gets assigned to hr0.x so doesn't conflict - * with frag_coord/frag_vcoord.. - */ - inputs[ir->ninputs++] = ctx->frag_face; - ctx->frag_face->regs[0]->num = 0; + /* at IR level, inputs are always blocks of 4 scalars: */ + i *= 4; - /* remaining channels not used, but let's avoid confusing - * other parts that expect inputs to come in groups of vec4 - */ - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - inputs[ir->ninputs++] = NULL; - } - - /* since we don't know where to set the regid for frag_coord, - * we have to use r0.x for it. But we don't want to *always* - * use r1.x for frag_vcoord as that could increase the register - * footprint on simple shaders: - */ - if (so->frag_coord) { - ctx->frag_coord[0]->regs[0]->num = regid++; - ctx->frag_coord[1]->regs[0]->num = regid++; - ctx->frag_coord[2]->regs[0]->num = regid++; - ctx->frag_coord[3]->regs[0]->num = regid++; - - inputs[ir->ninputs++] = ctx->frag_coord[0]; - inputs[ir->ninputs++] = ctx->frag_coord[1]; - inputs[ir->ninputs++] = ctx->frag_coord[2]; - inputs[ir->ninputs++] = ctx->frag_coord[3]; - } - - /* we always have frag_vcoord: */ - so->pos_regid = regid; - - /* r0.x */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[1]->instr = instr; - - /* r0.y */ - instr = create_input(ctx, ir->ninputs); - instr->regs[0]->num = regid++; - inputs[ir->ninputs++] = instr; - ctx->frag_vcoord->regs[2]->instr = instr; - - ir->inputs = inputs; + ir->inputs = &ir->inputs[i]; + ir->ninputs -= i; } /* Fixup tex sampler state for astc/srgb workaround instructions. We diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 74d85eaf8e2..83bc375aeb5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -1047,49 +1047,10 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block) static int ra_alloc(struct ir3_ra_ctx *ctx) { - unsigned n = 0; - - /* frag shader inputs get pre-assigned, since we have some - * constraints/unknowns about setup for some of these regs: - */ - if (ctx->type == SHADER_FRAGMENT) { - struct ir3 *ir = ctx->ir; - unsigned i = 0, j; - if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) { - struct ir3_instruction *instr = ir->inputs[i]; - int cls = size_to_class(1, true, false); - unsigned name = __ra_name(ctx, cls, instr); - unsigned reg = ctx->set->gpr_to_ra_reg[cls][0]; - - /* if we have frag_face, it gets hr0.x */ - ra_set_node_reg(ctx->g, name, reg); - i += 4; - } - - j = 0; - for (; i < ir->ninputs; i++) { - struct ir3_instruction *instr = ir->inputs[i]; - if (instr) { - struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip]; - - if (id->defn == instr) { - unsigned name, reg; - - name = ra_name(ctx, id); - reg = ctx->set->gpr_to_ra_reg[id->cls][j]; - - ra_set_node_reg(ctx->g, name, reg); - j += id->sz; - } - } - } - n = j; - } - /* pre-assign array elements: */ list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) { - unsigned base = n; + unsigned base = 0; if (arr->end_ip == 0) continue; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index b0663d5c5ca..7bb4263b177 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -97,18 +97,6 @@ fixup_regfootprint(struct ir3_shader_variant *v) int32_t regid = (v->outputs[i].regid + 3) >> 2; v->info.max_reg = MAX2(v->info.max_reg, regid); } - - if (v->type == SHADER_FRAGMENT) { - /* NOTE: not sure how to turn pos_regid off.. but this could - * be, for example, r1.x while max reg used by the shader is - * r0.*, in which case we need to fixup the reg footprint: - */ - v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2); - if (v->frag_coord) - debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */ - if (v->frag_face) - debug_assert(v->info.max_half_reg >= 0); /* hr0.x */ - } } /* wrapper for ir3_assemble() which does some info fixup based on @@ -518,7 +506,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) dump_output(out, so, VARYING_SLOT_PSIZ, "psize"); break; case SHADER_FRAGMENT: - dump_reg(out, "pos (bary)", so->pos_regid); + dump_reg(out, "pos (bary)", + ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD)); dump_output(out, so, FRAG_RESULT_DEPTH, "posz"); if (so->color0_mrt) { dump_output(out, so, FRAG_RESULT_COLOR, "color"); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 93182c710c2..507e89c4735 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -251,10 +251,6 @@ struct ir3_shader_variant { * + From the vert shader, we only need the output regid */ - /* for frag shader, pos_regid holds the frag_vcoord, ie. what is passed - * to bary.f instructions - */ - uint8_t pos_regid; bool frag_coord, frag_face, color0_mrt; /* NOTE: for input/outputs, slot is: |