summary refs log tree commit diff stats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 18
-rw-r--r-- src/gallium/drivers/freedreno/a4xx/fd4_program.c 12
-rw-r--r-- src/gallium/drivers/freedreno/a5xx/fd5_program.c 11
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 185
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_ra.c 41
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.c 15
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_shader.h 4
7 files changed, 103 insertions, 183 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 64eeb106e53..b6f8ff33c47 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -140,7 +140,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
const struct ir3_info *vsi, *fsi;
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
uint32_t fpbuffersz, vpbuffersz, fsoff;
- uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
+ uint32_t pos_regid, posz_regid, psize_regid;
+ uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid;
+ uint32_t color_regid[4] = {0};
int constmode;
int i, j;
@@ -208,6 +210,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
}
+ face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
+ vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_VARYING_COORD);
+
/* adjust regids for alpha output formats. there is no alpha render
* format, so it's just treated like red
*/
@@ -230,10 +237,11 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
- COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) |
- A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2))));
- OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
- OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
+ A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
+ OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 7c399d99a1f..bfe2be6b07b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -202,7 +202,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
{
struct stage s[MAX_STAGES];
uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
- uint32_t face_regid, coord_regid, zwcoord_regid;
+ uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid;
enum a3xx_threadsize fssz;
int constmode;
int i, j;
@@ -244,10 +244,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
- /* TODO get these dynamically: */
- face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
- coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
- zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
+ face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
+ vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD);
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
@@ -273,7 +273,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
0x3f3f000 | /* XXX */
A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
- OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) |
+ OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid) |
0xfcfcfc00);
OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 286411ec44e..ba3339fb1d2 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -350,13 +350,12 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
}
- samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID);
+ samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID);
samp_mask_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN);
- /* TODO get these dynamically: */
- face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
- coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
- zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
- vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);
+ face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
+ coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
+ zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
+ vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_VARYING_COORD);
/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 44ee5b2c4a4..e4979a60a02 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -71,7 +71,7 @@ struct ir3_context {
struct ir3_instruction *frag_vcoord;
/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
- struct ir3_instruction *frag_face, *frag_coord[4];
+ struct ir3_instruction *frag_face, *frag_coord;
/* For vertex shaders, keep track of the system values sources */
struct ir3_instruction *vertex_id, *basevertex, *instance_id;
@@ -781,43 +781,6 @@ create_frag_input(struct ir3_context *ctx, bool use_ldlv)
}
static struct ir3_instruction *
-create_frag_coord(struct ir3_context *ctx, unsigned comp)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *instr;
-
- compile_assert(ctx, !ctx->frag_coord[comp]);
-
- ctx->frag_coord[comp] = create_input(ctx, 0);
-
- switch (comp) {
- case 0: /* .x */
- case 1: /* .y */
- /* for frag_coord, we get unsigned values.. we need
- * to subtract (integer) 8 and divide by 16 (right-
- * shift by 4) then convert to float:
- *
- * sub.s tmp, src, 8
- * shr.b tmp, tmp, 4
- * mov.u32f32 dst, tmp
- *
- */
- instr = ir3_SUB_S(block, ctx->frag_coord[comp], 0,
- create_immed(block, 8), 0);
- instr = ir3_SHR_B(block, instr, 0,
- create_immed(block, 4), 0);
- instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
-
- return instr;
- case 2: /* .z */
- case 3: /* .w */
- default:
- /* seems that we can use these as-is: */
- return ctx->frag_coord[comp];
- }
-}
-
-static struct ir3_instruction *
create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
@@ -2448,6 +2411,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
if (!ctx->frag_face) {
ctx->so->frag_face = true;
ctx->frag_face = create_input(ctx, 0);
+ add_sysval_input(ctx, SYSTEM_VALUE_FRONT_FACE, ctx->frag_face);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
}
/* for fragface, we get -1 for back and 0 for front. However this is
@@ -3229,6 +3193,46 @@ emit_function(struct ir3_context *ctx, nir_function_impl *impl)
ir3_END(ctx->block);
}
+static struct ir3_instruction *
+create_frag_coord(struct ir3_context *ctx, unsigned comp)
+{
+ struct ir3_block *block = ctx->block;
+ struct ir3_instruction *instr;
+
+ if (!ctx->frag_coord) {
+ ctx->frag_coord = create_input_compmask(ctx, 0, 0xf);
+ /* defer add_sysval_input() until after all inputs created */
+ }
+
+ split_dest(block, &instr, ctx->frag_coord, comp, 1);
+
+ switch (comp) {
+ case 0: /* .x */
+ case 1: /* .y */
+ /* for frag_coord, we get unsigned values.. we need
+ * to subtract (integer) 8 and divide by 16 (right-
+ * shift by 4) then convert to float:
+ *
+ * sub.s tmp, src, 8
+ * shr.b tmp, tmp, 4
+ * mov.u32f32 dst, tmp
+ *
+ */
+ instr = ir3_SUB_S(block, instr, 0,
+ create_immed(block, 8), 0);
+ instr = ir3_SHR_B(block, instr, 0,
+ create_immed(block, 4), 0);
+ instr = ir3_COV(block, instr, TYPE_U32, TYPE_F32);
+
+ return instr;
+ case 2: /* .z */
+ case 3: /* .w */
+ default:
+ /* seems that we can use these as-is: */
+ return instr;
+ }
+}
+
static void
setup_input(struct ir3_context *ctx, nir_variable *in)
{
@@ -3406,7 +3410,7 @@ max_drvloc(struct exec_list *vars)
}
static const unsigned max_sysvals[SHADER_MAX] = {
- [SHADER_FRAGMENT] = 8,
+ [SHADER_FRAGMENT] = 24, // TODO
[SHADER_VERTEX] = 16,
[SHADER_COMPUTE] = 16, // TODO how many do we actually need?
};
@@ -3433,17 +3437,17 @@ emit_instructions(struct ir3_context *ctx)
ninputs -= max_sysvals[ctx->so->type];
- /* for fragment shader, we have a single input register (usually
- * r0.xy) which is used as the base for bary.f varying fetch instrs:
+ /* for fragment shader, the vcoord input register is used as the
+ * base for bary.f varying fetch instrs:
*/
+ struct ir3_instruction *vcoord = NULL;
if (ctx->so->type == SHADER_FRAGMENT) {
- // TODO maybe a helper for fi since we need it a few places..
- struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->block, OPC_META_FI);
- ir3_reg_create(instr, 0, 0);
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */
- ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */
- ctx->frag_vcoord = instr;
+ struct ir3_instruction *xy[2];
+
+ vcoord = create_input_compmask(ctx, 0, 0x3);
+ split_dest(ctx->block, xy, vcoord, 0, 2);
+
+ ctx->frag_vcoord = create_collect(ctx, xy, 2);
}
/* Setup inputs: */
@@ -3451,6 +3455,19 @@ emit_instructions(struct ir3_context *ctx)
setup_input(ctx, var);
}
+ /* Defer add_sysval_input() stuff until after setup_inputs(),
+ * because sysvals need to be appended after varyings:
+ */
+ if (vcoord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_VARYING_COORD,
+ 0x3, vcoord);
+ }
+
+ if (ctx->frag_coord) {
+ add_sysval_input_compmask(ctx, SYSTEM_VALUE_FRAG_COORD,
+ 0xf, ctx->frag_coord);
+ }
+
/* Setup outputs: */
nir_foreach_variable(var, &ctx->s->outputs) {
setup_output(ctx, var);
@@ -3470,76 +3487,26 @@ emit_instructions(struct ir3_context *ctx)
emit_function(ctx, fxn);
}
-/* from NIR perspective, we actually have inputs. But most of the "inputs"
- * for a fragment shader are just bary.f instructions. The *actual* inputs
- * from the hw perspective are the frag_vcoord and optionally frag_coord and
- * frag_face.
+/* from NIR perspective, we actually have varying inputs. But the varying
+ * inputs, from an IR standpoint, are just bary.f/ldlv instructions. The
+ * only actual inputs are the sysvals.
*/
static void
fixup_frag_inputs(struct ir3_context *ctx)
{
struct ir3_shader_variant *so = ctx->so;
struct ir3 *ir = ctx->ir;
- struct ir3_instruction **inputs;
- struct ir3_instruction *instr;
- int n, regid = 0;
-
- ir->ninputs = 0;
-
- n = 4; /* always have frag_vcoord */
- n += COND(so->frag_face, 4);
- n += COND(so->frag_coord, 4);
+ unsigned i = 0;
- inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *)));
+ /* sysvals should appear at the end of the inputs, drop everything else: */
+ while ((i < so->inputs_count) && !so->inputs[i].sysval)
+ i++;
- if (so->frag_face) {
- /* this ultimately gets assigned to hr0.x so doesn't conflict
- * with frag_coord/frag_vcoord..
- */
- inputs[ir->ninputs++] = ctx->frag_face;
- ctx->frag_face->regs[0]->num = 0;
+ /* at IR level, inputs are always blocks of 4 scalars: */
+ i *= 4;
- /* remaining channels not used, but let's avoid confusing
- * other parts that expect inputs to come in groups of vec4
- */
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- inputs[ir->ninputs++] = NULL;
- }
-
- /* since we don't know where to set the regid for frag_coord,
- * we have to use r0.x for it. But we don't want to *always*
- * use r1.x for frag_vcoord as that could increase the register
- * footprint on simple shaders:
- */
- if (so->frag_coord) {
- ctx->frag_coord[0]->regs[0]->num = regid++;
- ctx->frag_coord[1]->regs[0]->num = regid++;
- ctx->frag_coord[2]->regs[0]->num = regid++;
- ctx->frag_coord[3]->regs[0]->num = regid++;
-
- inputs[ir->ninputs++] = ctx->frag_coord[0];
- inputs[ir->ninputs++] = ctx->frag_coord[1];
- inputs[ir->ninputs++] = ctx->frag_coord[2];
- inputs[ir->ninputs++] = ctx->frag_coord[3];
- }
-
- /* we always have frag_vcoord: */
- so->pos_regid = regid;
-
- /* r0.x */
- instr = create_input(ctx, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[1]->instr = instr;
-
- /* r0.y */
- instr = create_input(ctx, ir->ninputs);
- instr->regs[0]->num = regid++;
- inputs[ir->ninputs++] = instr;
- ctx->frag_vcoord->regs[2]->instr = instr;
-
- ir->inputs = inputs;
+ ir->inputs = &ir->inputs[i];
+ ir->ninputs -= i;
}
/* Fixup tex sampler state for astc/srgb workaround instructions. We
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 74d85eaf8e2..83bc375aeb5 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -1047,49 +1047,10 @@ ra_block_alloc(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static int
ra_alloc(struct ir3_ra_ctx *ctx)
{
- unsigned n = 0;
-
- /* frag shader inputs get pre-assigned, since we have some
- * constraints/unknowns about setup for some of these regs:
- */
- if (ctx->type == SHADER_FRAGMENT) {
- struct ir3 *ir = ctx->ir;
- unsigned i = 0, j;
- if (ctx->frag_face && (i < ir->ninputs) && ir->inputs[i]) {
- struct ir3_instruction *instr = ir->inputs[i];
- int cls = size_to_class(1, true, false);
- unsigned name = __ra_name(ctx, cls, instr);
- unsigned reg = ctx->set->gpr_to_ra_reg[cls][0];
-
- /* if we have frag_face, it gets hr0.x */
- ra_set_node_reg(ctx->g, name, reg);
- i += 4;
- }
-
- j = 0;
- for (; i < ir->ninputs; i++) {
- struct ir3_instruction *instr = ir->inputs[i];
- if (instr) {
- struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
-
- if (id->defn == instr) {
- unsigned name, reg;
-
- name = ra_name(ctx, id);
- reg = ctx->set->gpr_to_ra_reg[id->cls][j];
-
- ra_set_node_reg(ctx->g, name, reg);
- j += id->sz;
- }
- }
- }
- n = j;
- }
-
/* pre-assign array elements:
*/
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
- unsigned base = n;
+ unsigned base = 0;
if (arr->end_ip == 0)
continue;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index b0663d5c5ca..7bb4263b177 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -97,18 +97,6 @@ fixup_regfootprint(struct ir3_shader_variant *v)
int32_t regid = (v->outputs[i].regid + 3) >> 2;
v->info.max_reg = MAX2(v->info.max_reg, regid);
}
-
- if (v->type == SHADER_FRAGMENT) {
- /* NOTE: not sure how to turn pos_regid off.. but this could
- * be, for example, r1.x while max reg used by the shader is
- * r0.*, in which case we need to fixup the reg footprint:
- */
- v->info.max_reg = MAX2(v->info.max_reg, v->pos_regid >> 2);
- if (v->frag_coord)
- debug_assert(v->info.max_reg >= 0); /* hard coded r0.x */
- if (v->frag_face)
- debug_assert(v->info.max_half_reg >= 0); /* hr0.x */
- }
}
/* wrapper for ir3_assemble() which does some info fixup based on
@@ -518,7 +506,8 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
dump_output(out, so, VARYING_SLOT_PSIZ, "psize");
break;
case SHADER_FRAGMENT:
- dump_reg(out, "pos (bary)", so->pos_regid);
+ dump_reg(out, "pos (bary)",
+ ir3_find_sysval_regid(so, SYSTEM_VALUE_VARYING_COORD));
dump_output(out, so, FRAG_RESULT_DEPTH, "posz");
if (so->color0_mrt) {
dump_output(out, so, FRAG_RESULT_COLOR, "color");
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 93182c710c2..507e89c4735 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -251,10 +251,6 @@ struct ir3_shader_variant {
* + From the vert shader, we only need the output regid
*/
- /* for frag shader, pos_regid holds the frag_vcoord, ie. what is passed
- * to bary.f instructions
- */
- uint8_t pos_regid;
bool frag_coord, frag_face, color0_mrt;
/* NOTE: for input/outputs, slot is: