summary | refs | log | tree | commit | diff | stats
path: root/src/broadcom/compiler/nir_to_vir.c
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2019-02-22 14:26:26 -0800
committer: Eric Anholt <[email protected]> 2019-03-05 10:59:40 -0800
commit: 2780a99ff80cf84f887e8a1dca0079271f90f947 (patch)
tree: a78779a32e805091a16d98e2c60e0d0e596903a2 /src/broadcom/compiler/nir_to_vir.c
parent: a9dd227a47c8fd767b313827ccbb9e3c67e6b8e7 (diff)
v3d: Move the stores for fixed function VS output reads into NIR.
This lets us emit the VPM_WRITEs directly from nir_intrinsic_store_output() (useful once NIR scheduling is in place so that we can reduce register pressure), and lets future NIR scheduling schedule the math to generate them. Even in the meantime, it looks like this lets NIR DCE some more code and make better decisions.

total instructions in shared programs: 6429246 -> 6412976 (-0.25%)
total threads in shared programs: 153924 -> 153934 (<.01%)
total loops in shared programs: 486 -> 483 (-0.62%)
total uniforms in shared programs: 2385436 -> 2388195 (0.12%)

Acked-by: Ian Romanick <[email protected]> (nir)
Diffstat (limited to 'src/broadcom/compiler/nir_to_vir.c')
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c 235
1 files changed, 61 insertions, 174 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index f5729ffa238..8faca1502bf 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1284,151 +1284,19 @@ emit_frag_end(struct v3d_compile *c)
}
static void
-vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index)
+vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
{
if (c->devinfo->ver >= 40) {
- vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val);
- *vpm_index = *vpm_index + 1;
+ vir_STVPMV(c, vir_uniform_ui(c, vpm_index), val);
} else {
+ /* XXX: v3d33_vir_vpm_write_setup(c); */
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
}
-
- c->num_vpm_writes++;
-}
-
-static void
-emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w,
- uint32_t *vpm_index)
-{
- for (int i = 0; i < 2; i++) {
- struct qreg coord = c->outputs[c->output_position_index + i];
- coord = vir_FMUL(c, coord,
- vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i,
- 0));
- coord = vir_FMUL(c, coord, rcp_w);
- vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index);
- }
-
-}
-
-static void
-emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
-{
- struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
- struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
-
- struct qreg z = c->outputs[c->output_position_index + 2];
- z = vir_FMUL(c, z, zscale);
- z = vir_FMUL(c, z, rcp_w);
- z = vir_FADD(c, z, zoffset);
- vir_VPM_WRITE(c, z, vpm_index);
-}
-
-static void
-emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
-{
- vir_VPM_WRITE(c, rcp_w, vpm_index);
-}
-
-static void
-emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index)
-{
- struct qreg point_size;
-
- if (c->output_point_size_index != -1)
- point_size = c->outputs[c->output_point_size_index];
- else
- point_size = vir_uniform_f(c, 1.0);
-
- /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
- * BCM21553).
- */
- point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125));
-
- vir_VPM_WRITE(c, point_size, vpm_index);
-}
-
-static void
-emit_vpm_write_setup(struct v3d_compile *c)
-{
- if (c->devinfo->ver >= 40)
- return;
-
- v3d33_vir_vpm_write_setup(c);
-}
-
-/**
- * Sets up c->outputs[c->output_position_index] for the vertex shader
- * epilogue, if an output vertex position wasn't specified in the user's
- * shader. This may be the case for transform feedback with rasterizer
- * discard enabled.
- */
-static void
-setup_default_position(struct v3d_compile *c)
-{
- if (c->output_position_index != -1)
- return;
-
- c->output_position_index = c->outputs_array_size;
- for (int i = 0; i < 4; i++) {
- add_output(c,
- c->output_position_index + i,
- VARYING_SLOT_POS, i);
- }
}
static void
emit_vert_end(struct v3d_compile *c)
{
- setup_default_position(c);
-
- uint32_t vpm_index = 0;
- struct qreg rcp_w = vir_RECIP(c,
- c->outputs[c->output_position_index + 3]);
-
- emit_vpm_write_setup(c);
-
- if (c->vs_key->is_coord) {
- for (int i = 0; i < 4; i++)
- vir_VPM_WRITE(c, c->outputs[c->output_position_index + i],
- &vpm_index);
- emit_scaled_viewport_write(c, rcp_w, &vpm_index);
- if (c->vs_key->per_vertex_point_size) {
- emit_point_size_write(c, &vpm_index);
- /* emit_rcp_wc_write(c, rcp_w); */
- }
- /* XXX: Z-only rendering */
- if (0)
- emit_zs_write(c, rcp_w, &vpm_index);
- } else {
- emit_scaled_viewport_write(c, rcp_w, &vpm_index);
- emit_zs_write(c, rcp_w, &vpm_index);
- emit_rcp_wc_write(c, rcp_w, &vpm_index);
- if (c->vs_key->per_vertex_point_size)
- emit_point_size_write(c, &vpm_index);
- }
-
- for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
- struct v3d_varying_slot input = c->vs_key->fs_inputs[i];
- int j;
-
- for (j = 0; j < c->num_outputs; j++) {
- struct v3d_varying_slot output = c->output_slots[j];
-
- if (!memcmp(&input, &output, sizeof(input))) {
- vir_VPM_WRITE(c, c->outputs[j],
- &vpm_index);
- break;
- }
- }
- /* Emit padding if we didn't find a declared VS output for
- * this FS input.
- */
- if (j == c->num_outputs)
- vir_VPM_WRITE(c, vir_uniform_f(c, 0.0),
- &vpm_index);
- }
-
/* GFXH-1684: VPM writes need to be complete by the end of the shader.
*/
if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
@@ -1619,6 +1487,9 @@ ntq_setup_fs_inputs(struct v3d_compile *c)
static void
ntq_setup_outputs(struct v3d_compile *c)
{
+ if (c->s->info.stage != MESA_SHADER_FRAGMENT)
+ return;
+
nir_foreach_variable(var, &c->s->outputs) {
unsigned array_len = MAX2(glsl_get_length(var->type), 1);
unsigned loc = var->data.driver_location * 4;
@@ -1632,37 +1503,26 @@ ntq_setup_outputs(struct v3d_compile *c)
var->data.location_frac + i);
}
- if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
- switch (var->data.location) {
- case FRAG_RESULT_COLOR:
- c->output_color_var[0] = var;
- c->output_color_var[1] = var;
- c->output_color_var[2] = var;
- c->output_color_var[3] = var;
- break;
- case FRAG_RESULT_DATA0:
- case FRAG_RESULT_DATA1:
- case FRAG_RESULT_DATA2:
- case FRAG_RESULT_DATA3:
- c->output_color_var[var->data.location -
- FRAG_RESULT_DATA0] = var;
- break;
- case FRAG_RESULT_DEPTH:
- c->output_position_index = loc;
- break;
- case FRAG_RESULT_SAMPLE_MASK:
- c->output_sample_mask_index = loc;
- break;
- }
- } else {
- switch (var->data.location) {
- case VARYING_SLOT_POS:
- c->output_position_index = loc;
- break;
- case VARYING_SLOT_PSIZ:
- c->output_point_size_index = loc;
- break;
- }
+ switch (var->data.location) {
+ case FRAG_RESULT_COLOR:
+ c->output_color_var[0] = var;
+ c->output_color_var[1] = var;
+ c->output_color_var[2] = var;
+ c->output_color_var[3] = var;
+ break;
+ case FRAG_RESULT_DATA0:
+ case FRAG_RESULT_DATA1:
+ case FRAG_RESULT_DATA2:
+ case FRAG_RESULT_DATA3:
+ c->output_color_var[var->data.location -
+ FRAG_RESULT_DATA0] = var;
+ break;
+ case FRAG_RESULT_DEPTH:
+ c->output_position_index = loc;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ c->output_sample_mask_index = loc;
+ break;
}
}
}
@@ -1842,6 +1702,26 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
}
break;
+ case nir_intrinsic_load_viewport_x_scale:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE, 0));
+ break;
+
+ case nir_intrinsic_load_viewport_y_scale:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_VIEWPORT_Y_SCALE, 0));
+ break;
+
+ case nir_intrinsic_load_viewport_z_scale:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0));
+ break;
+
+ case nir_intrinsic_load_viewport_z_offset:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0));
+ break;
+
case nir_intrinsic_load_alpha_ref_float:
ntq_store_dest(c, &instr->dest, 0,
vir_uniform(c, QUNIFORM_ALPHA_REF, 0));
@@ -1919,16 +1799,23 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_store_output:
- offset = ((nir_intrinsic_base(instr) +
- nir_src_as_uint(instr->src[1])) * 4 +
- nir_intrinsic_component(instr));
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ offset = ((nir_intrinsic_base(instr) +
+ nir_src_as_uint(instr->src[1])) * 4 +
+ nir_intrinsic_component(instr));
+ for (int i = 0; i < instr->num_components; i++) {
+ c->outputs[offset + i] =
+ vir_MOV(c,
+ ntq_get_src(c,
+ instr->src[0], i));
+ }
+ } else {
+ assert(instr->num_components == 1);
- for (int i = 0; i < instr->num_components; i++) {
- c->outputs[offset + i] =
- vir_MOV(c, ntq_get_src(c, instr->src[0], i));
+ vir_VPM_WRITE(c,
+ ntq_get_src(c, instr->src[0], 0),
+ nir_intrinsic_base(instr));
}
- c->num_outputs = MAX2(c->num_outputs,
- offset + instr->num_components);
break;
case nir_intrinsic_image_deref_size: