diff options
author | Eric Anholt <[email protected]> | 2019-02-22 14:26:26 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2019-03-05 10:59:40 -0800 |
commit | 2780a99ff80cf84f887e8a1dca0079271f90f947 (patch) | |
tree | a78779a32e805091a16d98e2c60e0d0e596903a2 /src/broadcom/compiler/nir_to_vir.c | |
parent | a9dd227a47c8fd767b313827ccbb9e3c67e6b8e7 (diff) |
v3d: Move the stores for fixed function VS output reads into NIR.
This lets us emit the VPM_WRITEs directly from
nir_intrinsic_store_output() (useful once NIR scheduling is in place so
that we can reduce register pressure), and lets future NIR scheduling
schedule the math to generate them. Even in the meantime, it looks like
this lets NIR DCE some more code and make better decisions.
total instructions in shared programs: 6429246 -> 6412976 (-0.25%)
total threads in shared programs: 153924 -> 153934 (<.01%)
total loops in shared programs: 486 -> 483 (-0.62%)
total uniforms in shared programs: 2385436 -> 2388195 (0.12%)
Acked-by: Ian Romanick <[email protected]> (nir)
Diffstat (limited to 'src/broadcom/compiler/nir_to_vir.c')
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 235 |
1 files changed, 61 insertions, 174 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index f5729ffa238..8faca1502bf 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1284,151 +1284,19 @@ emit_frag_end(struct v3d_compile *c) } static void -vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index) +vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index) { if (c->devinfo->ver >= 40) { - vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val); - *vpm_index = *vpm_index + 1; + vir_STVPMV(c, vir_uniform_ui(c, vpm_index), val); } else { + /* XXX: v3d33_vir_vpm_write_setup(c); */ vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); } - - c->num_vpm_writes++; -} - -static void -emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w, - uint32_t *vpm_index) -{ - for (int i = 0; i < 2; i++) { - struct qreg coord = c->outputs[c->output_position_index + i]; - coord = vir_FMUL(c, coord, - vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, - 0)); - coord = vir_FMUL(c, coord, rcp_w); - vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index); - } - -} - -static void -emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index) -{ - struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0); - struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0); - - struct qreg z = c->outputs[c->output_position_index + 2]; - z = vir_FMUL(c, z, zscale); - z = vir_FMUL(c, z, rcp_w); - z = vir_FADD(c, z, zoffset); - vir_VPM_WRITE(c, z, vpm_index); -} - -static void -emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index) -{ - vir_VPM_WRITE(c, rcp_w, vpm_index); -} - -static void -emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index) -{ - struct qreg point_size; - - if (c->output_point_size_index != -1) - point_size = c->outputs[c->output_point_size_index]; - else - point_size = vir_uniform_f(c, 1.0); - - /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, - * BCM21553). - */ - point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125)); - - vir_VPM_WRITE(c, point_size, vpm_index); -} - -static void -emit_vpm_write_setup(struct v3d_compile *c) -{ - if (c->devinfo->ver >= 40) - return; - - v3d33_vir_vpm_write_setup(c); -} - -/** - * Sets up c->outputs[c->output_position_index] for the vertex shader - * epilogue, if an output vertex position wasn't specified in the user's - * shader. This may be the case for transform feedback with rasterizer - * discard enabled. - */ -static void -setup_default_position(struct v3d_compile *c) -{ - if (c->output_position_index != -1) - return; - - c->output_position_index = c->outputs_array_size; - for (int i = 0; i < 4; i++) { - add_output(c, - c->output_position_index + i, - VARYING_SLOT_POS, i); - } } static void emit_vert_end(struct v3d_compile *c) { - setup_default_position(c); - - uint32_t vpm_index = 0; - struct qreg rcp_w = vir_RECIP(c, - c->outputs[c->output_position_index + 3]); - - emit_vpm_write_setup(c); - - if (c->vs_key->is_coord) { - for (int i = 0; i < 4; i++) - vir_VPM_WRITE(c, c->outputs[c->output_position_index + i], - &vpm_index); - emit_scaled_viewport_write(c, rcp_w, &vpm_index); - if (c->vs_key->per_vertex_point_size) { - emit_point_size_write(c, &vpm_index); - /* emit_rcp_wc_write(c, rcp_w); */ - } - /* XXX: Z-only rendering */ - if (0) - emit_zs_write(c, rcp_w, &vpm_index); - } else { - emit_scaled_viewport_write(c, rcp_w, &vpm_index); - emit_zs_write(c, rcp_w, &vpm_index); - emit_rcp_wc_write(c, rcp_w, &vpm_index); - if (c->vs_key->per_vertex_point_size) - emit_point_size_write(c, &vpm_index); - } - - for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { - struct v3d_varying_slot input = c->vs_key->fs_inputs[i]; - int j; - - for (j = 0; j < c->num_outputs; j++) { - struct v3d_varying_slot output = c->output_slots[j]; - - if (!memcmp(&input, &output, sizeof(input))) { - vir_VPM_WRITE(c, c->outputs[j], - &vpm_index); - break; - } - } - /* Emit padding if we didn't find a declared VS output for - * this FS input. - */ - if (j == c->num_outputs) - vir_VPM_WRITE(c, vir_uniform_f(c, 0.0), - &vpm_index); - } - /* GFXH-1684: VPM writes need to be complete by the end of the shader. */ if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42) @@ -1619,6 +1487,9 @@ ntq_setup_fs_inputs(struct v3d_compile *c) static void ntq_setup_outputs(struct v3d_compile *c) { + if (c->s->info.stage != MESA_SHADER_FRAGMENT) + return; + nir_foreach_variable(var, &c->s->outputs) { unsigned array_len = MAX2(glsl_get_length(var->type), 1); unsigned loc = var->data.driver_location * 4; @@ -1632,37 +1503,26 @@ ntq_setup_outputs(struct v3d_compile *c) var->data.location_frac + i); } - if (c->s->info.stage == MESA_SHADER_FRAGMENT) { - switch (var->data.location) { - case FRAG_RESULT_COLOR: - c->output_color_var[0] = var; - c->output_color_var[1] = var; - c->output_color_var[2] = var; - c->output_color_var[3] = var; - break; - case FRAG_RESULT_DATA0: - case FRAG_RESULT_DATA1: - case FRAG_RESULT_DATA2: - case FRAG_RESULT_DATA3: - c->output_color_var[var->data.location - - FRAG_RESULT_DATA0] = var; - break; - case FRAG_RESULT_DEPTH: - c->output_position_index = loc; - break; - case FRAG_RESULT_SAMPLE_MASK: - c->output_sample_mask_index = loc; - break; - } - } else { - switch (var->data.location) { - case VARYING_SLOT_POS: - c->output_position_index = loc; - break; - case VARYING_SLOT_PSIZ: - c->output_point_size_index = loc; - break; - } + switch (var->data.location) { + case FRAG_RESULT_COLOR: + c->output_color_var[0] = var; + c->output_color_var[1] = var; + c->output_color_var[2] = var; + c->output_color_var[3] = var; + break; + case FRAG_RESULT_DATA0: + case FRAG_RESULT_DATA1: + case FRAG_RESULT_DATA2: + case FRAG_RESULT_DATA3: + c->output_color_var[var->data.location - + FRAG_RESULT_DATA0] = var; + break; + case FRAG_RESULT_DEPTH: + c->output_position_index = loc; + break; + case FRAG_RESULT_SAMPLE_MASK: + c->output_sample_mask_index = loc; + break; } } } @@ -1842,6 +1702,26 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) } break; + case nir_intrinsic_load_viewport_x_scale: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE, 0)); + break; + + case nir_intrinsic_load_viewport_y_scale: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_VIEWPORT_Y_SCALE, 0)); + break; + + case nir_intrinsic_load_viewport_z_scale: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0)); + break; + + case nir_intrinsic_load_viewport_z_offset: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0)); + break; + case nir_intrinsic_load_alpha_ref_float: ntq_store_dest(c, &instr->dest, 0, vir_uniform(c, QUNIFORM_ALPHA_REF, 0)); @@ -1919,16 +1799,23 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_store_output: - offset = ((nir_intrinsic_base(instr) + - nir_src_as_uint(instr->src[1])) * 4 + - nir_intrinsic_component(instr)); + if (c->s->info.stage == MESA_SHADER_FRAGMENT) { + offset = ((nir_intrinsic_base(instr) + + nir_src_as_uint(instr->src[1])) * 4 + + nir_intrinsic_component(instr)); + for (int i = 0; i < instr->num_components; i++) { + c->outputs[offset + i] = + vir_MOV(c, + ntq_get_src(c, + instr->src[0], i)); + } + } else { + assert(instr->num_components == 1); - for (int i = 0; i < instr->num_components; i++) { - c->outputs[offset + i] = - vir_MOV(c, ntq_get_src(c, instr->src[0], i)); + vir_VPM_WRITE(c, + ntq_get_src(c, instr->src[0], 0), + nir_intrinsic_base(instr)); } - c->num_outputs = MAX2(c->num_outputs, - offset + instr->num_components); break; case nir_intrinsic_image_deref_size: |