diff options
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 5 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 8 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_nir_lower_io.c | 40 | ||||
-rw-r--r-- | src/compiler/nir/nir_intrinsics.py | 4 | ||||
-rw-r--r-- | src/gallium/drivers/v3d/v3d_uniforms.c | 8 |
5 files changed, 65 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index e56632590d6..e2de77ddf05 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) ntq_store_dest(c, &instr->dest, 0, vir_IID(c)); break; + case nir_intrinsic_load_fb_layers_v3d: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_FB_LAYERS, 0)); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 4249c181bf1..0489ebdc12e 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -279,6 +279,14 @@ enum quniform_contents { * L2T cache will effectively be the shared memory area. */ QUNIFORM_SHARED_OFFSET, + + /** + * Returns the number of layers in the framebuffer. + * + * This is used to cap gl_Layer in geometry shaders to avoid + * out-of-bounds accesses into the tile state during binning. 
+ */ + QUNIFORM_FB_LAYERS, }; static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index 9b7db65db7e..855b9c44b46 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b, v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src); } + if (var->data.location == VARYING_SLOT_LAYER) { + assert(c->s->info.stage == MESA_SHADER_GEOMETRY); + nir_ssa_def *header = nir_load_var(b, state->gs.header_var); + header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff)); + + /* From the GLES 3.2 spec: + * + * "When fragments are written to a layered framebuffer, the + * fragment’s layer number selects an image from the array + * of images at each attachment (...). If the fragment’s + * layer number is negative, or greater than or equal to + * the minimum number of layers of any attachment, the + * effects of the fragment on the framebuffer contents are + * undefined." + * + * This suggests we can just ignore that situation; however, + * for V3D an out-of-bounds layer index means that the binner + * might do out-of-bounds write accesses to the tile state. The + * simulator has an assert to catch this, so we play safe here + * and we make sure that doesn't happen by setting gl_Layer + * to 0 in that case (we always allocate tile state for at + * least one layer). 
+ */ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_fb_layers_v3d); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); + nir_builder_instr_insert(b, &load->instr); + nir_ssa_def *fb_layers = &load->dest.ssa; + + nir_ssa_def *cond = nir_ige(b, src, fb_layers); + nir_ssa_def *layer_id = + nir_bcsel(b, cond, + nir_imm_int(b, 0), + nir_ishl(b, src, nir_imm_int(b, 16))); + header = nir_ior(b, header, layer_id); + nir_store_var(b, state->gs.header_var, header, 0x1); + } + /* Scalarize outputs if it hasn't happened already, since we want to * schedule each VPM write individually. We can skip any outut * components not read by the FS. diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 3939f8ff510..c53babdde55 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], []) # src[] = { value, render_target } # BASE = sample index store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], []) + +# V3D-specific intrinsic to load the number of layers attached to +# the target framebuffer +intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c index c94f6be4b76..ab57880f06f 100644 --- a/src/gallium/drivers/v3d/v3d_uniforms.c +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, v3d->compute_shared_memory, 0); break; + case QUNIFORM_FB_LAYERS: + cl_aligned_u32(&uniforms, job->num_layers); + break; + default: assert(quniform_contents_is_texture_p0(uinfo->contents[i])); @@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader) /* Compute always recalculates uniforms. 
*/ break; + case QUNIFORM_FB_LAYERS: + dirty |= VC5_DIRTY_FRAMEBUFFER; + break; + default: assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX | |