aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c | 5
-rw-r--r-- src/broadcom/compiler/v3d_compiler.h | 8
-rw-r--r-- src/broadcom/compiler/v3d_nir_lower_io.c | 40
-rw-r--r-- src/compiler/nir/nir_intrinsics.py | 4
-rw-r--r-- src/gallium/drivers/v3d/v3d_uniforms.c | 8
5 files changed, 65 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index e56632590d6..e2de77ddf05 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
ntq_store_dest(c, &instr->dest, 0, vir_IID(c));
break;
+ case nir_intrinsic_load_fb_layers_v3d:
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_uniform(c, QUNIFORM_FB_LAYERS, 0));
+ break;
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 4249c181bf1..0489ebdc12e 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -279,6 +279,14 @@ enum quniform_contents {
* L2T cache will effectively be the shared memory area.
*/
QUNIFORM_SHARED_OFFSET,
+
+ /**
+ * Returns the number of layers in the framebuffer.
+ *
+ * This is used to cap gl_Layer in geometry shaders to avoid
+ * out-of-bounds accesses into the tile state during binning.
+ */
+ QUNIFORM_FB_LAYERS,
};
static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
index 9b7db65db7e..855b9c44b46 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
}
+ if (var->data.location == VARYING_SLOT_LAYER) {
+ assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
+ nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+ header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
+
+ /* From the GLES 3.2 spec:
+ *
+ * "When fragments are written to a layered framebuffer, the
+ * fragment’s layer number selects an image from the array
+ * of images at each attachment (...). If the fragment’s
+ * layer number is negative, or greater than or equal to
+ * the minimum number of layers of any attachment, the
+ * effects of the fragment on the framebuffer contents are
+ * undefined."
+ *
+ * This suggests we can just ignore that situation. However,
+ * for V3D an out-of-bounds layer index means that the binner
+ * might do out-of-bounds writes to the tile state. The
+ * simulator has an assert to catch this, so we play it safe
+ * here and make sure that doesn't happen by setting gl_Layer
+ * to 0 in that case (we always allocate tile state for at
+ * least one layer).
+ */
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_load_fb_layers_v3d);
+ load->num_components = 1;
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+ nir_builder_instr_insert(b, &load->instr);
+ nir_ssa_def *fb_layers = &load->dest.ssa;
+
+ nir_ssa_def *cond = nir_ige(b, src, fb_layers);
+ nir_ssa_def *layer_id =
+ nir_bcsel(b, cond,
+ nir_imm_int(b, 0),
+ nir_ishl(b, src, nir_imm_int(b, 16)));
+ header = nir_ior(b, header, layer_id);
+ nir_store_var(b, state->gs.header_var, header, 0x1);
+ }
+
/* Scalarize outputs if it hasn't happened already, since we want to
* schedule each VPM write individually. We can skip any output
* components not read by the FS.
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 3939f8ff510..c53babdde55 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], [])
# src[] = { value, render_target }
# BASE = sample index
store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], [])
+
+# V3D-specific intrinsic to load the number of layers attached to
+# the target framebuffer
+intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c
index c94f6be4b76..ab57880f06f 100644
--- a/src/gallium/drivers/v3d/v3d_uniforms.c
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
v3d->compute_shared_memory, 0);
break;
+ case QUNIFORM_FB_LAYERS:
+ cl_aligned_u32(&uniforms, job->num_layers);
+ break;
+
default:
assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
@@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
/* Compute always recalculates uniforms. */
break;
+ case QUNIFORM_FB_LAYERS:
+ dirty |= VC5_DIRTY_FRAMEBUFFER;
+ break;
+
default:
assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |