summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2018-06-14 11:17:11 -0700
committer: Eric Anholt <[email protected]> 2018-06-15 16:09:39 -0700
commit: 778594ae12be51110b6b1411e72c5fbb95cbadd3 (patch)
tree: 1ecbcb9329343f883a65c90efdc66fb09930bacd /src
parent: e130ada24310f0e3adc0349c0fb2098ad20d65d9 (diff)
v3d: Limit shader threading according to our maximum TMU fifo usage.
Fixes simulator assertion failures in dEQP-GLES3.functional.shaders.texture_functions.texture.samplercubeshadow_bias_fragment and similar complicated cases.
Diffstat (limited to 'src')
-rw-r--r-- src/broadcom/compiler/v3d40_tex.c 34
1 files changed, 24 insertions, 10 deletions
diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c
index 0b41f37d1ea..2fd8c3b5a6e 100644
--- a/src/broadcom/compiler/v3d40_tex.c
+++ b/src/broadcom/compiler/v3d40_tex.c
@@ -31,9 +31,12 @@
#include "cle/v3d_packet_v41_pack.h"
static void
-vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
+vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val,
+ int *tmu_writes)
{
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, waddr), val);
+
+ (*tmu_writes)++;
}
static void
@@ -49,6 +52,7 @@ void
v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
{
unsigned unit = instr->texture_index;
+ int tmu_writes = 0;
struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
};
@@ -82,29 +86,32 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
if (non_array_components > 1) {
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUT,
ntq_get_src(c, instr->src[i].src,
- 1));
+ 1), &tmu_writes);
}
if (non_array_components > 2) {
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUR,
ntq_get_src(c, instr->src[i].src,
- 2));
+ 2), &tmu_writes);
}
if (instr->is_array) {
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUI,
ntq_get_src(c, instr->src[i].src,
- instr->coord_components - 1));
+ instr->coord_components - 1),
+ &tmu_writes);
}
break;
case nir_tex_src_bias:
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
- ntq_get_src(c, instr->src[i].src, 0));
+ ntq_get_src(c, instr->src[i].src, 0),
+ &tmu_writes);
break;
case nir_tex_src_lod:
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
- ntq_get_src(c, instr->src[i].src, 0));
+ ntq_get_src(c, instr->src[i].src, 0),
+ &tmu_writes);
if (instr->op != nir_texop_txf &&
instr->op != nir_texop_tg4) {
@@ -114,7 +121,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
case nir_tex_src_comparator:
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUDREF,
- ntq_get_src(c, instr->src[i].src, 0));
+ ntq_get_src(c, instr->src[i].src, 0),
+ &tmu_writes);
break;
case nir_tex_src_offset: {
@@ -173,15 +181,21 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
if (instr->op == nir_texop_txf) {
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
- vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s);
+ vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s, &tmu_writes);
} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s);
+ vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s, &tmu_writes);
} else {
- vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s);
+ vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s, &tmu_writes);
}
vir_emit_thrsw(c);
+ /* The input FIFO has 16 slots across all threads, so make sure we
+ * don't overfill our allocation.
+ */
+ while (tmu_writes > 16 / c->threads)
+ c->threads /= 2;
+
struct qreg return_values[4];
for (int i = 0; i < 4; i++) {
/* Swizzling .zw of an RG texture should give undefined