summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2017-02-06 17:30:59 -0800
committerEric Anholt <[email protected]>2017-02-10 14:17:04 -0800
commitb2309393039b2ec0cc00a8e6fd828c60c4ef1e11 (patch)
treed3968c9e1e8bc045e5cd4612b85155f3e6e31dab /src/gallium/drivers/vc4
parentd7b3707c612027b354deea6bc5eae56a02d5f8d5 (diff)
vc4: Avoid emitting small immediates for UBO indirect load address guards.
The kernel will reject our shader if we emit one here, and having 4, 8, or 12 as the top end of our UBO clamp is rare enough that it's not worth making the kernel let us. Fixes piglit fs-const-array-of-struct and fs-const-array-of-struct-of-array since recent GLSL linking changes made us get this as an indirect load of a uniform, instead of a temporary. Cc: "13.0 17.0" <[email protected]>
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c11
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c2
5 files changed, 20 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index 89c48578021..07eca71f23e 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
if (uses_small_imm)
continue;
+ /* Don't propagate small immediates into the top-end bounds
+ * checking for indirect UBO loads. The kernel doesn't parse
+ * small immediates and rejects the shader in this case. UBO
+ * loads are much more expensive than the uniform load, and
+ * indirect UBO regions are usually much larger than a small
+ * immediate, so it's not worth updating the kernel to allow
+ * optimizing it.
+ */
+ if (inst->op == QOP_MIN_NOIMM)
+ continue;
+
for (int i = 0; i < qir_get_nsrc(inst); i++) {
struct qreg src = qir_follow_movs(c, inst->src[i]);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a7cc6687962..4865bcbd283 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -116,9 +116,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
/* Clamp to [0, array size). Note that MIN/MAX are signed. */
indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
- indirect_offset = qir_MIN(c, indirect_offset,
- qir_uniform_ui(c, (range->dst_offset +
- range->size - 4)));
+ indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
+ qir_uniform_ui(c, (range->dst_offset +
+ range->size - 4)));
qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
indirect_offset,
@@ -382,7 +382,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
/* Perform the clamping required by kernel validation. */
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
- addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
+ addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index d4f35d8f01a..ed76c64ba9c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_ASR] = { "asr", 1, 2 },
[QOP_SHL] = { "shl", 1, 2 },
[QOP_MIN] = { "min", 1, 2 },
+ [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
[QOP_MAX] = { "max", 1, 2 },
[QOP_AND] = { "and", 1, 2 },
[QOP_OR] = { "or", 1, 2 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index e189bc32d94..daeb52b9aee 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -123,6 +123,7 @@ enum qop {
QOP_SHR,
QOP_ASR,
QOP_MIN,
+ QOP_MIN_NOIMM,
QOP_MAX,
QOP_AND,
QOP_OR,
@@ -725,6 +726,7 @@ QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
+QIR_ALU2(MIN_NOIMM)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 47fc0b0928b..60ca87aa467 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
[QOP_MOV] = { QPU_A_OR },
[QOP_FMOV] = { QPU_A_FMAX },
[QOP_MMOV] = { QPU_M_V8MIN },
+
+ [QOP_MIN_NOIMM] = { QPU_A_MIN },
};
uint64_t unpack = 0;