aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/vc4/vc4_opt_small_immediates.c11
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c8
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.c1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_emit.c2
5 files changed, 20 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
index 89c48578021..07eca71f23e 100644
--- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
+++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
if (uses_small_imm)
continue;
+ /* Don't propagate small immediates into the top-end bounds
+ * checking for indirect UBO loads. The kernel doesn't parse
+ * small immediates and rejects the shader in this case. UBO
+ * loads are much more expensive than the uniform load, and
+ * indirect UBO regions are usually much larger than a small
+ * immediate, so it's not worth updating the kernel to allow
+ * optimizing it.
+ */
+ if (inst->op == QOP_MIN_NOIMM)
+ continue;
+
for (int i = 0; i < qir_get_nsrc(inst); i++) {
struct qreg src = qir_follow_movs(c, inst->src[i]);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a7cc6687962..4865bcbd283 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -116,9 +116,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
/* Clamp to [0, array size). Note that MIN/MAX are signed. */
indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
- indirect_offset = qir_MIN(c, indirect_offset,
- qir_uniform_ui(c, (range->dst_offset +
- range->size - 4)));
+ indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
+ qir_uniform_ui(c, (range->dst_offset +
+ range->size - 4)));
qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
indirect_offset,
@@ -382,7 +382,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
/* Perform the clamping required by kernel validation. */
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
- addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
+ addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index d4f35d8f01a..ed76c64ba9c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_ASR] = { "asr", 1, 2 },
[QOP_SHL] = { "shl", 1, 2 },
[QOP_MIN] = { "min", 1, 2 },
+ [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
[QOP_MAX] = { "max", 1, 2 },
[QOP_AND] = { "and", 1, 2 },
[QOP_OR] = { "or", 1, 2 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index e189bc32d94..daeb52b9aee 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -123,6 +123,7 @@ enum qop {
QOP_SHR,
QOP_ASR,
QOP_MIN,
+ QOP_MIN_NOIMM,
QOP_MAX,
QOP_AND,
QOP_OR,
@@ -725,6 +726,7 @@ QIR_ALU2(SHL)
QIR_ALU2(SHR)
QIR_ALU2(ASR)
QIR_ALU2(MIN)
+QIR_ALU2(MIN_NOIMM)
QIR_ALU2(MAX)
QIR_ALU2(AND)
QIR_ALU2(OR)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 47fc0b0928b..60ca87aa467 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
[QOP_MOV] = { QPU_A_OR },
[QOP_FMOV] = { QPU_A_FMAX },
[QOP_MMOV] = { QPU_M_V8MIN },
+
+ [QOP_MIN_NOIMM] = { QPU_A_MIN },
};
uint64_t unpack = 0;