diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_opt_small_immediates.c | 11 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 2 |
5 files changed, 20 insertions, 4 deletions
diff --git a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c index 89c48578021..07eca71f23e 100644 --- a/src/gallium/drivers/vc4/vc4_opt_small_immediates.c +++ b/src/gallium/drivers/vc4/vc4_opt_small_immediates.c @@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c) if (uses_small_imm) continue; + /* Don't propagate small immediates into the top-end bounds + * checking for indirect UBO loads. The kernel doesn't parse + * small immediates and rejects the shader in this case. UBO + * loads are much more expensive than the uniform load, and + * indirect UBO regions are usually much larger than a small + * immediate, so it's not worth updating the kernel to allow + * optimizing it. + */ + if (inst->op == QOP_MIN_NOIMM) + continue; + for (int i = 0; i < qir_get_nsrc(inst); i++) { struct qreg src = qir_follow_movs(c, inst->src[i]); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index c2d67277ef8..974204ae897 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -116,9 +116,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr) /* Clamp to [0, array size). Note that MIN/MAX are signed. */ indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0)); - indirect_offset = qir_MIN(c, indirect_offset, - qir_uniform_ui(c, (range->dst_offset + - range->size - 4))); + indirect_offset = qir_MIN_NOIMM(c, indirect_offset, + qir_uniform_ui(c, (range->dst_offset + + range->size - 4))); qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0), indirect_offset, @@ -382,7 +382,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr) /* Perform the clamping required by kernel validation. */ addr = qir_MAX(c, addr, qir_uniform_ui(c, 0)); - addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4)); + addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4)); qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0), addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit)); diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index d4f35d8f01a..ed76c64ba9c 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_ASR] = { "asr", 1, 2 }, [QOP_SHL] = { "shl", 1, 2 }, [QOP_MIN] = { "min", 1, 2 }, + [QOP_MIN_NOIMM] = { "min_noimm", 1, 2 }, [QOP_MAX] = { "max", 1, 2 }, [QOP_AND] = { "and", 1, 2 }, [QOP_OR] = { "or", 1, 2 }, diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index e189bc32d94..daeb52b9aee 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -123,6 +123,7 @@ enum qop { QOP_SHR, QOP_ASR, QOP_MIN, + QOP_MIN_NOIMM, QOP_MAX, QOP_AND, QOP_OR, @@ -725,6 +726,7 @@ QIR_ALU2(SHL) QIR_ALU2(SHR) QIR_ALU2(ASR) QIR_ALU2(MIN) +QIR_ALU2(MIN_NOIMM) QIR_ALU2(MAX) QIR_ALU2(AND) QIR_ALU2(OR) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 47fc0b0928b..60ca87aa467 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c, [QOP_MOV] = { QPU_A_OR }, [QOP_FMOV] = { QPU_A_FMAX }, [QOP_MMOV] = { QPU_M_V8MIN }, + + [QOP_MIN_NOIMM] = { QPU_A_MIN }, }; uint64_t unpack = 0; |