aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c88
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c3
2 files changed, 66 insertions, 25 deletions
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index ba1ae0a1925..ab9a6512e82 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -58,7 +58,8 @@ struct vc4_shader_validation_state {
*
* This is used for the validation of direct address memory reads.
*/
- uint32_t live_clamp_offsets[32 + 32 + 4];
+ uint32_t live_min_clamp_offsets[32 + 32 + 4];
+ bool live_max_clamp_regs[32 + 32 + 4];
};
static uint32_t
@@ -80,13 +81,14 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b)
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
+ uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
if (add_a == QPU_MUX_A) {
return raddr_a;
- } else if (add_a == QPU_MUX_B) {
+ } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
return 32 + raddr_b;
} else if (add_a <= QPU_MUX_R3) {
return 64 + add_a;
@@ -182,7 +184,7 @@ check_tmu_write(uint64_t inst,
return false;
}
- clamp_offset = validation_state->live_clamp_offsets[clamp_reg];
+ clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
if (clamp_offset == ~0) {
DRM_ERROR("direct TMU load wasn't clamped\n");
return false;
@@ -245,8 +247,6 @@ check_register_write(uint64_t inst,
uint32_t waddr = (is_mul ?
QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
QPU_GET_FIELD(inst, QPU_WADDR_ADD));
- bool is_b = is_mul != ((inst & QPU_WS) != 0);
- uint32_t live_reg_index;
switch (waddr) {
case QPU_W_UNIFORMS_ADDRESS:
@@ -301,14 +301,6 @@ check_register_write(uint64_t inst,
return true;
}
- /* Clear out the live offset clamp tracking for the written register.
- * If this particular instruction is setting up an offset clamp, it'll
- * get tracked immediately after we return.
- */
- live_reg_index = waddr_to_live_reg_index(waddr, is_b);
- if (live_reg_index != ~0)
- validation_state->live_clamp_offsets[live_reg_index] = ~0;
-
return true;
}
@@ -317,26 +309,72 @@ track_live_clamps(uint64_t inst,
struct vc4_validated_shader_info *validated_shader,
struct vc4_shader_validation_state *validation_state)
{
+ uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+ uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+ uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+ uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
- bool is_b = inst & QPU_WS;
- uint32_t live_reg_index;
+ bool ws = inst & QPU_WS;
+ uint32_t lri_add_a, lri_add, lri_mul;
+ bool add_a_is_min_0;
- if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN)
+ /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
+ * before we clear previous live state.
+ */
+ lri_add_a = raddr_add_a_to_live_reg_index(inst);
+ add_a_is_min_0 = (lri_add_a != ~0 &&
+ validation_state->live_max_clamp_regs[lri_add_a]);
+
+ /* Clear live state for registers written by our instruction. */
+ lri_add = waddr_to_live_reg_index(waddr_add, ws);
+ lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+ if (lri_mul != ~0) {
+ validation_state->live_max_clamp_regs[lri_mul] = false;
+ validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+ }
+ if (lri_add != ~0) {
+ validation_state->live_max_clamp_regs[lri_add] = false;
+ validation_state->live_min_clamp_offsets[lri_add] = ~0;
+ } else {
+ /* Nothing further to do for live tracking, since only ADDs
+ * generate new live clamp registers.
+ */
return;
+ }
- if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
- !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
- sig != QPU_SIG_SMALL_IMM)) {
+ /* Now, handle remaining live clamp tracking for the ADD operation. */
+
+ if (cond_add != QPU_COND_ALWAYS)
return;
- }
- live_reg_index = waddr_to_live_reg_index(waddr_add, is_b);
- if (live_reg_index != ~0) {
- validation_state->live_clamp_offsets[live_reg_index] =
+ if (op_add == QPU_A_MAX) {
+ /* Track live clamps of a value to a minimum of 0 (in either
+ * arg).
+ */
+ if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+ (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+ return;
+ }
+
+ validation_state->live_max_clamp_regs[lri_add] = true;
+ } if (op_add == QPU_A_MIN) {
+ /* Track live clamps of a value clamped to a minimum of 0 and
+ * a maximum of some uniform's offset.
+ */
+ if (!add_a_is_min_0)
+ return;
+
+ if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+ !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+ sig != QPU_SIG_SMALL_IMM)) {
+ return;
+ }
+
+ validation_state->live_min_clamp_offsets[lri_add] =
validated_shader->uniforms_size;
}
}
@@ -398,8 +436,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
for (i = 0; i < 8; i++)
validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
- for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++)
- validation_state.live_clamp_offsets[i] = ~0;
+ for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
+ validation_state.live_min_clamp_offsets[i] = ~0;
shader = shader_obj->vaddr;
max_ip = shader_obj->base.size / sizeof(uint64_t);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 91540cfe2fa..bb45eb1288e 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -147,6 +147,9 @@ indirect_uniform_load(struct vc4_compile *c,
indirect_offset = qir_ADD(c, indirect_offset,
qir_uniform_ui(c, (range->dst_offset +
offset)));
+
+ /* Clamp to [0, array size). Note that MIN/MAX are signed. */
+ indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
indirect_offset = qir_MIN(c, indirect_offset,
qir_uniform_ui(c, (range->dst_offset +
range->size - 4)));