diff options
-rw-r--r-- | src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c | 88 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 3 |
2 files changed, 66 insertions, 25 deletions
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index ba1ae0a1925..ab9a6512e82 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -58,7 +58,8 @@ struct vc4_shader_validation_state { * * This is used for the validation of direct address memory reads. */ - uint32_t live_clamp_offsets[32 + 32 + 4]; + uint32_t live_min_clamp_offsets[32 + 32 + 4]; + bool live_max_clamp_regs[32 + 32 + 4]; }; static uint32_t @@ -80,13 +81,14 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b) static uint32_t raddr_add_a_to_live_reg_index(uint64_t inst) { + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); if (add_a == QPU_MUX_A) { return raddr_a; - } else if (add_a == QPU_MUX_B) { + } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) { return 32 + raddr_b; } else if (add_a <= QPU_MUX_R3) { return 64 + add_a; @@ -182,7 +184,7 @@ check_tmu_write(uint64_t inst, return false; } - clamp_offset = validation_state->live_clamp_offsets[clamp_reg]; + clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; if (clamp_offset == ~0) { DRM_ERROR("direct TMU load wasn't clamped\n"); return false; @@ -245,8 +247,6 @@ check_register_write(uint64_t inst, uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); - bool is_b = is_mul != ((inst & QPU_WS) != 0); - uint32_t live_reg_index; switch (waddr) { case QPU_W_UNIFORMS_ADDRESS: @@ -301,14 +301,6 @@ check_register_write(uint64_t inst, return true; } - /* Clear out the live offset clamp tracking for the written register. - * If this particular instruction is setting up an offset clamp, it'll - * get tracked immediately after we return. - */ - live_reg_index = waddr_to_live_reg_index(waddr, is_b); - if (live_reg_index != ~0) - validation_state->live_clamp_offsets[live_reg_index] = ~0; - return true; } @@ -317,26 +309,72 @@ track_live_clamps(uint64_t inst, struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state) { + uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); - bool is_b = inst & QPU_WS; - uint32_t live_reg_index; + bool ws = inst & QPU_WS; + uint32_t lri_add_a, lri_add, lri_mul; + bool add_a_is_min_0; - if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN) + /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0), + * before we clear previous live state. + */ + lri_add_a = raddr_add_a_to_live_reg_index(inst); + add_a_is_min_0 = (lri_add_a != ~0 && + validation_state->live_max_clamp_regs[lri_add_a]); + + /* Clear live state for registers written by our instruction. */ + lri_add = waddr_to_live_reg_index(waddr_add, ws); + lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); + if (lri_mul != ~0) { + validation_state->live_max_clamp_regs[lri_mul] = false; + validation_state->live_min_clamp_offsets[lri_mul] = ~0; + } + if (lri_add != ~0) { + validation_state->live_max_clamp_regs[lri_add] = false; + validation_state->live_min_clamp_offsets[lri_add] = ~0; + } else { + /* Nothing further to do for live tracking, since only ADDs + * generate new live clamp registers. + */ return; + } - if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && - !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && - sig != QPU_SIG_SMALL_IMM)) { + /* Now, handle remaining live clamp tracking for the ADD operation. */ + + if (cond_add != QPU_COND_ALWAYS) return; - } - live_reg_index = waddr_to_live_reg_index(waddr_add, is_b); - if (live_reg_index != ~0) { - validation_state->live_clamp_offsets[live_reg_index] = + if (op_add == QPU_A_MAX) { + /* Track live clamps of a value to a minimum of 0 (in either + * arg). + */ + if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || + (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { + return; + } + + validation_state->live_max_clamp_regs[lri_add] = true; + } if (op_add == QPU_A_MIN) { + /* Track live clamps of a value clamped to a minimum of 0 and + * a maximum of some uniform's offset. + */ + if (!add_a_is_min_0) + return; + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && + sig != QPU_SIG_SMALL_IMM)) { + return; + } + + validation_state->live_min_clamp_offsets[lri_add] = validated_shader->uniforms_size; } } @@ -398,8 +436,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) for (i = 0; i < 8; i++) validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; - for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++) - validation_state.live_clamp_offsets[i] = ~0; + for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) + validation_state.live_min_clamp_offsets[i] = ~0; shader = shader_obj->vaddr; max_ip = shader_obj->base.size / sizeof(uint64_t); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 91540cfe2fa..bb45eb1288e 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -147,6 +147,9 @@ indirect_uniform_load(struct vc4_compile *c, indirect_offset = qir_ADD(c, indirect_offset, qir_uniform_ui(c, (range->dst_offset + offset))); + + /* Clamp to [0, array size). Note that MIN/MAX are signed. */ + indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0)); indirect_offset = qir_MIN(c, indirect_offset, qir_uniform_ui(c, (range->dst_offset + range->size - 4))); |