aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/vc4
diff options
context:
space:
mode:
author: Jonas Pfeil <[email protected]> 2016-11-10 17:52:03 -0800
committer: Eric Anholt <[email protected]> 2016-11-12 19:21:46 -0800
commit: 5debfeb86f524b06aa3b80f9855947a57b5f91e8 (patch)
tree: 6b01741d60d5c8a45c609bd70761c812fe591ce8 /src/gallium/drivers/vc4
parent: 96ffee2d02a9cbf7ff0042b3083376724b68ed36 (diff)
vc4: Add simulator kernel validation for multithreaded fragment shaders.
This is Jonas Pfeil's code from the kernel, brought back to Mesa by anholt.
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_drv.h 2
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_validate.c 17
-rw-r--r-- src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c 62
3 files changed, 76 insertions, 5 deletions
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 90f45397d83..8f5ed00d96f 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -150,6 +150,8 @@ struct vc4_validated_shader_info
uint32_t num_uniform_addr_offsets;
uint32_t *uniform_addr_offsets;
+
+ bool is_threaded;
};
/* vc4_validate.c */
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index a9dce1fa379..bd193b993b3 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -780,11 +780,6 @@ validate_gl_shader_rec(struct drm_device *dev,
exec->shader_rec_v += roundup(packet_size, 16);
exec->shader_rec_size -= packet_size;
- if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
- DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
- return -EINVAL;
- }
-
for (i = 0; i < shader_reloc_count; i++) {
if (src_handles[i] > exec->bo_count) {
DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
@@ -801,6 +796,18 @@ validate_gl_shader_rec(struct drm_device *dev,
return -EINVAL;
}
+ if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
+ to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
+ DRM_ERROR("Thread mode of CL and FS do not match\n");
+ return -EINVAL;
+ }
+
+ if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
+ to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
+ DRM_ERROR("cs and vs cannot be threaded\n");
+ return -EINVAL;
+ }
+
for (i = 0; i < shader_reloc_count; i++) {
struct vc4_validated_shader_info *validated_shader;
uint32_t o = shader_reloc_offsets[i];
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index 0ff3d01f3f2..d93f5239d7d 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -84,6 +84,14 @@ struct vc4_shader_validation_state {
* basic blocks.
*/
bool needs_uniform_address_for_loop;
+
+ /* Set when we find an instruction which violates the criteria for a
+ * threaded shader. These are:
+ * - only write the lower half of the register space
+ * - last thread switch signaled at the end
+ * So track the usage of the thread switches and the register usage.
+ */
+ bool all_registers_used;
};
static uint32_t
@@ -119,6 +127,12 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
return ~0;
}
+static bool live_reg_is_upper_half(uint32_t lri)
+{
+ return (lri >=16 && lri < 32) ||
+ (lri >=32 + 16 && lri < 32 + 32);
+}
+
static bool
is_tmu_submit(uint32_t waddr)
{
@@ -385,6 +399,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
} else {
validation_state->live_immediates[lri] = ~0;
}
+
+ if (live_reg_is_upper_half(lri))
+ validation_state->all_registers_used = true;
}
switch (waddr) {
@@ -593,6 +610,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
}
}
+ if ((raddr_a >= 16 && raddr_a < 32) ||
+ (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
+ validation_state->all_registers_used = true;
+ }
+
return true;
}
@@ -748,6 +770,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
bool found_shader_end = false;
int shader_end_ip = 0;
+ uint32_t last_thread_switch_ip = -3;
uint32_t ip;
struct vc4_validated_shader_info *validated_shader = NULL;
struct vc4_shader_validation_state validation_state;
@@ -780,6 +803,16 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
if (!vc4_handle_branch_target(&validation_state))
goto fail;
+ if (ip == last_thread_switch_ip + 3) {
+ /* Reset r0-r3 live clamp data */
+ int i;
+ for (i = 64; i < LIVE_REG_COUNT; i++) {
+ validation_state.live_min_clamp_offsets[i] = ~0;
+ validation_state.live_max_clamp_regs[i] = false;
+ validation_state.live_immediates[i] = ~0;
+ }
+ }
+
switch (sig) {
case QPU_SIG_NONE:
case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -789,6 +822,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
case QPU_SIG_LOAD_TMU1:
case QPU_SIG_PROG_END:
case QPU_SIG_SMALL_IMM:
+ case QPU_SIG_THREAD_SWITCH:
+ case QPU_SIG_LAST_THREAD_SWITCH:
if (!check_instruction_writes(validated_shader,
&validation_state)) {
DRM_ERROR("Bad write at ip %d\n", ip);
@@ -804,6 +839,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
shader_end_ip = ip;
}
+ if (sig == QPU_SIG_THREAD_SWITCH ||
+ sig == QPU_SIG_LAST_THREAD_SWITCH) {
+ validated_shader->is_threaded = true;
+
+ if (ip < last_thread_switch_ip + 3) {
+ DRM_ERROR("Thread switch too soon after "
+ "last switch at ip %d\n", ip);
+ goto fail;
+ }
+ last_thread_switch_ip = ip;
+ }
+
break;
case QPU_SIG_LOAD_IMM:
@@ -818,6 +865,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
if (!check_branch(inst, validated_shader,
&validation_state, ip))
goto fail;
+
+ if (ip < last_thread_switch_ip + 3) {
+ DRM_ERROR("Branch in thread switch at ip %d",
+ ip);
+ goto fail;
+ }
+
break;
default:
DRM_ERROR("Unsupported QPU signal %d at "
@@ -839,6 +893,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
goto fail;
}
+ /* Might corrupt other thread */
+ if (validated_shader->is_threaded &&
+ validation_state.all_registers_used) {
+ DRM_ERROR("Shader uses threading, but uses the upper "
+ "half of the registers, too\n");
+ goto fail;
+ }
+
/* If we did a backwards branch and we haven't emitted a uniforms
* reset since then, we still need the uniforms stream to have the
* uniforms address available so that the backwards branch can do its