summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/ir3
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2017-11-10 12:53:13 -0500
committerRob Clark <[email protected]>2017-11-12 12:28:59 -0500
commite7b2719f69a77b9daad5c17d651b54d98720669f (patch)
treea5446c82e51513a3150dfe9c5c63395a03313cf4 /src/gallium/drivers/freedreno/ir3
parent471aa1b6d03e92d601439cfb2c79a78c12234fdb (diff)
freedreno/a5xx: indirect grids
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.c62
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_shader.h5
2 files changed, 58 insertions, 9 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 61a336ed7dd..3337543113d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -859,15 +859,59 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
/* emit compute-shader driver-params: */
uint32_t offset = v->constbase.driver_param;
if (v->constlen > offset) {
- uint32_t compute_params[IR3_DP_CS_COUNT] = {
- [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
- [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
- [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
- /* do we need work-group-size? */
- };
-
fd_wfi(ctx->batch, ring);
- ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
- ARRAY_SIZE(compute_params), compute_params, NULL);
+
+ if (info->indirect) {
+ struct pipe_resource *indirect = NULL;
+ unsigned indirect_offset;
+
+ /* This is a bit awkward, but CP_LOAD_STATE.EXT_SRC_ADDR needs
+ * to be aligned more strongly than 4 bytes. So in this case
+ * we need a temporary buffer to copy NumWorkGroups.xyz to.
+ *
+ * TODO if previous compute job is writing to info->indirect,
+ * we might need a WFI.. but since we currently flush for each
+ * compute job, we are probably ok for now.
+ */
+ if (info->indirect_offset & 0xf) {
+ indirect = pipe_buffer_create(&ctx->screen->base,
+ PIPE_BIND_COMMAND_ARGS_BUFFER, PIPE_USAGE_STREAM,
+ 0x1000);
+ indirect_offset = 0;
+
+ if (is_a5xx(ctx->screen)) {
+ struct fd_bo *src = fd_resource(info->indirect)->bo;
+ struct fd_bo *dst = fd_resource(indirect)->bo;
+ for (unsigned i = 0; i < 3; i++) {
+ unsigned dst_off = i * 4;
+ unsigned src_off = (i * 4) + info->indirect_offset;
+ OUT_PKT7(ring, CP_MEM_TO_MEM, 5);
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOCW(ring, dst, dst_off, 0, 0);
+ OUT_RELOC (ring, src, src_off, 0, 0);
+ }
+ } else {
+ assert(0);
+ }
+ } else {
+ pipe_resource_reference(&indirect, info->indirect);
+ indirect_offset = info->indirect_offset;
+ }
+
+ ctx->emit_const(ring, SHADER_COMPUTE, offset * 4,
+ indirect_offset, 4, NULL, indirect);
+
+ pipe_resource_reference(&indirect, NULL);
+ } else {
+ uint32_t compute_params[IR3_DP_CS_COUNT] = {
+ [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
+ [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
+ [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
+ /* do we need work-group-size? */
+ };
+
+ ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
+ ARRAY_SIZE(compute_params), compute_params, NULL);
+ }
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 3886cce5571..040ea4cd9d8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -44,6 +44,11 @@ enum ir3_driver_param {
IR3_DP_NUM_WORK_GROUPS_X = 0,
IR3_DP_NUM_WORK_GROUPS_Y = 1,
IR3_DP_NUM_WORK_GROUPS_Z = 2,
+ /* NOTE: gl_NumWorkGroups should be vec4 aligned because
+ * glDispatchComputeIndirect() needs to load these from
+ * the info->indirect buffer. Keep that in mind when/if
+ * adding any addition CS driver params.
+ */
IR3_DP_CS_COUNT = 4, /* must be aligned to vec4 */
/* vertex shader driver params: */