From ebbe6cdad7ab082d2b191fe6c7c0eaa6921d55de Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Thu, 17 Sep 2015 16:25:24 -0700 Subject: i965/cs: Implement DispatchComputeIndirect support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jordan Justen Reviewed-by: Kristian Høgsberg --- src/mesa/drivers/dri/i965/brw_compute.c | 57 ++++++++++++++++++++++++++++++--- src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ src/mesa/drivers/dri/i965/intel_reg.h | 5 +++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c index 5693ab507d4..c392152e48d 100644 --- a/src/mesa/drivers/dri/i965/brw_compute.c +++ b/src/mesa/drivers/dri/i965/brw_compute.c @@ -31,14 +31,46 @@ #include "brw_draw.h" #include "brw_state.h" #include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" #include "brw_defines.h" static void -brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) +brw_emit_gpgpu_walker(struct brw_context *brw, + const void *compute_param, + bool indirect) { const struct brw_cs_prog_data *prog_data = brw->cs.prog_data; + const GLuint *num_groups; + uint32_t indirect_flag; + + if (!indirect) { + num_groups = (const GLuint *)compute_param; + indirect_flag = 0; + } else { + GLintptr indirect_offset = (GLintptr)compute_param; + static const GLuint indirect_group_counts[3] = { 0, 0, 0 }; + num_groups = indirect_group_counts; + + struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer; + drm_intel_bo *bo = intel_bufferobj_buffer(brw, + intel_buffer_object(indirect_buffer), + indirect_offset, 3 * sizeof(GLuint)); + + indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE; + + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 0); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 4); + brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo, + I915_GEM_DOMAIN_VERTEX, 0, + indirect_offset + 8); + } + const unsigned simd_size = prog_data->simd_size; unsigned group_size = prog_data->local_size[0] * prog_data->local_size[1] * prog_data->local_size[2]; @@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) uint32_t dwords = brw->gen < 8 ? 11 : 15; BEGIN_BATCH(dwords); - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2)); + OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); OUT_BATCH(0); if (brw->gen >= 8) { OUT_BATCH(0); /* Indirect Data Length */ @@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups) static void -brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) +brw_dispatch_compute_common(struct gl_context *ctx, + const void *compute_param, + bool indirect) { struct brw_context *brw = brw_context(ctx); int estimated_buffer_space_needed; @@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) brw->no_batch_wrap = true; brw_upload_compute_state(brw); - brw_emit_gpgpu_walker(brw, num_groups); + brw_emit_gpgpu_walker(brw, compute_param, indirect); brw->no_batch_wrap = false; @@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) */ } +static void +brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups) { + brw_dispatch_compute_common(ctx, + num_groups, + false); +} + +static void +brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect) +{ + brw_dispatch_compute_common(ctx, + (void *)indirect, + true); +} void brw_init_compute_functions(struct dd_function_table *functions) { functions->DispatchCompute = brw_dispatch_compute; + functions->DispatchComputeIndirect = brw_dispatch_compute_indirect; } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f9d8d1b98f2..f0797985a19 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2770,6 +2770,8 @@ enum brw_wm_barycentric_interp_mode { # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_WALKER 0x7105 +/* GEN7 DW0 */ +# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE (1 << 10) /* GEN8+ DW2 */ # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT 0 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK INTEL_MASK(15, 0) diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 58007d3047f..a261c2bbb8a 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -173,6 +173,11 @@ #define GEN7_3DPRIM_START_INSTANCE 0x243C #define GEN7_3DPRIM_BASE_VERTEX 0x2440 +/* Auto-Compute / Indirect Registers */ +#define GEN7_GPGPU_DISPATCHDIMX 0x2500 +#define GEN7_GPGPU_DISPATCHDIMY 0x2504 +#define GEN7_GPGPU_DISPATCHDIMZ 0x2508 + #define GEN7_CACHE_MODE_1 0x7004 # define GEN8_HIZ_NP_PMA_FIX_ENABLE (1 << 11) # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13) -- cgit v1.2.3