diff options
author | Jordan Justen <[email protected]> | 2014-08-28 14:47:19 -0700 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2015-05-02 00:50:00 -0700 |
commit | 8d87070af295140fb3558b6784dc6303fde11a67 (patch) | |
tree | 0ba2aa56c39f61348a148c41ded1b8908fe99115 | |
parent | 0e0e23ef537c9add672ff322f34e129a07edc55e (diff) |
i965/cs: Implement brw_emit_gpgpu_walker
Tested on Ivybridge, Haswell and Broadwell.
v2:
* Use SET_FIELD. (Ken)
* Use simd_size / 16 to support SIMD8/16/32. Ken suggested
that we might be able to do it arithmetically rather than just
supporting SIMD8 and SIMD16 with a conditional.
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compute.c | 54 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 13 |
2 files changed, 66 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index baed7010a3d..044deae83c9 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -31,12 +31,64 @@
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "brw_defines.h"
 
 
+/**
+ * Emit a GPGPU_WALKER command that launches the compute program described
+ * by brw->cs.prog_data.
+ *
+ * The thread group size is local_size[0] * local_size[1] * local_size[2] and
+ * is split into ceil(group_size / simd_size) threads.  The right execution
+ * mask has simd_size bits set, or only as many bits as there are leftover
+ * invocations when the group size is not a multiple of simd_size.
+ *
+ * \param brw         context providing the program data and batchbuffer
+ * \param num_groups  thread group counts for the X, Y and Z dimensions
+ */
 static void
 brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 {
-   _mesa_problem(&brw->ctx, "TODO: implement brw_emit_gpgpu_walker");
+   const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
+
+   const unsigned simd_size = prog_data->simd_size;
+   unsigned group_size = prog_data->local_size[0] *
+      prog_data->local_size[1] * prog_data->local_size[2];
+   unsigned thread_width_max =
+      (group_size + simd_size - 1) / simd_size;
+
+   /* One mask bit per channel.  Shift a full mask down rather than using
+    * (1u << simd_size) - 1, which is an undefined shift by 32 for SIMD32.
+    */
+   uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
+   const unsigned right_non_aligned = group_size & (simd_size - 1);
+   if (right_non_aligned != 0)
+      right_mask >>= (simd_size - right_non_aligned);
+
+   uint32_t dwords = brw->gen < 8 ? 11 : 15;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(0);
+   if (brw->gen >= 8) {
+      OUT_BATCH(0);                     /* Indirect Data Length */
+      OUT_BATCH(0);                     /* Indirect Data Start Address */
+   }
+   assert(thread_width_max <= brw->max_cs_threads);
+   OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
+             SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
+   OUT_BATCH(0);                        /* Thread Group ID Starting X */
+   if (brw->gen >= 8)
+      OUT_BATCH(0);                     /* MBZ */
+   OUT_BATCH(num_groups[0]);            /* Thread Group ID X Dimension */
+   OUT_BATCH(0);                        /* Thread Group ID Starting Y */
+   if (brw->gen >= 8)
+      OUT_BATCH(0);                     /* MBZ */
+   OUT_BATCH(num_groups[1]);            /* Thread Group ID Y Dimension */
+   OUT_BATCH(0);                        /* Thread Group ID Starting/Resume Z */
+   OUT_BATCH(num_groups[2]);            /* Thread Group ID Z Dimension */
+   OUT_BATCH(right_mask);               /* Right Execution Mask */
+   OUT_BATCH(0xffffffff);               /* Bottom Execution Mask */
+   ADVANCE_BATCH();
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d4b5b249614..54179845771 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2469,5 +2469,18 @@ enum brw_wm_barycentric_interp_mode {
 # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK       INTEL_MASK(15, 0)
 
 #define MEDIA_INTERFACE_DESCRIPTOR_LOAD         0x7002
+#define GPGPU_WALKER                            0x7105
+/* GEN8+ DW2 */
+# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
+# define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
+/* GEN7 DW2, GEN8+ DW4 */
+# define GPGPU_WALKER_SIMD_SIZE_SHIFT           30
+# define GPGPU_WALKER_SIMD_SIZE_MASK            INTEL_MASK(31, 30)
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT    16
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK     INTEL_MASK(21, 16)
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT   8
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK    INTEL_MASK(13, 8)
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT    0
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK     INTEL_MASK(5, 0)
 
 #endif