summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJordan Justen <[email protected]>2014-08-28 14:47:19 -0700
committerJordan Justen <[email protected]>2015-05-02 00:50:00 -0700
commit8d87070af295140fb3558b6784dc6303fde11a67 (patch)
tree0ba2aa56c39f61348a148c41ded1b8908fe99115 /src
parent0e0e23ef537c9add672ff322f34e129a07edc55e (diff)
i965/cs: Implement brw_emit_gpgpu_walker
Tested on Ivybridge, Haswell and Broadwell.

v2:
 * Use SET_FIELD. (Ken)
 * Use simd_size / 16 to support SIMD8/16/32. Ken suggested that we
   might be able to do it arithmetically rather than just supporting
   SIMD8 and SIMD16 with a conditional.

Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_compute.c39
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h13
2 files changed, 51 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index baed7010a3d..044deae83c9 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -31,12 +31,49 @@
#include "brw_draw.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "brw_defines.h"
+/**
+ * Emit a GPGPU_WALKER command that dispatches the current compute program.
+ *
+ * The command is 11 dwords long before gen8 and 15 dwords on gen8+, where
+ * two indirect-data dwords and two must-be-zero dwords are added.
+ *
+ * \param brw         context supplying the compute program data
+ *                    (brw->cs.prog_data), the hardware generation and the
+ *                    batch buffer written to.
+ * \param num_groups  three-element array holding the thread group counts
+ *                    for the X, Y and Z dimensions.
+ */
static void
brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
{
-   _mesa_problem(&brw->ctx, "TODO: implement brw_emit_gpgpu_walker");
+   const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
+
+   const unsigned simd_size = prog_data->simd_size;
+   /* The mask arithmetic below relies on a power-of-two width of at most
+    * 32, and simd_size / 16 must encode to 0, 1 or 2.
+    */
+   assert(simd_size == 8 || simd_size == 16 || simd_size == 32);
+
+   /* Number of work-items in one thread group, and the number of
+    * simd_size-wide threads needed to cover them (rounded up).
+    */
+   unsigned group_size = prog_data->local_size[0] *
+      prog_data->local_size[1] * prog_data->local_size[2];
+   unsigned thread_width_max =
+      (group_size + simd_size - 1) / simd_size;
+
+   /* Start with the low simd_size bits set.  This is built by shifting
+    * right from all-ones because (1u << simd_size) is undefined behaviour
+    * when simd_size is 32.
+    */
+   uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
+
+   /* When the group size is not a multiple of the SIMD width, keep only as
+    * many low bits as there are left-over work-items.
+    */
+   const unsigned right_non_aligned = group_size & (simd_size - 1);
+   if (right_non_aligned != 0)
+      right_mask >>= (simd_size - right_non_aligned);
+
+   uint32_t dwords = brw->gen < 8 ? 11 : 15;
+   BEGIN_BATCH(dwords);
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(0);
+   if (brw->gen >= 8) {
+      OUT_BATCH(0);                 /* Indirect Data Length */
+      OUT_BATCH(0);                 /* Indirect Data Start Address */
+   }
+   assert(thread_width_max <= brw->max_cs_threads);
+   /* simd_size / 16 maps SIMD8/16/32 to field values 0/1/2. */
+   OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
+             SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
+   OUT_BATCH(0);                    /* Thread Group ID Starting X */
+   if (brw->gen >= 8)
+      OUT_BATCH(0);                 /* MBZ */
+   OUT_BATCH(num_groups[0]);        /* Thread Group ID X Dimension */
+   OUT_BATCH(0);                    /* Thread Group ID Starting Y */
+   if (brw->gen >= 8)
+      OUT_BATCH(0);                 /* MBZ */
+   OUT_BATCH(num_groups[1]);        /* Thread Group ID Y Dimension */
+   OUT_BATCH(0);                    /* Thread Group ID Starting/Resume Z */
+   OUT_BATCH(num_groups[2]);        /* Thread Group ID Z Dimension */
+   OUT_BATCH(right_mask);           /* Right Execution Mask */
+   OUT_BATCH(0xffffffff);           /* Bottom Execution Mask */
+   ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d4b5b249614..54179845771 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2469,5 +2469,18 @@ enum brw_wm_barycentric_interp_mode {
# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GPGPU_WALKER command opcode and the SHIFT/MASK pairs for its fields. */
+#define GPGPU_WALKER                            0x7105
+/* GEN8+ DW2 */
+# define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
+# define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
+/* GEN7 DW2, GEN8+ DW4
+ *
+ * SIMD size occupies bits 31:30.  The depth, height and width maximums are
+ * separate 6-bit fields at 21:16, 13:8 and 5:0, so no mask overlaps another.
+ */
+# define GPGPU_WALKER_SIMD_SIZE_SHIFT           30
+# define GPGPU_WALKER_SIMD_SIZE_MASK            INTEL_MASK(31, 30)
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_SHIFT    16
+# define GPGPU_WALKER_THREAD_DEPTH_MAX_MASK     INTEL_MASK(21, 16)
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_SHIFT   8
+# define GPGPU_WALKER_THREAD_HEIGHT_MAX_MASK    INTEL_MASK(13, 8)
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_SHIFT    0
+# define GPGPU_WALKER_THREAD_WIDTH_MAX_MASK     INTEL_MASK(5, 0)
#endif