summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jordan Justen <[email protected]> 2015-06-11 19:17:03 -0700
committer Jordan Justen <[email protected]> 2015-07-14 14:44:48 -0700
commit 320089dbd63de3ac1bd3d42ee8cec41837486d8c (patch)
tree c283e313a874a402190388e1a33cf5ad76331a26
parent cd7dd45bfec9ad68719c5e4e04b66ea4bcc1a2c1 (diff)
i965/cs: Initialize GPGPU Thread Count
This field should always be set for gen8. In the bdw PRM, Volume 2d: Command Reference: Structures under INTERFACE_DESCRIPTOR_DATA, DWORD 6, Bits 9:0, Number of Threads in GPGPU Thread Group: "This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption." In the HSW PRM, it doesn't mention that it must always be set, but it should not hurt. Reported-by: Kristian Høgsberg <[email protected]> Signed-off-by: Jordan Justen <[email protected]> Reviewed-by: Ben Widawsky <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.cpp20
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h5
2 files changed, 25 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index 4c5082c82c4..d61bba002c4 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -291,6 +291,17 @@ brw_cs_precompile(struct gl_context *ctx,
}
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+ const unsigned simd_size = cs_prog_data->simd_size;
+ unsigned group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+ return (group_size + simd_size - 1) / simd_size;
+}
+
+
static void
brw_upload_cs_state(struct brw_context *brw)
{
@@ -316,6 +327,8 @@ brw_upload_cs_state(struct brw_context *brw)
prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ unsigned threads = get_cs_thread_count(cs_prog_data);
+
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
@@ -365,6 +378,13 @@ brw_upload_cs_state(struct brw_context *brw)
desc[dw++] = 0;
desc[dw++] = 0;
desc[dw++] = stage_state->bind_bo_offset;
+ desc[dw++] = 0;
+ const uint32_t media_threads =
+ brw->gen >= 8 ?
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+ SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+ assert(threads <= brw->max_cs_threads);
+ desc[dw++] = media_threads;
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 19489aba5be..b1a1c11b3ae 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2518,6 +2518,11 @@ enum brw_wm_barycentric_interp_mode {
# define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0)
#define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002
+/* GEN7 DW5, GEN8+ DW6 */
+# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
+# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
#define MEDIA_STATE_FLUSH 0x7004
#define GPGPU_WALKER 0x7105
/* GEN8+ DW2 */