diff options
author | Jordan Justen <jordan.l.justen@intel.com> | 2015-06-11 19:17:03 -0700 |
---|---|---|
committer | Jordan Justen <jordan.l.justen@intel.com> | 2015-07-14 14:44:48 -0700 |
commit | 320089dbd63de3ac1bd3d42ee8cec41837486d8c (patch) | |
tree | c283e313a874a402190388e1a33cf5ad76331a26 | |
parent | cd7dd45bfec9ad68719c5e4e04b66ea4bcc1a2c1 (diff) |
i965/cs: Initialize GPGPU Thread Count
This field should always be set for gen8. In the bdw PRM, Volume 2d:
Command Reference: Structures under INTERFACE_DESCRIPTOR_DATA, DWORD
6, Bits 9:0, Number of Threads in GPGPU Thread Group:
"This field should not be set to 0 even if the barrier is disabled,
since an accurate value is needed for proper pre-emption."
The HSW PRM doesn't mention that it must always be set, but
it should not hurt.
Reported-by: Kristian Høgsberg <krh@bitplanet.net>
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 5 |
2 files changed, 25 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 4c5082c82c4..d61bba002c4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -291,6 +291,17 @@ brw_cs_precompile(struct gl_context *ctx, } +static unsigned +get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) +{ + const unsigned simd_size = cs_prog_data->simd_size; + unsigned group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + + return (group_size + simd_size - 1) / simd_size; +} + + static void brw_upload_cs_state(struct brw_context *brw) { @@ -316,6 +327,8 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned threads = get_cs_thread_count(cs_prog_data); + uint32_t dwords = brw->gen < 8 ? 8 : 9; BEGIN_BATCH(dwords); OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); @@ -365,6 +378,13 @@ brw_upload_cs_state(struct brw_context *brw) desc[dw++] = 0; desc[dw++] = 0; desc[dw++] = stage_state->bind_bo_offset; + desc[dw++] = 0; + const uint32_t media_threads = + brw->gen >= 8 ? 
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : + SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); + assert(threads <= brw->max_cs_threads); + desc[dw++] = media_threads; BEGIN_BATCH(4); OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 19489aba5be..b1a1c11b3ae 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2518,6 +2518,11 @@ enum brw_wm_barycentric_interp_mode { # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 +/* GEN7 DW5, GEN8+ DW6 */ +# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_WALKER 0x7105 /* GEN8+ DW2 */ |