diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 5 |
2 files changed, 25 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 4c5082c82c4..d61bba002c4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -291,6 +291,17 @@ brw_cs_precompile(struct gl_context *ctx, } +static unsigned +get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) +{ + const unsigned simd_size = cs_prog_data->simd_size; + unsigned group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + + return (group_size + simd_size - 1) / simd_size; +} + + static void brw_upload_cs_state(struct brw_context *brw) { @@ -316,6 +327,8 @@ brw_upload_cs_state(struct brw_context *brw) prog_data->binding_table.size_bytes, 32, &stage_state->bind_bo_offset); + unsigned threads = get_cs_thread_count(cs_prog_data); + uint32_t dwords = brw->gen < 8 ? 8 : 9; BEGIN_BATCH(dwords); OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2)); @@ -365,6 +378,13 @@ brw_upload_cs_state(struct brw_context *brw) desc[dw++] = 0; desc[dw++] = 0; desc[dw++] = stage_state->bind_bo_offset; + desc[dw++] = 0; + const uint32_t media_threads = + brw->gen >= 8 ? + SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : + SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); + assert(threads <= brw->max_cs_threads); + desc[dw++] = media_threads; BEGIN_BATCH(4); OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 19489aba5be..b1a1c11b3ae 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2518,6 +2518,11 @@ enum brw_wm_barycentric_interp_mode { # define MEDIA_VFE_STATE_CURBE_ALLOC_MASK INTEL_MASK(15, 0) #define MEDIA_INTERFACE_DESCRIPTOR_LOAD 0x7002 +/* GEN7 DW5, GEN8+ DW6 */ +# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0) +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0 +# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0) #define MEDIA_STATE_FLUSH 0x7004 #define GPGPU_WALKER 0x7105 /* GEN8+ DW2 */ |