diff options
author | Jordan Justen <[email protected]> | 2016-05-26 13:49:07 -0700 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2016-06-01 19:29:02 -0700 |
commit | 1b79e7ebbd77a7e714fafadd91459059aacf2407 (patch) | |
tree | 9321a46aae9763a888e2796e31dc6fbb493251f3 /src/mesa | |
parent | 3ef0957dac11edee7babc9746ec766dcb055d909 (diff) |
i965: Store number of threads in brw_cs_prog_data
Cc: "12.0" <[email protected]>
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_cs_state.c | 32 |
3 files changed, 23 insertions, 25 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index bed969cf18a..f1f9e5614e2 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -430,6 +430,7 @@ struct brw_cs_prog_data { GLuint dispatch_grf_start_reg_16; unsigned local_size[3]; unsigned simd_size; + unsigned threads; bool uses_barrier; bool uses_num_work_groups; unsigned local_invocation_id_regs; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 975ac9eb6ba..14b0b428d21 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6569,6 +6569,15 @@ fs_visitor::emit_cs_work_group_id_setup() return reg; } +static void +cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size) +{ + cs_prog_data->simd_size = size; + unsigned group_size = cs_prog_data->local_size[0] * + cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; + cs_prog_data->threads = (group_size + size - 1) / size; +} + const unsigned * brw_compile_cs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, @@ -6625,7 +6634,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, fail_msg = v8.fail_msg; } else { cfg = v8.cfg; - prog_data->simd_size = 8; + cs_set_simd_size(prog_data, 8); prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs; } } @@ -6650,7 +6659,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, } } else { cfg = v16.cfg; - prog_data->simd_size = 16; + cs_set_simd_size(prog_data, 16); prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs; } } @@ -6677,7 +6686,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, } } else { cfg = v32.cfg; - prog_data->simd_size = 32; + cs_set_simd_size(prog_data, 32); } } diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 7f484dd1586..619edfb0acc 100644 --- 
a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -33,17 +33,6 @@ #include "program/prog_statevars.h" #include "compiler/glsl/ir_uniform.h" -static unsigned -get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) -{ - const unsigned simd_size = cs_prog_data->simd_size; - unsigned group_size = cs_prog_data->local_size[0] * - cs_prog_data->local_size[1] * cs_prog_data->local_size[2]; - - return (group_size + simd_size - 1) / simd_size; -} - - static void brw_upload_cs_state(struct brw_context *brw) { @@ -79,7 +68,6 @@ brw_upload_cs_state(struct brw_context *brw) (prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value); unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32); unsigned push_constant_regs = reg_aligned_constant_size / 32; - unsigned threads = get_cs_thread_count(cs_prog_data); uint32_t dwords = brw->gen < 8 ? 8 : 9; BEGIN_BATCH(dwords); @@ -129,7 +117,8 @@ brw_upload_cs_state(struct brw_context *brw) * * Note: The constant data is built in brw_upload_cs_push_constants below. */ - const uint32_t vfe_curbe_allocation = push_constant_regs * threads; + const uint32_t vfe_curbe_allocation = + push_constant_regs * cs_prog_data->threads; OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) | SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC)); OUT_BATCH(0); @@ -141,7 +130,7 @@ brw_upload_cs_state(struct brw_context *brw) BEGIN_BATCH(4); OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2)); OUT_BATCH(0); - OUT_BATCH(ALIGN(reg_aligned_constant_size * threads, 64)); + OUT_BATCH(ALIGN(reg_aligned_constant_size * cs_prog_data->threads, 64)); OUT_BATCH(stage_state->push_const_offset); ADVANCE_BATCH(); } @@ -163,9 +152,9 @@ brw_upload_cs_state(struct brw_context *brw) desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH); const uint32_t media_threads = brw->gen >= 8 ? 
- SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : - SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT); - assert(threads <= brw->max_cs_threads); + SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) : + SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT); + assert(cs_prog_data->threads <= brw->max_cs_threads); assert(prog_data->total_shared <= 64 * 1024); uint32_t slm_size = 0; @@ -247,21 +236,20 @@ brw_upload_cs_push_constants(struct brw_context *brw, const unsigned param_aligned_count = reg_aligned_constant_size / sizeof(*param); - unsigned threads = get_cs_thread_count(cs_prog_data); - param = (gl_constant_value*) brw_state_batch(brw, type, - ALIGN(reg_aligned_constant_size * threads, 64), + ALIGN(reg_aligned_constant_size * + cs_prog_data->threads, 64), 64, &stage_state->push_const_offset); assert(param); STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); - brw_cs_fill_local_id_payload(cs_prog_data, param, threads, + brw_cs_fill_local_id_payload(cs_prog_data, param, cs_prog_data->threads, reg_aligned_constant_size); /* _NEW_PROGRAM_CONSTANTS */ - for (t = 0; t < threads; t++) { + for (t = 0; t < cs_prog_data->threads; t++) { gl_constant_value *next_param = &param[t * param_aligned_count + local_id_dwords]; for (i = 0; i < prog_data->nr_params; i++) { |