diff options
author | Jordan Justen <[email protected]> | 2016-05-31 15:45:24 -0700 |
---|---|---|
committer | Jordan Justen <[email protected]> | 2016-06-01 19:29:02 -0700 |
commit | b1f22c6317940dac543e44dd638ea9f4fbcd6ca7 (patch) | |
tree | f8b6db7e39cfa11bec0a65986b96d88138495466 | |
parent | 3ba9594f32239031ddeff764e9896d48d05125d0 (diff) |
i965: Enable cross-thread constants and compact local IDs for hsw+
Cross-thread constant support first appears on Haswell. It allows us to
upload a single set of uniform data shared by all threads, without
duplicating it per thread.
One complication is that cross-thread constants are loaded into
registers before per-thread constants. Previously, our local IDs were
loaded before the uniform data and treated as 'payload' data, even
though they were actually pushed into the registers like the other
uniform data.
Therefore, in this patch we simultaneously enable a newer layout where
each thread now uses a single uniform slot for a unique local ID for
the thread. This uniform is handled specially to make sure it is added
last into the uniform push constant registers. This minimizes our
usage of push constant registers, and maximizes our ability to use
cross-thread constants for registers.
To swap from the old to the new layout, we also need to flip some
lowering pass switches to let our driver handle the lowering instead.
We also no longer force thread_local_id_index to -1.
v4:
* Minimize size of patch that switches from the old local ID layout
to the new layout (Jason)
Cc: "12.0" <[email protected]>
Signed-off-by: Jordan Justen <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 16 |
3 files changed, 6 insertions, 14 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index bb06733bb0a..a4855a09137 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -40,8 +40,7 @@
    .lower_fdiv = true, \
    .lower_flrp64 = true, \
    .native_integers = true, \
-   .vertex_id_zero_based = true, \
-   .lower_cs_local_index_from_id = true
+   .vertex_id_zero_based = true
 
 static const struct nir_shader_compiler_options scalar_nir_options = {
    COMMON_OPTIONS,
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index ad8d5140a3f..97dc22621c9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -599,7 +599,6 @@ brw_initialize_context_constants(struct brw_context *brw)
    ctx->Const.MaxClipPlanes = 8;
 
    ctx->Const.LowerTessLevel = true;
-   ctx->Const.LowerCsDerivedVariables = true;
    ctx->Const.PrimitiveRestartForPatches = true;
 
    ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4de2563e66c..0b766a4a848 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6586,7 +6586,7 @@ cs_fill_push_const_info(const struct brw_device_info *devinfo,
    bool fill_thread_id =
       cs_prog_data->thread_local_id_index >= 0 &&
       cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
-   bool cross_thread_supported = false; /* Not yet supported by driver. */
+   bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
 
    /* The thread ID should be stored in the last param dword */
    assert(prog_data->nr_params > 0 || !fill_thread_id);
@@ -6652,19 +6652,13 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
    brw_nir_lower_cs_shared(shader);
    prog_data->base.total_shared += shader->num_shared;
 
-   /* The driver isn't yet ready to support thread_local_id_index, so we force
-    * it to disabled for now.
-    */
-   prog_data->thread_local_id_index = -1;
-
    /* Now that we cloned the nir_shader, we can update num_uniforms based on
     * the thread_local_id_index.
     */
-   if (prog_data->thread_local_id_index >= 0) {
-      shader->num_uniforms =
-         MAX2(shader->num_uniforms,
-              (unsigned)4 * (prog_data->thread_local_id_index + 1));
-   }
+   assert(prog_data->thread_local_id_index >= 0);
+   shader->num_uniforms =
+      MAX2(shader->num_uniforms,
+           (unsigned)4 * (prog_data->thread_local_id_index + 1));
 
    brw_nir_lower_intrinsics(shader, &prog_data->base);
    shader = brw_postprocess_nir(shader, compiler->devinfo, true);
```