summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJordan Justen <[email protected]>2016-05-22 21:46:28 -0700
committerEmil Velikov <[email protected]>2016-06-02 13:56:54 +0100
commitee77c4a099b16a2581c80fcd73929b77f2ddb507 (patch)
tree4ecb17a1e56f6ec21141ddbdee433792765926cf
parenta94be40ecc021bfb08cf3d71f073364e770f2dc3 (diff)
i965: Add CS push constant info to brw_cs_prog_data
We need information about push constants in a few places for the GL driver, and another couple places for the vulkan driver. When we add support for uploading both a common (cross-thread) set of push constants, combined with the previous per-thread push constant data, things are going to get even more complicated. To simplify things, we add push constant info into the cs prog_data struct. The cross-thread constant support is added as of Haswell. To support it we need to make sure all push constants with uniform values are added to earlier registers. The register that varies per thread and holds the thread invocation's unique local ID needs to be added last. For now we add the code that would calculate cross-thread constant information for hsw+, but we force it (cross_thread_supported) off until the other parts of the driver support it. v4: * Support older local ID push constant layout as well. (Jason) Cc: "12.0" <[email protected]> Signed-off-by: Jordan Justen <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> (cherry picked from commit d437798ace47e47dbcb1244734dc1af3ecb5ab84)
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp61
2 files changed, 73 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index f1f9e5614e2..dda62974297 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -424,6 +424,12 @@ struct brw_wm_prog_data {
int urb_setup[VARYING_SLOT_MAX];
};
+struct brw_push_const_block {
+ unsigned dwords; /* Dword count, not reg aligned */
+ unsigned regs; /* Number of registers needed to hold `dwords` */
+ unsigned size; /* Bytes, register aligned */
+};
+
struct brw_cs_prog_data {
struct brw_stage_prog_data base;
@@ -437,6 +443,12 @@ struct brw_cs_prog_data {
int thread_local_id_index;
struct {
+ struct brw_push_const_block cross_thread;
+ struct brw_push_const_block per_thread;
+ struct brw_push_const_block total;
+ } push;
+
+ struct {
/** @{
* surface indices the CS-specific surfaces
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 14b0b428d21..4de2563e66c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6570,6 +6570,64 @@ fs_visitor::emit_cs_work_group_id_setup()
}
static void
+fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
+{
+ block->dwords = dwords;
+ block->regs = DIV_ROUND_UP(dwords, 8); /* 8 dwords per register, rounded up */
+ block->size = block->regs * 32; /* 32 bytes per register */
+}
+
+static void
+cs_fill_push_const_info(const struct brw_device_info *devinfo,
+ struct brw_cs_prog_data *cs_prog_data)
+{
+ const struct brw_stage_prog_data *prog_data =
+ (struct brw_stage_prog_data*) cs_prog_data;
+ bool fill_thread_id =
+ cs_prog_data->thread_local_id_index >= 0 &&
+ cs_prog_data->thread_local_id_index < (int)prog_data->nr_params; /* index must name an existing param */
+ bool cross_thread_supported = false; /* Not yet supported by driver; hard-coded off, so devinfo is not consulted here. */
+
+ /* The thread ID should be stored in the last param dword */
+ assert(prog_data->nr_params > 0 || !fill_thread_id);
+ assert(!fill_thread_id ||
+ cs_prog_data->thread_local_id_index ==
+ (int)prog_data->nr_params - 1);
+
+ unsigned cross_thread_dwords, per_thread_dwords;
+ if (!cross_thread_supported) {
+ cross_thread_dwords = 0u; /* everything is pushed per thread */
+ per_thread_dwords =
+ 8 * cs_prog_data->local_invocation_id_regs + /* 8 dwords for each local invocation ID register */
+ prog_data->nr_params;
+ } else if (fill_thread_id) {
+ /* Fill all but the last register with cross-thread payload */
+ cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8); /* round down to a whole register */
+ per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
+ assert(per_thread_dwords > 0 && per_thread_dwords <= 8); /* remainder fits in one register */
+ } else {
+ /* Fill all data using cross-thread payload */
+ cross_thread_dwords = prog_data->nr_params;
+ per_thread_dwords = 0u;
+ }
+
+ fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
+ fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
+
+ unsigned total_dwords =
+ (cs_prog_data->push.per_thread.size * cs_prog_data->threads + /* per-thread bytes counted once per thread */
+ cs_prog_data->push.cross_thread.size) / 4; /* bytes -> dwords */
+ fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
+
+ assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 || /* cross-thread part is register aligned ... */
+ cs_prog_data->push.per_thread.size == 0); /* ... unless there is no per-thread part */
+ assert(cs_prog_data->push.cross_thread.dwords + /* the two blocks together account for every pushed dword */
+ cs_prog_data->push.per_thread.dwords ==
+ 8 * cs_prog_data->local_invocation_id_regs +
+ prog_data->nr_params);
+}
+
+static void
cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
{
cs_prog_data->simd_size = size;
@@ -6635,6 +6693,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
} else {
cfg = v8.cfg;
cs_set_simd_size(prog_data, 8);
+ cs_fill_push_const_info(compiler->devinfo, prog_data);
prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
}
}
@@ -6660,6 +6719,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
} else {
cfg = v16.cfg;
cs_set_simd_size(prog_data, 16);
+ cs_fill_push_const_info(compiler->devinfo, prog_data);
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
}
}
@@ -6687,6 +6747,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
} else {
cfg = v32.cfg;
cs_set_simd_size(prog_data, 32);
+ cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}