diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 25 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 |
2 files changed, 24 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp
index d273f99c5ca..9ee5ae52798 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cs.cpp
@@ -473,8 +473,9 @@ const struct brw_tracked_state brw_cs_state = {
  * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
  * registers worth of push constant space.
  *
- * Note: Any updates to brw_cs_prog_local_id_payload_dwords or
- * fill_local_id_payload need to coordinated.
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to be coordinated.
  *
  * FINISHME: There are a few easy optimizations to consider.
  *
@@ -522,6 +523,26 @@ fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
 }
 
 
+fs_reg *
+fs_visitor::emit_cs_local_invocation_id_setup()
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+
+   fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));
+
+   struct brw_reg src =
+      brw_vec8_grf(payload.local_invocation_id_reg, 0);
+   src = retype(src, BRW_REGISTER_TYPE_UD);
+   bld.MOV(*reg, src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 1), src);
+   src.nr += dispatch_width / 8;
+   bld.MOV(offset(*reg, bld, 2), src);
+
+   return reg;
+}
+
+
 /**
  * Creates a region containing the push constants for the CS on gen7+.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index c584cc70cb8..6bfc29002a3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -275,6 +275,7 @@ public:
    void emit_fb_writes();
    void emit_urb_writes();
    void emit_cs_terminate();
+   fs_reg *emit_cs_local_invocation_id_setup();
    void emit_barrier();