diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.cpp | 29 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 |
4 files changed, 45 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 980ef52fe17..757c77d459a 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -458,6 +458,35 @@ const struct brw_tracked_state brw_cs_state = { /** + * We are building the local ID push constant data using the simplest possible + * method. We simply push the local IDs directly as they should appear in the + * registers for the uvec3 gl_LocalInvocationID variable. + * + * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6 + * registers worth of push constant space. + * + * FINISHME: There are a few easy optimizations to consider. + * + * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is + * no need for using push constant space for that dimension. + * + * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can + * easily use 16-bit words rather than 32-bit dwords in the push constant + * data. + * + * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for + * conveying the data, and thereby reduce push constant usage. + * + */ +unsigned +brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog, + unsigned dispatch_width) +{ + return 3 * dispatch_width; +} + + +/** * Creates a region containing the push constants for the CS on gen7+. * * Push constants are constant values (such as GLSL uniforms) that are diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 8404aa3e824..08310df77c1 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -42,6 +42,11 @@ void brw_upload_cs_prog(struct brw_context *brw); #ifdef __cplusplus + +unsigned +brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog, + unsigned dispatch_width); + } #endif diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 320f612682d..10417c87484 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -42,6 +42,7 @@ #include "brw_eu.h" #include "brw_wm.h" #include "brw_fs.h" +#include "brw_cs.h" #include "brw_cfg.h" #include "brw_dead_control_flow.h" #include "main/uniforms.h" @@ -4731,6 +4732,15 @@ fs_visitor::setup_cs_payload() assert(devinfo->gen >= 7); payload.num_regs = 1; + + if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { + const unsigned local_id_dwords = + brw_cs_prog_local_id_payload_dwords(prog, dispatch_width); + assert((local_id_dwords & 0x7) == 0); + const unsigned local_id_regs = local_id_dwords / 8; + payload.local_invocation_id_reg = payload.num_regs; + payload.num_regs += local_id_regs; + } } void diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index dd0526a1550..c584cc70cb8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -364,6 +364,7 @@ public: uint8_t sample_pos_reg; uint8_t sample_mask_in_reg; uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; + uint8_t local_invocation_id_reg; /** The number of thread payload registers the hardware will supply. */ uint8_t num_regs; |