diff options
author | Francisco Jerez <[email protected]> | 2017-01-13 15:36:51 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-06-28 13:19:38 -0700 |
commit | f6c4aace22aceb9089eb0e3fa297eab09dac4ce1 (patch) | |
tree | 01ca19e582f6facbe746dc6bca70f0e3292c1df3 /src/intel/compiler/brw_fs.h | |
parent | 8f143f70d64786a521fe57f109bff9a084cdf27f (diff) |
intel/fs: Extend thread payload layout to SIMD32
And handle 32-wide payload register reads in fetch_payload_reg().
v2 (Jason Ekstrand):
- Fix some whitespace and brace placement
Reviewed-by: Jason Ekstrand <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs.h')
-rw-r--r-- | src/intel/compiler/brw_fs.h | 42 |
1 file changed, 31 insertions, 11 deletions
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index f0b2d5e8ad3..51529e89795 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -338,14 +338,15 @@ public:
 
    /** Register numbers for thread payload fields. */
    struct thread_payload {
-      uint8_t source_depth_reg;
-      uint8_t source_w_reg;
-      uint8_t aa_dest_stencil_reg;
-      uint8_t dest_depth_reg;
-      uint8_t sample_pos_reg;
-      uint8_t sample_mask_in_reg;
-      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
-      uint8_t local_invocation_id_reg;
+      uint8_t subspan_coord_reg[2];
+      uint8_t source_depth_reg[2];
+      uint8_t source_w_reg[2];
+      uint8_t aa_dest_stencil_reg[2];
+      uint8_t dest_depth_reg[2];
+      uint8_t sample_pos_reg[2];
+      uint8_t sample_mask_in_reg[2];
+      uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
+      uint8_t local_invocation_id_reg[2];
 
       /** The number of thread payload registers the hardware will supply. */
       uint8_t num_regs;
@@ -499,13 +500,32 @@ private:
 
 namespace brw {
    inline fs_reg
-   fetch_payload_reg(const brw::fs_builder &bld, uint8_t reg,
+   fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
                      brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
    {
-      if (!reg) {
+      if (!regs[0])
          return fs_reg();
+
+      if (bld.dispatch_width() > 16) {
+         const fs_reg tmp = bld.vgrf(type, n);
+         const brw::fs_builder hbld = bld.exec_all().group(16, 0);
+         const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
+         fs_reg *const components = new fs_reg[n * m];
+
+         for (unsigned c = 0; c < n; c++) {
+            for (unsigned g = 0; g < m; g++) {
+               components[c * m + g] =
+                  offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
+            }
+         }
+
+         hbld.LOAD_PAYLOAD(tmp, components, n * m, 0);
+
+         delete[] components;
+         return tmp;
+
       } else {
-         return fs_reg(retype(brw_vec8_grf(reg, 0), type));
+         return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
       }
    }
 }