diff options
author | Iago Toral Quiroga <[email protected]> | 2016-05-05 12:55:44 +0200 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2016-05-16 09:55:33 +0200 |
commit | 6eab06b866916d4fd52adf7b8bb6113948a3811a (patch) | |
tree | 564297fca4b35dd4ed9d84327be7a2673e1a9cfd | |
parent | b86d4780ed203b2a22afba5f95c73b15165a7259 (diff) |
i965/fs: Add do_untyped_vector_read helper
We are going to need the same logic for anything that reads
doubles via untyped messages (CS shared variables and SSBOs). Add a
helper function with that logic so that we can reuse it.
v2:
- Make this a static function instead of a method of fs_visitor (Iago)
- We only support types with a size of 4 or 8 (Curro)
- Avoid retypes by using a separate vgrf for the packed result (Curro)
- Put dst parameter before source parameters (Curro)
Reviewed-by: Kenneth Graunke <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 0ff197f3bde..32128bb1025 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2131,6 +2131,69 @@ fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) return get_nir_src(*offset_src); } +static void +do_untyped_vector_read(const fs_builder &bld, + const fs_reg dest, + const fs_reg surf_index, + const fs_reg offset_reg, + unsigned num_components) +{ + if (type_sz(dest.type) == 4) { + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (unsigned i = 0; i < num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); + } else if (type_sz(dest.type) == 8) { + /* Reading a dvec, so we need to: + * + * 1. Multiply num_components by 2, to account for the fact that we + * need to read 64-bit components. + * 2. Shuffle the result of the load to form valid 64-bit elements + * 3. Emit a second load (for components z/w) if needed. + */ + fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(read_offset, offset_reg); + + int iters = num_components <= 2 ? 1 : 2; + + /* Load the dvec, the first iteration loads components x/y, the second + * iteration, if needed, loads components z/w + */ + for (int it = 0; it < iters; it++) { + /* Compute number of components to read in this iteration */ + int iter_components = MIN2(2, num_components); + num_components -= iter_components; + + /* Read. Since this message reads 32-bit components, we need to + * read twice as many components. + */ + fs_reg read_result = emit_untyped_read(bld, surf_index, read_offset, + 1 /* dims */, + iter_components * 2, + BRW_PREDICATE_NONE); + + /* Shuffle the 32-bit load result into valid 64-bit data */ + const fs_reg packed_result = bld.vgrf(dest.type, iter_components); + shuffle_32bit_load_result_to_64bit_data( + bld, packed_result, read_result, iter_components); + + /* Move each component to its destination */ + read_result = retype(read_result, BRW_REGISTER_TYPE_DF); + for (int c = 0; c < iter_components; c++) { + bld.MOV(offset(dest, bld, it * 2 + c), + offset(packed_result, bld, c)); + } + + bld.ADD(read_offset, read_offset, brw_imm_ud(16)); + } + } else { + unreachable("Unsupported type"); + } +} + void fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) |