diff options
author | Iago Toral Quiroga <[email protected]> | 2016-01-22 14:00:38 +0100 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2016-05-16 09:55:33 +0200 |
commit | 50b7676dc46bae39c5e9b779828ef4fb2e1fbefc (patch) | |
tree | 0f212d8224f5a0fa9cb27a652d6075550bf84d56 | |
parent | 4d9c461e53440182de42d0a16ec66ad7f5c3b00a (diff) |
i965/fs: add shuffle_32bit_load_result_to_64bit_data helper
There will be a few places where we need to shuffle the result of a 32-bit
load into valid 64-bit data, so extract this logic into a separate helper
that we can reuse.
v2 (Curro):
- Use subscript() instead of stride()
- Assert on the input types rather than retyping.
- Use offset() instead of horiz_offset(), drop the multiplier definition.
- Don't use force_writemask_all.
- Mark component_i as const.
- Make the function name lower case.
v3 (Curro):
- Pass src and dst by reference.
- Move to brw_fs_nir.cpp
Reviewed-by: Kenneth Graunke <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 53 |
2 files changed, 58 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d4eb8fb7be4..286e7186d1f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -535,3 +535,8 @@ private:
 
 bool brw_do_channel_expressions(struct exec_list *instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
+
+void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
+                                             const fs_reg &dst,
+                                             const fs_reg &src,
+                                             uint32_t components);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7458b73b1d4..584a0d6bd52 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3980,3 +3980,56 @@ fs_visitor::nir_emit_jump(const fs_builder &bld, nir_jump_instr *instr)
       unreachable("unknown jump");
    }
 }
+
+/**
+ * This helper takes the result of a load operation that reads 32-bit elements
+ * in this format:
+ *
+ * x x x x x x x x
+ * y y y y y y y y
+ * z z z z z z z z
+ * w w w w w w w w
+ *
+ * and shuffles the data to get this:
+ *
+ * x y x y x y x y
+ * x y x y x y x y
+ * z w z w z w z w
+ * z w z w z w z w
+ *
+ * Which is exactly what we want if the load is reading 64-bit components
+ * like doubles, where x represents the low 32-bit of the x double component
+ * and y represents the high 32-bit of the x double component (likewise with
+ * z and w for double component y). The parameter @components represents
+ * the number of 64-bit components present in @src. This would typically be
+ * 2 at most, since we can only fit 2 double elements in the result of a
+ * vec4 load.
+ *
+ * Notice that @dst and @src can be the same register.
+ */
+void
+shuffle_32bit_load_result_to_64bit_data(const fs_builder &bld,
+                                        const fs_reg &dst,
+                                        const fs_reg &src,
+                                        uint32_t components)
+{
+   assert(type_sz(src.type) == 4);
+   assert(type_sz(dst.type) == 8);
+
+   /* A temporary that we will use to shuffle the 32-bit data of each
+    * component in the vector into valid 64-bit data. We can't write directly
+    * to dst because dst can be (and would usually be) the same as src
+    * and in that case the first MOV in the loop below would overwrite the
+    * data read in the second MOV.
+    */
+   fs_reg tmp = bld.vgrf(dst.type);
+
+   for (unsigned i = 0; i < components; i++) {
+      const fs_reg component_i = offset(src, bld, 2 * i);
+
+      bld.MOV(subscript(tmp, src.type, 0), component_i);
+      bld.MOV(subscript(tmp, src.type, 1), offset(component_i, bld, 1));
+
+      bld.MOV(offset(dst, bld, i), tmp);
+   }
+}