diff options
author | Francisco Jerez <[email protected]> | 2016-05-18 01:26:03 -0700 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2016-05-27 23:19:21 -0700 |
commit | 64a6cb87f1fbfe2e410d6a4087450c2d4eb72228 (patch) | |
tree | ff5c354de796bc7fa2152e3c732251f7982117c3 /src | |
parent | d8a3294ac21741c3a78eef72b832902e15fbd948 (diff) |
i965/fs: Implement promotion of varying pull loads on Gen4 during SIMD lowering.
Varying pull constant loads inherit the same limitation of pre-ILK
hardware that requires expanding SIMD8 texel fetch instructions to
SIMD16, so we can deal with pull constant loads in the same way it's
done for texturing during SIMD lowering.
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 27 |
1 file changed, 13 insertions, 14 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 336806b9e51..6e2b71650c3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -178,36 +178,25 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, fs_reg vec4_offset = vgrf(glsl_type::uint_type); bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); - int scale = 1; - if (devinfo->gen == 4 && bld.dispatch_width() == 8) { - /* Pre-gen5, we can either use a SIMD8 message that requires (header, - * u, v, r) as parameters, or we can just use the SIMD16 message - * consisting of (header, u). We choose the second, at the cost of a - * longer return length. - */ - scale = 2; - } - /* The pull load message will load a vec4 (16 bytes). If we are loading * a double this means we are only loading 2 elements worth of data. * We also want to use a 32-bit data type for the dst of the load operation * so other parts of the driver don't get confused about the size of the * result. 
*/ - fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * scale); + fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4); fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, vec4_result, surf_index, vec4_offset); - inst->regs_written = 4 * (bld.dispatch_width() / 8) * scale; + inst->regs_written = 4 * bld.dispatch_width() / 8; if (type_sz(dst.type) == 8) { - assert(scale == 1); shuffle_32bit_load_result_to_64bit_data( bld, retype(vec4_result, dst.type), vec4_result, 2); } vec4_result.type = dst.type; bld.MOV(dst, offset(vec4_result, bld, - (const_offset & 0xf) / type_sz(vec4_result.type) * scale)); + (const_offset & 0xf) / type_sz(vec4_result.type))); } /** @@ -4641,6 +4630,16 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, */ return inst->exec_size / DIV_ROUND_UP(reg_count, 2); } + + case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: + /* Pre-ILK hardware doesn't have a SIMD8 variant of the texel fetch + * message used to implement varying pull constant loads, so expand it + * to SIMD16. An alternative with longer message payload length but + * shorter return payload would be to use the SIMD8 sampler message that + * takes (header, u, v, r) as parameters instead of (header, u). + */ + return (devinfo->gen == 4 ? 16 : MIN2(16, inst->exec_size)); + case SHADER_OPCODE_MULH: /* MULH is lowered to the MUL/MACH sequence using the accumulator, which * is 8-wide on Gen7+. |