diff options
author | Eric Anholt <[email protected]> | 2013-03-18 10:16:42 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-04-01 16:17:26 -0700 |
commit | 70b27e0e4b5d15e575ea477d63c0f6cb19d645c2 (patch) | |
tree | 802df334fa4aa36763c8c791fdc9399059511d8a /src/mesa/drivers/dri/i965/brw_fs_emit.cpp | |
parent | ce316f62efa208b1a43fe81831126fc75c5807c5 (diff) |
i965/fs: Use LD messages for pre-gen7 varying-index uniform loads
This comes at a minor performance cost at the moment (-3.2% +/- 0.2%, n=14 on
my GM45 forced to load all uniforms through the varying-index path), but we
get a whole vec4 at a time to reuse in the next commit.
v2: Fix comment about channels in the other message.
Reviewed-by: Kenneth Graunke <[email protected]>
NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 55 |
1 files changed, 37 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index bc1fef16b01..0f6b7155cbe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -674,47 +674,66 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 void
 fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                                                   struct brw_reg dst,
-                                                  struct brw_reg index)
+                                                  struct brw_reg index,
+                                                  struct brw_reg offset)
 {
    assert(intel->gen < 7); /* Should use the gen7 variant. */
    assert(inst->header_present);
+   assert(inst->mlen);
 
    assert(index.file == BRW_IMMEDIATE_VALUE &&
           index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;
 
-   uint32_t msg_type, msg_control, rlen;
-   if (intel->gen >= 6)
-      msg_type = GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
-   else if (intel->gen == 5 || intel->is_g4x)
-      msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
-   else
-      msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
-
+   uint32_t simd_mode, rlen, msg_type;
    if (dispatch_width == 16) {
-      msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
-      rlen = 2;
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+      rlen = 8;
    } else {
-      msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
-      rlen = 1;
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+      rlen = 4;
+   }
+
+   if (intel->gen >= 5)
+      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+   else {
+      /* We always use the SIMD16 message so that we only have to load U, and
+       * not V or R.
+       */
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
+      assert(inst->mlen == 3);
+      assert(inst->regs_written == 8);
+      rlen = 8;
+      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
    }
 
+   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
+                                      BRW_REGISTER_TYPE_D);
+   brw_MOV(p, offset_mrf, offset);
+
    struct brw_reg header = brw_vec8_grf(0, 0);
    gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   send->header.compression_control = BRW_COMPRESSION_NONE;
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, header);
    if (intel->gen < 6)
       send->header.destreg__conditionalmod = inst->base_mrf;
-   brw_set_dp_read_message(p, send,
+
+   /* Our surface is set up as floats, regardless of what actual data is
+    * stored in it.
+    */
+   uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+   brw_set_sampler_message(p, send,
                            surf_index,
-                           msg_control,
+                           0, /* sampler (unused) */
                            msg_type,
-                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           rlen,
                            inst->mlen,
                            inst->header_present,
-                           rlen);
+                           simd_mode,
+                           return_format);
 }
 
 void
@@ -1305,7 +1324,7 @@ fs_generator::generate_code(exec_list *instructions)
          break;
 
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
-         generate_varying_pull_constant_load(inst, dst, src[0]);
+         generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
          break;
 
       case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: