aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2013-03-13 14:48:55 -0700
committerEric Anholt <[email protected]>2013-04-01 16:17:25 -0700
commitdca5fc14358a8b267b3854c39c976a822885898f (patch)
tree3cc2d6bd82d31e722735d2cd592ac42909f899e7 /src/mesa/drivers/dri/i965/brw_fs_emit.cpp
parentbc0e1591f64b8b3f2693fceaaa8bba9198e26171 (diff)
i965/fs: Improve performance of varying-index uniform loads on IVB.
Like we have done for the VS and for constant-index uniform loads, we use the sampler engine to get caching in front of the L3 to avoid tickling the IVB L3 bug. This is also a bit of a functional change, as we're now loading a vec4 instead of a single dword, though we're not taking advantage of the other 3 components of the vec4 (yet). With the driver hacked to always take the varying-index path for all uniforms, this improves performance of my old GLSL demo by 315% +/- 2% (n=4). This is a major fix for some blur shaders in compositors from the varying-index uniforms support I introduced in 9.1. v2: Move old offset computation into the pre-gen7 path. Reviewed-by: Kenneth Graunke <[email protected]> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=61554 NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp27
1 files changed, 14 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index a729569c840..bc1fef16b01 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -734,28 +734,29 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
- uint32_t msg_control, rlen, mlen;
+ uint32_t simd_mode, rlen, mlen;
if (dispatch_width == 16) {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
- mlen = rlen = 2;
+ mlen = 2;
+ rlen = 8;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
} else {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
- mlen = rlen = 1;
+ mlen = 1;
+ rlen = 4;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
}
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, offset);
- if (intel->gen < 6)
- send->header.destreg__conditionalmod = inst->base_mrf;
- brw_set_dp_read_message(p, send,
+ brw_set_sampler_message(p, send,
surf_index,
- msg_control,
- GEN7_DATAPORT_DC_DWORD_SCATTERED_READ,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 0, /* LD message ignores sampler unit */
+ GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
+ rlen,
mlen,
- inst->header_present,
- rlen);
+ false, /* no header */
+ simd_mode,
+ 0);
}
/**