aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2013-03-18 10:16:42 -0700
committerEric Anholt <[email protected]>2013-04-01 16:17:26 -0700
commit70b27e0e4b5d15e575ea477d63c0f6cb19d645c2 (patch)
tree802df334fa4aa36763c8c791fdc9399059511d8a /src/mesa/drivers/dri/i965/brw_fs_emit.cpp
parentce316f62efa208b1a43fe81831126fc75c5807c5 (diff)
i965/fs: Use LD messages for pre-gen7 varying-index uniform loads
This comes at a minor performance cost at the moment (-3.2% +/- 0.2%, n=14 on my GM45 forced to load all uniforms through the varying-index path), but we get a whole vec4 at a time to reuse in the next commit. v2: Fix comment about channels in the other message. Reviewed-by: Kenneth Graunke <[email protected]> NOTE: This is a candidate for the 9.1 branch.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp55
1 files changed, 37 insertions, 18 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index bc1fef16b01..0f6b7155cbe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -674,47 +674,66 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
- struct brw_reg index)
+ struct brw_reg index,
+ struct brw_reg offset)
{
assert(intel->gen < 7); /* Should use the gen7 variant. */
assert(inst->header_present);
+ assert(inst->mlen);
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
- uint32_t msg_type, msg_control, rlen;
- if (intel->gen >= 6)
- msg_type = GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
- else if (intel->gen == 5 || intel->is_g4x)
- msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
- else
- msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
-
+ uint32_t simd_mode, rlen, msg_type;
if (dispatch_width == 16) {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS;
- rlen = 2;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ rlen = 8;
} else {
- msg_control = BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS;
- rlen = 1;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ rlen = 4;
+ }
+
+ if (intel->gen >= 5)
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+ else {
+ /* We always use the SIMD16 message so that we only have to load U, and
+ * not V or R.
+ */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
+ assert(inst->mlen == 3);
+ assert(inst->regs_written == 8);
+ rlen = 8;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
+ struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
+ BRW_REGISTER_TYPE_D);
+ brw_MOV(p, offset_mrf, offset);
+
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ send->header.compression_control = BRW_COMPRESSION_NONE;
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (intel->gen < 6)
send->header.destreg__conditionalmod = inst->base_mrf;
- brw_set_dp_read_message(p, send,
+
+ /* Our surface is set up as floats, regardless of what actual data is
+ * stored in it.
+ */
+ uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ brw_set_sampler_message(p, send,
surf_index,
- msg_control,
+ 0, /* sampler (unused) */
msg_type,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ rlen,
inst->mlen,
inst->header_present,
- rlen);
+ simd_mode,
+ return_format);
}
void
@@ -1305,7 +1324,7 @@ fs_generator::generate_code(exec_list *instructions)
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
- generate_varying_pull_constant_load(inst, dst, src[0]);
+ generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: