aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2013-10-09 17:17:59 -0700
committer: Eric Anholt <[email protected]> 2013-10-10 15:54:16 -0700
commit: 36fbe66d3a71df76fcb6f915846da4471b3a8442 (patch)
tree: 636b90e73c5b7f977811014791dcb1c90ab45eac /src/mesa/drivers/dri/i965/brw_fs_generator.cpp
parent: ee21c8b1e6d3a506fa04d8f86e99b2afe9fca841 (diff)
i965/fs: Convert gen7 to using GRFs for texture messages.
Looking at Lightsmark's shaders, the way we used MRFs (or in gen7's case, GRFs) was bad in a couple of ways. One was that it prevented compute-to-MRF for the common case of a texcoord that gets used exactly once, but where the texcoord setup all gets emitted before the texture calls (such as when it's a bare fragment shader input, which gets interpolated before processing main()). Another was that it introduced a bunch of dependencies that constrained scheduling, and forced waits for texture operations to be done before they are required. For example, we can now move the compute-to-MRF interpolation for the second texture send down after the first send.

The downside is that this generally prevents remove_duplicate_mrf_writes() from doing anything, whereas previously it avoided work for the case of sampling from the same texcoord twice. However, I suspect that most of the win that originally justified that code was in avoiding the WAR stall on the first send, which this patch also avoids, rather than the small cost of the extra instruction. We see instruction count regressions in shaders in unigine, yofrankie, savage2, hon, and gstreamer.

Improves GLB2.7 performance by 0.633628% +/- 0.491809% (n=121/125, avg of ~66fps, outliers below 61 dropped).

Improves openarena performance by 1.01092% +/- 0.66897% (n=425).

No significant difference on Lightsmark (n=44).

v2: Squash in the fix for register unspilling for send-from-GRF, fixing a segfault in lightsmark.

Reviewed-by: Kenneth Graunke <[email protected]>
Acked-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_generator.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 27
1 file changed, 23 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index dbfbc113dc7..4b668f162aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -501,24 +501,43 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
dst = vec16(dst);
}
+ if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
+ /* The send-from-GRF for 16-wide texturing with a header has an extra
+ * hardware register allocated to it, which we need to skip over (since
+ * our coordinates in the payload are in the even-numbered registers,
+ * and the header comes right before the first one).
+ */
+ assert(src.file == BRW_GENERAL_REGISTER_FILE);
+ src.nr++;
+ }
+
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->texture_offset) {
+ struct brw_reg header_reg;
+
+ if (brw->gen >= 7) {
+ header_reg = src;
+ } else {
+ assert(inst->base_mrf != -1);
+ header_reg = retype(brw_message_reg(inst->base_mrf),
+ BRW_REGISTER_TYPE_UD);
+ }
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
- brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* Then set the offset bits in DWord 2. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+ brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
+ header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
+ assert(brw->gen < 7);
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
}