summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler/brw_fs.cpp
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2018-02-28 19:57:44 -0800
committerJason Ekstrand <[email protected]>2018-03-01 15:11:01 -0800
commitff4726077d86800d33520581f154a27dac408588 (patch)
tree8306ac0ca888b6ea68b5df271170873a7036e0b2 /src/intel/compiler/brw_fs.cpp
parentf5305c1b44a81d8e022997e0f2f5bd7556be7dea (diff)
intel/fs: Set up sampler message headers in the visitor on gen7+
This gives the scheduler visibility into the headers which should improve scheduling. More importantly, however, it lets the scheduler know that the header gets written. As-is, the scheduler thinks that a texture instruction only reads its payload and is unaware that it may write to the first register, so it may reorder it with respect to a read from that register. This is causing issues in a couple of Dota 2 vertex shaders. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104923 Cc: [email protected] Reviewed-by: Francisco Jerez <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs.cpp')
-rw-r--r--src/intel/compiler/brw_fs.cpp40
1 files changed, 36 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 244c6cda03a..0d7988dae47 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4192,17 +4192,15 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
op == SHADER_OPCODE_SAMPLEINFO ||
is_high_sampler(devinfo, sampler)) {
/* For general texture offsets (no txf workaround), we need a header to
- * put them in. Note that we're only reserving space for it in the
- * message payload as it will be initialized implicitly by the
- * generator.
+ * put them in.
*
* TG4 needs to place its channel select in the header, for interaction
* with ARB_texture_swizzle. The sampler index is only 4-bits, so for
* larger sampler numbers we need to offset the Sampler State Pointer in
* the header.
*/
+ fs_reg header = retype(sources[0], BRW_REGISTER_TYPE_UD);
header_size = 1;
- sources[0] = fs_reg();
length++;
/* If we're requesting fewer than four channels worth of response,
@@ -4214,6 +4212,40 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf;
inst->offset |= mask << 12;
}
+
+ /* Build the actual header */
+ const fs_builder ubld = bld.exec_all().group(8, 0);
+ const fs_builder ubld1 = ubld.group(1, 0);
+ ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ if (inst->offset) {
+ ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset));
+ } else if (bld.shader->stage != MESA_SHADER_VERTEX &&
+ bld.shader->stage != MESA_SHADER_FRAGMENT) {
+ /* The vertex and fragment stages have g0.2 set to 0, so
+ * header0.2 is 0 when g0 is copied. Other stages may not, so we
+ * must set it to 0 to avoid setting undesirable bits in the
+ * message.
+ */
+ ubld1.MOV(component(header, 2), brw_imm_ud(0));
+ }
+
+ if (is_high_sampler(devinfo, sampler)) {
+ if (sampler.file == BRW_IMMEDIATE_VALUE) {
+ assert(sampler.ud >= 16);
+ const int sampler_state_size = 16; /* 16 bytes */
+
+ ubld1.ADD(component(header, 3),
+ retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(16 * (sampler.ud / 16) * sampler_state_size));
+ } else {
+ fs_reg tmp = ubld1.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld1.AND(tmp, sampler, brw_imm_ud(0x0f0));
+ ubld1.SHL(tmp, tmp, brw_imm_ud(4));
+ ubld1.ADD(component(header, 3),
+ retype(brw_vec1_grf(0, 3), BRW_REGISTER_TYPE_UD),
+ tmp);
+ }
+ }
}
if (shadow_c.file != BAD_FILE) {