diff options
author | Kenneth Graunke <[email protected]> | 2016-04-05 19:29:36 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2016-04-20 16:18:47 -0700 |
commit | 57118a19da932b4b5756021a0d75e91f42a68d99 (patch) | |
tree | f29518a0fd847ac3eccf9a5372dc389068106eea | |
parent | 528255b0b1498d22c820cecc5d75591d25ddb375 (diff) |
i965: Simplify gl_SampleID setup on Gen8+.
On Gen7+, the thread payload provides the sample ID - we can read it
in two instructions, without any elaborate calculations. We don't even
need a state dependency - this will properly produce zero in the
non-MSAA case. Unfortunately, we need the state flag anyway, so we
may as well continue to use it to produce a single MOV 0 instead of
SHR/AND.
For some reason, the sample ID field is always zero on Gen7/7.5, so
we can't use this yet. However, it works fine on Gen8+. So, land the
code and use it where it's working, and leave a TODO for later.
v2: Fix register types in the comment (caught by Matt Turner!).
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 42 |
1 file changed, 37 insertions, 5 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 551294dd7eb..748b242ad2d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1343,6 +1343,41 @@ fs_visitor::emit_sampleid_setup()
        * rasterization is disabled, gl_SampleID will always be zero."
        */
       abld.MOV(*reg, brw_imm_d(0));
+   } else if (devinfo->gen >= 8) {
+      /* Sample ID comes in as 4-bit numbers in g1.0:
+       *
+       *    15:12 Slot 3 SampleID (only used in SIMD16)
+       *     11:8 Slot 2 SampleID (only used in SIMD16)
+       *      7:4 Slot 1 SampleID
+       *      3:0 Slot 0 SampleID
+       *
+       * Each slot corresponds to four channels, so we want to replicate each
+       * half-byte value to 4 channels in a row:
+       *
+       * dst+0:    .7    .6    .5    .4    .3    .2    .1    .0
+       *          7:4   7:4   7:4   7:4   3:0   3:0   3:0   3:0
+       *
+       * dst+1:    .7    .6    .5    .4    .3    .2    .1    .0  (if SIMD16)
+       *        15:12 15:12 15:12 15:12  11:8  11:8  11:8  11:8
+       *
+       * First, we read g1.0 with a <1,8,0>UB region, causing the first 8
+       * channels to read the first byte (7:0), and the second group of 8
+       * channels to read the second byte (15:8). Then, we shift right by
+       * a vector immediate of <4, 4, 4, 4, 0, 0, 0, 0>, moving the slot 1 / 3
+       * values into place. Finally, we AND with 0xf to keep the low nibble.
+       *
+       *    shr(16) tmp<1>W g1.0<1,8,0>B 0x44440000:V
+       *    and(16) dst<1>D tmp<8,8,1>W 0xf:W
+       *
+       * TODO: These payload bits exist on Gen7 too, but they appear to always
+       *       be zero, so this code fails to work. We should find out why.
+       */
+      fs_reg tmp(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+
+      abld.SHR(tmp, fs_reg(stride(retype(brw_vec1_grf(1, 0),
+                                         BRW_REGISTER_TYPE_B), 1, 8, 0)),
+                    brw_imm_v(0x44440000));
+      abld.AND(*reg, tmp, brw_imm_w(0xf));
    } else {
       fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
       t1.set_smear(0);
@@ -1371,16 +1406,13 @@ fs_visitor::emit_sampleid_setup()
       /* SKL+ has an extra bit for the Starting Sample Pair Index to
        * accomodate 16x MSAA.
        */
-      unsigned sspi_mask = devinfo->gen >= 9 ? 0x1c0 : 0xc0;
-
       abld.exec_all().group(1, 0)
           .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
-               brw_imm_ud(sspi_mask));
+               brw_imm_ud(0xc0));
       abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5));
 
       /* This works for both SIMD8 and SIMD16 */
-      abld.exec_all().group(4, 0)
-          .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210));
+      abld.exec_all().group(4, 0).MOV(t2, brw_imm_v(0x3210));
 
       /* This special instruction takes care of setting vstride=1,
        * width=4, hstride=0 of t2 during an ADD instruction.
```