diff options
author | Francisco Jerez <[email protected]> | 2015-11-23 19:18:26 +0200 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2015-11-26 14:07:58 +0200 |
commit | 55ffa64daf765b1229364518106a4124bd84b9a7 (patch) | |
tree | bbc31d37dc2eebaf480f08a1b3ba55ba4162d9f7 /src/mesa/drivers | |
parent | bc8182808aea111aea3cfcba4da3dd861689d890 (diff) |
i965/gen9+: Switch thread scratch space to non-coherent stateless access.
The thread scratch space is thread-local so using the full IA-coherent
stateless surface index (255 since Gen8) is unnecessary and
potentially expensive. On Gen8 and early steppings of Gen9 this is
not a functional change because the kernel already sets bit 4 of
HDC_CHICKEN0, which forces all HDC memory access to be non-coherent
in order to work around a hardware bug.
This happens to fix a full system hang when running any spilling code
on a pre-production SKL GT4e machine I have on my desk (although forcing
all HDC access to non-coherent from the kernel up to stepping F0 might be
a good idea regardless of this patch). It also improves performance
of the OglPSBump2 SynMark benchmark run with INTEL_DEBUG=spill_fs by
33% (11 runs, 5% significance) on a production SKL GT2 (on which HDC
IA-coherency is apparently functional, so it wouldn't make sense to
disable it globally).
Reviewed-by: Kristian Høgsberg <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 |
3 files changed, 19 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index ef9434734df..686b42be11e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -315,6 +315,8 @@ void brw_oword_block_read(struct brw_codegen *p, uint32_t offset, uint32_t bind_table_index); +unsigned brw_scratch_surface_idx(const struct brw_codegen *p); + void brw_oword_block_read_scratch(struct brw_codegen *p, struct brw_reg dest, struct brw_reg mrf, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index da1ddfddb50..bb6f5dce91b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1997,6 +1997,19 @@ void gen6_math(struct brw_codegen *p, brw_set_src1(p, insn, src1); } +/** + * Return the right surface index to access the thread scratch space using + * stateless dataport messages. + */ +unsigned +brw_scratch_surface_idx(const struct brw_codegen *p) +{ + /* The scratch space is thread-local so IA coherency is unnecessary. 
*/ + if (p->devinfo->gen >= 8) + return GEN8_BTI_STATELESS_NON_COHERENT; + else + return BRW_BTI_STATELESS; +} /** * Write a block of OWORDs (half a GRF each) from the scratch buffer, @@ -2097,7 +2110,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_dp_write_message(p, insn, - 255, /* binding table index (255=stateless) */ + brw_scratch_surface_idx(p), msg_control, msg_type, mlen, @@ -2183,7 +2196,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_set_dp_read_message(p, insn, - 255, /* binding table index (255=stateless) */ + brw_scratch_surface_idx(p), msg_control, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ BRW_DATAPORT_READ_TARGET_RENDER_CACHE, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index acf92867689..434c4dc420a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -802,7 +802,7 @@ generate_scratch_read(struct brw_codegen *p, if (devinfo->gen < 6) brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, - 255, /* binding table index: stateless access */ + brw_scratch_surface_idx(p), BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, BRW_DATAPORT_READ_TARGET_RENDER_CACHE, @@ -875,7 +875,7 @@ generate_scratch_write(struct brw_codegen *p, if (devinfo->gen < 6) brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); brw_set_dp_write_message(p, send, - 255, /* binding table index: stateless access */ + brw_scratch_surface_idx(p), BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, 3, /* mlen */ |