diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/vulkan/Makefile.sources | 4 | ||||
-rw-r--r-- | src/intel/vulkan/anv_genX.h | 2 | ||||
-rw-r--r-- | src/intel/vulkan/gen7_cmd_buffer.c | 2 | ||||
-rw-r--r-- | src/intel/vulkan/gen8_cmd_buffer.c | 2 | ||||
-rw-r--r-- | src/intel/vulkan/genX_cmd_buffer.c | 160 | ||||
-rw-r--r-- | src/intel/vulkan/genX_l3.c | 199 |
6 files changed, 162 insertions, 207 deletions
diff --git a/src/intel/vulkan/Makefile.sources b/src/intel/vulkan/Makefile.sources index 9e56fe70eea..8b4b97ffd23 100644 --- a/src/intel/vulkan/Makefile.sources +++ b/src/intel/vulkan/Makefile.sources @@ -73,7 +73,6 @@ VULKAN_GENERATED_FILES := \ GEN7_FILES := \ genX_cmd_buffer.c \ - genX_l3.c \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ @@ -81,7 +80,6 @@ GEN7_FILES := \ GEN75_FILES := \ genX_cmd_buffer.c \ - genX_l3.c \ genX_pipeline.c \ gen7_cmd_buffer.c \ gen7_pipeline.c \ @@ -89,7 +87,6 @@ GEN75_FILES := \ GEN8_FILES := \ genX_cmd_buffer.c \ - genX_l3.c \ genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ @@ -97,7 +94,6 @@ GEN8_FILES := \ GEN9_FILES := \ genX_cmd_buffer.c \ - genX_l3.c \ genX_pipeline.c \ gen8_cmd_buffer.c \ gen8_pipeline.c \ diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 11814dd2833..bfec1aeca86 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -45,7 +45,7 @@ void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer); void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, - const struct anv_pipeline *pipeline); + const struct gen_l3_config *cfg); void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer); void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer); diff --git a/src/intel/vulkan/gen7_cmd_buffer.c b/src/intel/vulkan/gen7_cmd_buffer.c index 61778aa25b1..b627ef0a6ff 100644 --- a/src/intel/vulkan/gen7_cmd_buffer.c +++ b/src/intel/vulkan/gen7_cmd_buffer.c @@ -189,7 +189,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); genX(flush_pipeline_select_gpgpu)(cmd_buffer); diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c index e22b4e2132f..70586086efc 100644 --- a/src/intel/vulkan/gen8_cmd_buffer.c +++ b/src/intel/vulkan/gen8_cmd_buffer.c @@ -380,7 +380,7 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); genX(flush_pipeline_select_gpgpu)(cmd_buffer); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2806231ea2a..b6f93e7740d 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -26,6 +26,7 @@ #include "anv_private.h" +#include "common/gen_l3_config.h" #include "genxml/gen_macros.h" #include "genxml/genX_pack.h" @@ -149,6 +150,163 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) } } +#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 +#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 +#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 + +/** + * Program the hardware to use the specified L3 configuration. + */ +void +genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, + const struct gen_l3_config *cfg) +{ + assert(cfg); + if (cfg == cmd_buffer->state.current_l3_config) + return; + + if (unlikely(INTEL_DEBUG & DEBUG_L3)) { + fprintf(stderr, "L3 config transition: "); + gen_dump_l3_config(cfg, stderr); + } + + const bool has_slm = cfg->n[GEN_L3P_SLM]; + + /* According to the hardware docs, the L3 partitioning can only be changed + * while the pipeline is completely drained and the caches are flushed, + * which involves a first PIPE_CONTROL flush which stalls the pipeline... + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.DCFlushEnable = true; + pc.PostSyncOperation = NoWrite; + pc.CommandStreamerStallEnable = true; + } + + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO invalidation + * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL + * command is processed by the CS) we cannot combine it with the previous + * stalling flush as the hardware documentation suggests, because that + * would cause the CS to stall on previous rendering *after* RO + * invalidation and wouldn't prevent the RO caches from being polluted by + * concurrent rendering before the stall completes. This intentionally + * doesn't implement the SKL+ hardware workaround suggesting to enable CS + * stall on PIPE_CONTROLs with the texture cache invalidation bit set for + * GPGPU workloads because the previous and subsequent PIPE_CONTROLs + * already guarantee that there is no concurrent GPGPU kernel execution + * (see SKL HSD 2132585). + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.TextureCacheInvalidationEnable = true; + pc.ConstantCacheInvalidationEnable = true; + pc.InstructionCacheInvalidateEnable = true; + pc.StateCacheInvalidationEnable = true; + pc.PostSyncOperation = NoWrite; + } + + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. + */ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.DCFlushEnable = true; + pc.PostSyncOperation = NoWrite; + pc.CommandStreamerStallEnable = true; + } + +#if GEN_GEN >= 8 + + assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); + + uint32_t l3cr; + anv_pack_struct(&l3cr, GENX(L3CNTLREG), + .SLMEnable = has_slm, + .URBAllocation = cfg->n[GEN_L3P_URB], + .ROAllocation = cfg->n[GEN_L3P_RO], + .DCAllocation = cfg->n[GEN_L3P_DC], + .AllAllocation = cfg->n[GEN_L3P_ALL]); + + /* Set up the L3 partitioning. */ + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG_num), l3cr); + +#else + + const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL]; + const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] || + cfg->n[GEN_L3P_ALL]; + const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] || + cfg->n[GEN_L3P_ALL]; + const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] || + cfg->n[GEN_L3P_ALL]; + + assert(!cfg->n[GEN_L3P_ALL]); + + /* When enabled SLM only uses a portion of the L3 on half of the banks, + * the matching space on the remaining banks has to be allocated to a + * client (URB for all validated configurations) set to the + * lower-bandwidth 2-bank address hashing mode. + */ + const struct gen_device_info *devinfo = &cmd_buffer->device->info; + const bool urb_low_bw = has_slm && !devinfo->is_baytrail; + assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]); + + /* Minimum number of ways that can be allocated to the URB. */ + const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0); + assert(cfg->n[GEN_L3P_URB] >= n0_urb); + + uint32_t l3sqcr1, l3cr2, l3cr3; + anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1), + .ConvertDC_UC = !has_dc, + .ConvertIS_UC = !has_is, + .ConvertC_UC = !has_c, + .ConvertT_UC = !has_t); + l3sqcr1 |= + GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : + devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : + IVB_L3SQCREG1_SQGHPCI_DEFAULT; + + anv_pack_struct(&l3cr2, GENX(L3CNTLREG2), + .SLMEnable = has_slm, + .URBLowBandwidth = urb_low_bw, + .URBAllocation = cfg->n[GEN_L3P_URB], +#if !GEN_IS_HASWELL + .ALLAllocation = cfg->n[GEN_L3P_ALL], +#endif + .ROAllocation = cfg->n[GEN_L3P_RO], + .DCAllocation = cfg->n[GEN_L3P_DC]); + + anv_pack_struct(&l3cr3, GENX(L3CNTLREG3), + .ISAllocation = cfg->n[GEN_L3P_IS], + .ISLowBandwidth = 0, + .CAllocation = cfg->n[GEN_L3P_C], + .CLowBandwidth = 0, + .TAllocation = cfg->n[GEN_L3P_T], + .TLowBandwidth = 0); + + /* Set up the L3 partitioning. */ + emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1_num), l3sqcr1); + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2_num), l3cr2); + emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3_num), l3cr3); + +#if GEN_IS_HASWELL + if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) { + /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep + * them disabled to avoid crashing the system hard. + */ + uint32_t scratch1, chicken3; + anv_pack_struct(&scratch1, GENX(SCRATCH1), + .L3AtomicDisable = !has_dc); + anv_pack_struct(&chicken3, GENX(CHICKEN3), + .L3AtomicDisable = !has_dc); + emit_lri(&cmd_buffer->batch, GENX(SCRATCH1_num), scratch1); + emit_lri(&cmd_buffer->batch, GENX(CHICKEN3_num), chicken3); + } +#endif + +#endif + + cmd_buffer->state.current_l3_config = cfg; +} + void genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) { @@ -471,7 +629,7 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0); - genX(cmd_buffer_config_l3)(cmd_buffer, pipeline); + genX(cmd_buffer_config_l3)(cmd_buffer, pipeline->urb.l3_config); genX(flush_pipeline_select_3d)(cmd_buffer); diff --git a/src/intel/vulkan/genX_l3.c b/src/intel/vulkan/genX_l3.c deleted file mode 100644 index 3a96693572e..00000000000 --- a/src/intel/vulkan/genX_l3.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "anv_private.h" - -#include "common/gen_l3_config.h" -#include "genxml/gen_macros.h" -#include "genxml/genX_pack.h" - -#define emit_lri(batch, reg, imm) \ - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \ - lri.RegisterOffset = __anv_reg_num(reg); \ - lri.DataDWord = imm; \ - } - -#define IVB_L3SQCREG1_SQGHPCI_DEFAULT 0x00730000 -#define VLV_L3SQCREG1_SQGHPCI_DEFAULT 0x00d30000 -#define HSW_L3SQCREG1_SQGHPCI_DEFAULT 0x00610000 - -/** - * Program the hardware to use the specified L3 configuration. - */ -static void -setup_l3_config(struct anv_cmd_buffer *cmd_buffer/*, struct brw_context *brw*/, - const struct gen_l3_config *cfg) -{ - const bool has_slm = cfg->n[GEN_L3P_SLM]; - - /* According to the hardware docs, the L3 partitioning can only be changed - * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline... - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DCFlushEnable = true; - pc.PostSyncOperation = NoWrite; - pc.CommandStreamerStallEnable = true; - } - - /* ...followed by a second pipelined PIPE_CONTROL that initiates - * invalidation of the relevant caches. Note that because RO invalidation - * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL - * command is processed by the CS) we cannot combine it with the previous - * stalling flush as the hardware documentation suggests, because that - * would cause the CS to stall on previous rendering *after* RO - * invalidation and wouldn't prevent the RO caches from being polluted by - * concurrent rendering before the stall completes. This intentionally - * doesn't implement the SKL+ hardware workaround suggesting to enable CS - * stall on PIPE_CONTROLs with the texture cache invalidation bit set for - * GPGPU workloads because the previous and subsequent PIPE_CONTROLs - * already guarantee that there is no concurrent GPGPU kernel execution - * (see SKL HSD 2132585). - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.TextureCacheInvalidationEnable = true; - pc.ConstantCacheInvalidationEnable = true; - pc.InstructionCacheInvalidateEnable = true; - pc.StateCacheInvalidationEnable = true; - pc.PostSyncOperation = NoWrite; - } - - /* Now send a third stalling flush to make sure that invalidation is - * complete when the L3 configuration registers are modified. - */ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.DCFlushEnable = true; - pc.PostSyncOperation = NoWrite; - pc.CommandStreamerStallEnable = true; - } - -#if GEN_GEN >= 8 - - assert(!cfg->n[GEN_L3P_IS] && !cfg->n[GEN_L3P_C] && !cfg->n[GEN_L3P_T]); - - uint32_t l3cr; - anv_pack_struct(&l3cr, GENX(L3CNTLREG), - .SLMEnable = has_slm, - .URBAllocation = cfg->n[GEN_L3P_URB], - .ROAllocation = cfg->n[GEN_L3P_RO], - .DCAllocation = cfg->n[GEN_L3P_DC], - .AllAllocation = cfg->n[GEN_L3P_ALL]); - - /* Set up the L3 partitioning. */ - emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr); - -#else - - const bool has_dc = cfg->n[GEN_L3P_DC] || cfg->n[GEN_L3P_ALL]; - const bool has_is = cfg->n[GEN_L3P_IS] || cfg->n[GEN_L3P_RO] || - cfg->n[GEN_L3P_ALL]; - const bool has_c = cfg->n[GEN_L3P_C] || cfg->n[GEN_L3P_RO] || - cfg->n[GEN_L3P_ALL]; - const bool has_t = cfg->n[GEN_L3P_T] || cfg->n[GEN_L3P_RO] || - cfg->n[GEN_L3P_ALL]; - - assert(!cfg->n[GEN_L3P_ALL]); - - /* When enabled SLM only uses a portion of the L3 on half of the banks, - * the matching space on the remaining banks has to be allocated to a - * client (URB for all validated configurations) set to the - * lower-bandwidth 2-bank address hashing mode. - */ - const struct gen_device_info *devinfo = &cmd_buffer->device->info; - const bool urb_low_bw = has_slm && !devinfo->is_baytrail; - assert(!urb_low_bw || cfg->n[GEN_L3P_URB] == cfg->n[GEN_L3P_SLM]); - - /* Minimum number of ways that can be allocated to the URB. */ - const unsigned n0_urb = (devinfo->is_baytrail ? 32 : 0); - assert(cfg->n[GEN_L3P_URB] >= n0_urb); - - uint32_t l3sqcr1, l3cr2, l3cr3; - anv_pack_struct(&l3sqcr1, GENX(L3SQCREG1), - .ConvertDC_UC = !has_dc, - .ConvertIS_UC = !has_is, - .ConvertC_UC = !has_c, - .ConvertT_UC = !has_t); - l3sqcr1 |= - GEN_IS_HASWELL ? HSW_L3SQCREG1_SQGHPCI_DEFAULT : - devinfo->is_baytrail ? VLV_L3SQCREG1_SQGHPCI_DEFAULT : - IVB_L3SQCREG1_SQGHPCI_DEFAULT; - - anv_pack_struct(&l3cr2, GENX(L3CNTLREG2), - .SLMEnable = has_slm, - .URBLowBandwidth = urb_low_bw, - .URBAllocation = cfg->n[GEN_L3P_URB], -#if !GEN_IS_HASWELL - .ALLAllocation = cfg->n[GEN_L3P_ALL], -#endif - .ROAllocation = cfg->n[GEN_L3P_RO], - .DCAllocation = cfg->n[GEN_L3P_DC]); - - anv_pack_struct(&l3cr3, GENX(L3CNTLREG3), - .ISAllocation = cfg->n[GEN_L3P_IS], - .ISLowBandwidth = 0, - .CAllocation = cfg->n[GEN_L3P_C], - .CLowBandwidth = 0, - .TAllocation = cfg->n[GEN_L3P_T], - .TLowBandwidth = 0); - - /* Set up the L3 partitioning. */ - emit_lri(&cmd_buffer->batch, GENX(L3SQCREG1), l3sqcr1); - emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2); - emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3); - -#if GEN_IS_HASWELL - if (cmd_buffer->device->instance->physicalDevice.cmd_parser_version >= 4) { - /* Enable L3 atomics on HSW if we have a DC partition, otherwise keep - * them disabled to avoid crashing the system hard. - */ - uint32_t scratch1, chicken3; - anv_pack_struct(&scratch1, GENX(SCRATCH1), - .L3AtomicDisable = !has_dc); - anv_pack_struct(&chicken3, GENX(CHICKEN3), - .L3AtomicDisable = !has_dc); - emit_lri(&cmd_buffer->batch, GENX(SCRATCH1), scratch1); - emit_lri(&cmd_buffer->batch, GENX(CHICKEN3), chicken3); - } -#endif - -#endif - -} - -void -genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, - const struct anv_pipeline *pipeline) -{ - struct anv_cmd_state *state = &cmd_buffer->state; - const struct gen_l3_config *const cfg = pipeline->urb.l3_config; - assert(cfg); - if (cfg != state->current_l3_config) { - setup_l3_config(cmd_buffer, cfg); - state->current_l3_config = cfg; - - if (unlikely(INTEL_DEBUG & DEBUG_L3)) { - fprintf(stderr, "L3 config transition: "); - gen_dump_l3_config(cfg, stderr); - } - } -} |