diff options
author | Anuj Phogat <[email protected]> | 2016-01-05 08:41:39 -0800 |
---|---|---|
committer | Anuj Phogat <[email protected]> | 2017-06-09 16:02:59 -0700 |
commit | f9e31a26d4cf075e236e92aea63bb69eb9fad533 (patch) | |
tree | 4dc2ec84e9a66435198746ea6b2e0c431068f78d /src | |
parent | b76659997ebb08e69430bfd5eafbe2af5d494a8f (diff) |
i965/cnl: Make URB {VS, GS, HS, DS} sizes non multiple of 3
v1: By Ben Widawsky <[email protected]>
v2: v1 had an assert only for VS. Add the restriction for GS, HS and
DS as well, and make sure the allocated sizes are not a multiple of 3.
v3: Move the entry_size checks into the compiler code (Ken)
Signed-off-by: Anuj Phogat <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_shader.cpp | 8 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4.cpp | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_gs_visitor.cpp | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_tcs.cpp | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_urb.c | 1 |
5 files changed, 34 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 269b8a099a4..53d0742d2e8 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1197,6 +1197,14 @@ brw_compile_tes(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + prog_data->base.urb_read_length = 0; STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index a641ebfbba9..b443effca9a 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2839,10 +2839,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const unsigned vue_entries = MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots); - if (compiler->devinfo->gen == 6) + if (compiler->devinfo->gen == 6) { prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); - else + } else { prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. 
+ */ + if (compiler->devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + } if (INTEL_DEBUG & DEBUG_VS) { fprintf(stderr, "VS Output "); diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index d0236df0912..f763f482365 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -817,10 +817,17 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and * a multiple of 128 bytes in gen6. */ - if (compiler->devinfo->gen >= 7) + if (compiler->devinfo->gen >= 7) { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (compiler->devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + } else { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; + } assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim)); prog_data->output_topology = diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index 96597b8f2ad..c4d9f89a91b 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -441,6 +441,13 @@ brw_compile_tcs(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. 
+ */ + if (devinfo->gen == 10 && + vue_prog_data->urb_entry_size % 3 == 0) + vue_prog_data->urb_entry_size++; + /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway. diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 028161df395..525c9c4d3da 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -224,6 +224,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size, BEGIN_BATCH(8); for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + assert(brw->gen != 10 || entry_size[i] % 3); OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2)); OUT_BATCH(entries[i] | ((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | |