diff options
author | Caio Marcelo de Oliveira Filho <[email protected]> | 2019-05-23 14:17:59 -0700 |
---|---|---|
committer | Caio Marcelo de Oliveira Filho <[email protected]> | 2019-06-03 14:14:45 -0700 |
commit | 97cd865be28b3ef7508b243ed59e43cbc2a936f5 (patch) | |
tree | d4f6c3fd27dbd6474d0ff35dd423a7dc88f83fe2 /src/gallium/drivers/iris/iris_program.c | |
parent | 79f1529ae03f9f85c0844eaa08e84abe7ba0882f (diff) |
iris: Compact binding tables
Change the iris_binding_table to keep track of what surfaces are
actually going to be used, then assign binding table indices just for
those. Reducing unused bytes on those are valuable because we use a
reduced space for those tables in Iris.
The rest of the driver can go from "group indices" (i.e. UBO #2) to
BTI and vice-versa using helper functions. The value
IRIS_SURFACE_NOT_USED is returned to indicate a certain group index is
not used or a certain BTI is not valid.
The environment variable INTEL_DISABLE_COMPACT_BINDING_TABLE can be
set to skip compacting binding table.
v2: (all from Ken)
Use BITFIELD64_MASK helper. Improve comments.
Assert all group is marked as used when we have indirects.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/gallium/drivers/iris/iris_program.c')
-rw-r--r-- | src/gallium/drivers/iris/iris_program.c | 219 |
1 files changed, 180 insertions, 39 deletions
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 2a1815b30ce..bd60e757d17 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -37,6 +37,7 @@ #include "pipe/p_screen.h" #include "util/u_atomic.h" #include "util/u_upload_mgr.h" +#include "util/debug.h" #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_serialize.h" @@ -501,30 +502,103 @@ iris_setup_uniforms(const struct brw_compiler *compiler, *out_num_cbufs = num_cbufs; } +enum { + /* Max elements in a surface group. */ + SURFACE_GROUP_MAX_ELEMENTS = 64, +}; + +/** + * Map a <group, index> pair to a binding table index. + * + * For example: <UBO, 5> => binding table index 12 + */ +uint32_t +iris_group_index_to_bti(const struct iris_binding_table *bt, + enum iris_surface_group group, uint32_t index) +{ + assert(index < bt->sizes[group]); + uint64_t mask = bt->used_mask[group]; + uint64_t bit = 1ull << index; + if (bit & mask) { + return bt->offsets[group] + util_bitcount64((bit - 1) & mask); + } else { + return IRIS_SURFACE_NOT_USED; + } +} + +/** + * Map a binding table index back to a <group, index> pair. + * + * For example: binding table index 12 => <UBO, 5> + */ +uint32_t +iris_bti_to_group_index(const struct iris_binding_table *bt, + enum iris_surface_group group, uint32_t bti) +{ + uint64_t used_mask = bt->used_mask[group]; + assert(bti >= bt->offsets[group]); + + uint32_t c = bti - bt->offsets[group]; + while (used_mask) { + int i = u_bit_scan64(&used_mask); + if (c == 0) + return i; + c--; + } + + return IRIS_SURFACE_NOT_USED; +} + static void rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt, nir_instr *instr, nir_src *src, enum iris_surface_group group) { - assert(bt->offsets[group] != 0xd0d0d0d0); + assert(bt->sizes[group] > 0); b->cursor = nir_before_instr(instr); nir_ssa_def *bti; if (nir_src_is_const(*src)) { - bti = nir_imm_intN_t(b, nir_src_as_uint(*src) + bt->offsets[group], + uint32_t index = nir_src_as_uint(*src); + bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index), src->ssa->bit_size); } else { + /* Indirect usage makes all the surfaces of the group to be available, + * so we can just add the base. + */ + assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group])); bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]); } nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti)); } +static void +mark_used_with_src(struct iris_binding_table *bt, nir_src *src, + enum iris_surface_group group) +{ + assert(bt->sizes[group] > 0); + + if (nir_src_is_const(*src)) { + uint64_t index = nir_src_as_uint(*src); + assert(index < bt->sizes[group]); + bt->used_mask[group] |= 1ull << index; + } else { + /* There's an indirect usage, we need all the surfaces. */ + bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]); + } +} + +static bool +skip_compacting_binding_tables(void) +{ + static int skip = -1; + if (skip < 0) + skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false); + return skip; +} + /** * Set up the binding table indices and apply to the shader. - * - * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're - * unused but also make sure that addition of small offsets to them will - * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. */ static void iris_setup_binding_table(struct nir_shader *nir, @@ -536,31 +610,24 @@ iris_setup_binding_table(struct nir_shader *nir, const struct shader_info *info = &nir->info; memset(bt, 0, sizeof(*bt)); - for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) - bt->offsets[i] = 0xd0d0d0d0; - /* Calculate the initial binding table index for each group. */ - uint32_t next_offset; + /* Set the sizes for each surface group. For some groups, we already know + * upfront how many will be used, so mark them. + */ if (info->stage == MESA_SHADER_FRAGMENT) { - next_offset = num_render_targets; - bt->offsets[IRIS_SURFACE_GROUP_RENDER_TARGET] = 0; + bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets; + /* All render targets used. */ + bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] = + BITFIELD64_MASK(num_render_targets); } else if (info->stage == MESA_SHADER_COMPUTE) { - next_offset = 1; - bt->offsets[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 0; - } else { - next_offset = 0; + bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1; + bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1; } - unsigned num_textures = util_last_bit(info->textures_used); - if (num_textures) { - bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] = next_offset; - next_offset += num_textures; - } + bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used); + bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used; - if (info->num_images) { - bt->offsets[IRIS_SURFACE_GROUP_IMAGE] = next_offset; - next_offset += info->num_images; - } + bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images; /* Allocate a slot in the UBO section for NIR constants if present. * We don't include them in iris_compiled_shader::num_cbufs because @@ -569,22 +636,93 @@ iris_setup_binding_table(struct nir_shader *nir, */ if (nir->constant_data_size > 0) num_cbufs++; + bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs; - if (num_cbufs) { - //assert(info->num_ubos <= BRW_MAX_UBO); - bt->offsets[IRIS_SURFACE_GROUP_UBO] = next_offset; - next_offset += num_cbufs; - } + /* The first IRIS_MAX_ABOs indices in the SSBO group are for atomics, real + * SSBOs start after that. Compaction will remove unused ABOs. + */ + bt->sizes[IRIS_SURFACE_GROUP_SSBO] = IRIS_MAX_ABOS + info->num_ssbos; + + for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) + assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS); + + /* Mark surfaces used for the cases we don't have the information available + * upfront. + */ + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + nir_foreach_block (block, impl) { + nir_foreach_instr (instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - if (info->num_ssbos || info->num_abos) { - bt->offsets[IRIS_SURFACE_GROUP_SSBO] = next_offset; - // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment - next_offset += IRIS_MAX_ABOS + info->num_ssbos; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_image_size: + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_load_raw_intel: + case nir_intrinsic_image_store_raw_intel: + mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE); + break; + + case nir_intrinsic_load_ubo: + mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO); + break; + + case nir_intrinsic_store_ssbo: + mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO); + break; + + case nir_intrinsic_get_buffer_size: + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + case nir_intrinsic_ssbo_atomic_fmin: + case nir_intrinsic_ssbo_atomic_fmax: + case nir_intrinsic_ssbo_atomic_fcomp_swap: + case nir_intrinsic_load_ssbo: + mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO); + break; + + default: + break; + } + } } - bt->size_bytes = next_offset * 4; + /* When disable we just mark everything as used. */ + if (unlikely(skip_compacting_binding_tables())) { + for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) + bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]); + } - nir_function_impl *impl = nir_shader_get_entrypoint(nir); + /* Calculate the offsets and the binding table size based on the used + * surfaces. After this point, the functions to go between "group indices" + * and binding table indices can be used. + */ + uint32_t next = 0; + for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) { + if (bt->used_mask[i] != 0) { + bt->offsets[i] = next; + next += util_bitcount64(bt->used_mask[i]); + } + } + bt->size_bytes = next * 4; /* Apply the binding table indices. The backend compiler is not expected * to change those, as we haven't set any of the *_start entries in brw @@ -596,9 +734,10 @@ iris_setup_binding_table(struct nir_shader *nir, nir_foreach_block (block, impl) { nir_foreach_instr (instr, block) { if (instr->type == nir_instr_type_tex) { - assert(bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] != 0xd0d0d0d0); - nir_instr_as_tex(instr)->texture_index += - bt->offsets[IRIS_SURFACE_GROUP_TEXTURE]; + nir_tex_instr *tex = nir_instr_as_tex(instr); + tex->texture_index = + iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE, + tex->texture_index); continue; } @@ -935,6 +1074,8 @@ iris_compile_tcs(struct iris_context *ice, /* Manually setup the TCS binding table. */ memset(&bt, 0, sizeof(bt)); + bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1; + bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1; bt.size_bytes = 4; prog_data->ubo_ranges[0].length = 1; |