Diffstat (limited to 'src/gallium')
-rw-r--r--   src/gallium/drivers/iris/iris_context.h |  18
-rw-r--r--   src/gallium/drivers/iris/iris_program.c | 219
-rw-r--r--   src/gallium/drivers/iris/iris_state.c   |  73
3 files changed, 234 insertions, 76 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index f7dcf8d64ec..4e544f830fd 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -292,10 +292,22 @@ enum iris_surface_group {
    IRIS_SURFACE_GROUP_COUNT,
 };
 
+enum {
+   /* Invalid value for a binding table index. */
+   IRIS_SURFACE_NOT_USED = 0xa0a0a0a0,
+};
+
 struct iris_binding_table {
    uint32_t size_bytes;
 
+   /** Number of surfaces in each group, before compacting. */
+   uint32_t sizes[IRIS_SURFACE_GROUP_COUNT];
+
+   /** Initial offset of each group. */
    uint32_t offsets[IRIS_SURFACE_GROUP_COUNT];
+
+   /** Mask of surfaces used in each group. */
+   uint64_t used_mask[IRIS_SURFACE_GROUP_COUNT];
 };
 
 /**
@@ -814,6 +826,12 @@ const struct shader_info *iris_get_shader_info(const struct iris_context *ice,
 struct iris_bo *iris_get_scratch_space(struct iris_context *ice,
                                        unsigned per_thread_scratch,
                                        gl_shader_stage stage);
+uint32_t iris_group_index_to_bti(const struct iris_binding_table *bt,
+                                 enum iris_surface_group group,
+                                 uint32_t index);
+uint32_t iris_bti_to_group_index(const struct iris_binding_table *bt,
+                                 enum iris_surface_group group,
+                                 uint32_t bti);
 
 /* iris_disk_cache.c */
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 2a1815b30ce..bd60e757d17 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -37,6 +37,7 @@
 #include "pipe/p_screen.h"
 #include "util/u_atomic.h"
 #include "util/u_upload_mgr.h"
+#include "util/debug.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_serialize.h"
@@ -501,30 +502,103 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
    *out_num_cbufs = num_cbufs;
 }
 
+enum {
+   /* Max elements in a surface group. */
+   SURFACE_GROUP_MAX_ELEMENTS = 64,
+};
+
+/**
+ * Map a <group, index> pair to a binding table index.
+ *
+ * For example: <UBO, 5> => binding table index 12
+ */
+uint32_t
+iris_group_index_to_bti(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t index)
+{
+   assert(index < bt->sizes[group]);
+   uint64_t mask = bt->used_mask[group];
+   uint64_t bit = 1ull << index;
+   if (bit & mask) {
+      return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
+   } else {
+      return IRIS_SURFACE_NOT_USED;
+   }
+}
+
+/**
+ * Map a binding table index back to a <group, index> pair.
+ *
+ * For example: binding table index 12 => <UBO, 5>
+ */
+uint32_t
+iris_bti_to_group_index(const struct iris_binding_table *bt,
+                        enum iris_surface_group group, uint32_t bti)
+{
+   uint64_t used_mask = bt->used_mask[group];
+   assert(bti >= bt->offsets[group]);
+
+   uint32_t c = bti - bt->offsets[group];
+   while (used_mask) {
+      int i = u_bit_scan64(&used_mask);
+      if (c == 0)
+         return i;
+      c--;
+   }
+
+   return IRIS_SURFACE_NOT_USED;
+}
+
 static void
 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
                      nir_instr *instr, nir_src *src,
                      enum iris_surface_group group)
 {
-   assert(bt->offsets[group] != 0xd0d0d0d0);
+   assert(bt->sizes[group] > 0);
 
    b->cursor = nir_before_instr(instr);
    nir_ssa_def *bti;
    if (nir_src_is_const(*src)) {
-      bti = nir_imm_intN_t(b, nir_src_as_uint(*src) + bt->offsets[group],
+      uint32_t index = nir_src_as_uint(*src);
+      bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
                            src->ssa->bit_size);
    } else {
+      /* Indirect usage makes all the surfaces of the group to be available,
+       * so we can just add the base.
+       */
+      assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
    }
 
    nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
 }
 
+static void
+mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
+                   enum iris_surface_group group)
+{
+   assert(bt->sizes[group] > 0);
+
+   if (nir_src_is_const(*src)) {
+      uint64_t index = nir_src_as_uint(*src);
+      assert(index < bt->sizes[group]);
+      bt->used_mask[group] |= 1ull << index;
+   } else {
+      /* There's an indirect usage, we need all the surfaces. */
+      bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
+   }
+}
+
+static bool
+skip_compacting_binding_tables(void)
+{
+   static int skip = -1;
+   if (skip < 0)
+      skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
+   return skip;
+}
+
 /**
  * Set up the binding table indices and apply to the shader.
- *
- * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
- * unused but also make sure that addition of small offsets to them will
- * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
  */
 static void
 iris_setup_binding_table(struct nir_shader *nir,
@@ -536,31 +610,24 @@ iris_setup_binding_table(struct nir_shader *nir,
    const struct shader_info *info = &nir->info;
 
    memset(bt, 0, sizeof(*bt));
 
-   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
-      bt->offsets[i] = 0xd0d0d0d0;
-
-   /* Calculate the initial binding table index for each group. */
-   uint32_t next_offset;
+   /* Set the sizes for each surface group.  For some groups, we already know
+    * upfront how many will be used, so mark them.
+    */
    if (info->stage == MESA_SHADER_FRAGMENT) {
-      next_offset = num_render_targets;
-      bt->offsets[IRIS_SURFACE_GROUP_RENDER_TARGET] = 0;
+      bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
+      /* All render targets used. */
+      bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
+         BITFIELD64_MASK(num_render_targets);
    } else if (info->stage == MESA_SHADER_COMPUTE) {
-      next_offset = 1;
-      bt->offsets[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 0;
-   } else {
-      next_offset = 0;
+      bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
+      bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
    }
 
-   unsigned num_textures = util_last_bit(info->textures_used);
-   if (num_textures) {
-      bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] = next_offset;
-      next_offset += num_textures;
-   }
+   bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = util_last_bit(info->textures_used);
+   bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used;
 
-   if (info->num_images) {
-      bt->offsets[IRIS_SURFACE_GROUP_IMAGE] = next_offset;
-      next_offset += info->num_images;
-   }
+   bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
 
    /* Allocate a slot in the UBO section for NIR constants if present.
     * We don't include them in iris_compiled_shader::num_cbufs because
@@ -569,22 +636,93 @@ iris_setup_binding_table(struct nir_shader *nir,
     */
    if (nir->constant_data_size > 0)
       num_cbufs++;
+   bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs;
 
-   if (num_cbufs) {
-      //assert(info->num_ubos <= BRW_MAX_UBO);
-      bt->offsets[IRIS_SURFACE_GROUP_UBO] = next_offset;
-      next_offset += num_cbufs;
-   }
+   /* The first IRIS_MAX_ABOs indices in the SSBO group are for atomics, real
+    * SSBOs start after that.  Compaction will remove unused ABOs.
+    */
+   bt->sizes[IRIS_SURFACE_GROUP_SSBO] = IRIS_MAX_ABOS + info->num_ssbos;
+
+   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+      assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
+
+   /* Mark surfaces used for the cases we don't have the information available
+    * upfront.
+    */
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr (instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
 
-   if (info->num_ssbos || info->num_abos) {
-      bt->offsets[IRIS_SURFACE_GROUP_SSBO] = next_offset;
-      // XXX: see iris_state "wasting 16 binding table slots for ABOs" comment
-      next_offset += IRIS_MAX_ABOS + info->num_ssbos;
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_image_size:
+         case nir_intrinsic_image_load:
+         case nir_intrinsic_image_store:
+         case nir_intrinsic_image_atomic_add:
+         case nir_intrinsic_image_atomic_min:
+         case nir_intrinsic_image_atomic_max:
+         case nir_intrinsic_image_atomic_and:
+         case nir_intrinsic_image_atomic_or:
+         case nir_intrinsic_image_atomic_xor:
+         case nir_intrinsic_image_atomic_exchange:
+         case nir_intrinsic_image_atomic_comp_swap:
+         case nir_intrinsic_image_load_raw_intel:
+         case nir_intrinsic_image_store_raw_intel:
+            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
+            break;
+
+         case nir_intrinsic_load_ubo:
+            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
+            break;
+
+         case nir_intrinsic_store_ssbo:
+            mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
+            break;
+
+         case nir_intrinsic_get_buffer_size:
+         case nir_intrinsic_ssbo_atomic_add:
+         case nir_intrinsic_ssbo_atomic_imin:
+         case nir_intrinsic_ssbo_atomic_umin:
+         case nir_intrinsic_ssbo_atomic_imax:
+         case nir_intrinsic_ssbo_atomic_umax:
+         case nir_intrinsic_ssbo_atomic_and:
+         case nir_intrinsic_ssbo_atomic_or:
+         case nir_intrinsic_ssbo_atomic_xor:
+         case nir_intrinsic_ssbo_atomic_exchange:
+         case nir_intrinsic_ssbo_atomic_comp_swap:
+         case nir_intrinsic_ssbo_atomic_fmin:
+         case nir_intrinsic_ssbo_atomic_fmax:
+         case nir_intrinsic_ssbo_atomic_fcomp_swap:
+         case nir_intrinsic_load_ssbo:
+            mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
+            break;
+
+         default:
+            break;
+         }
+      }
    }
 
-   bt->size_bytes = next_offset * 4;
+   /* When disable we just mark everything as used. */
+   if (unlikely(skip_compacting_binding_tables())) {
+      for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
+         bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
+   }
 
-   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   /* Calculate the offsets and the binding table size based on the used
+    * surfaces.  After this point, the functions to go between "group indices"
+    * and binding table indices can be used.
+    */
+   uint32_t next = 0;
+   for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
+      if (bt->used_mask[i] != 0) {
+         bt->offsets[i] = next;
+         next += util_bitcount64(bt->used_mask[i]);
+      }
+   }
+   bt->size_bytes = next * 4;
 
    /* Apply the binding table indices.  The backend compiler is not expected
    * to change those, as we haven't set any of the *_start entries in brw
@@ -596,9 +734,10 @@ iris_setup_binding_table(struct nir_shader *nir,
    nir_foreach_block (block, impl) {
       nir_foreach_instr (instr, block) {
         if (instr->type == nir_instr_type_tex) {
-            assert(bt->offsets[IRIS_SURFACE_GROUP_TEXTURE] != 0xd0d0d0d0);
-            nir_instr_as_tex(instr)->texture_index +=
-               bt->offsets[IRIS_SURFACE_GROUP_TEXTURE];
+            nir_tex_instr *tex = nir_instr_as_tex(instr);
+            tex->texture_index =
+               iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
+                                       tex->texture_index);
             continue;
          }
 
@@ -935,6 +1074,8 @@ iris_compile_tcs(struct iris_context *ice,
 
       /* Manually setup the TCS binding table. */
       memset(&bt, 0, sizeof(bt));
+      bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
+      bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
       bt.size_bytes = 4;
 
       prog_data->ubo_ranges[0].length = 1;
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 71816ca5a8a..abfd812c572 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -4102,8 +4102,9 @@ use_image(struct iris_batch *batch, struct iris_context *ice,
    assert(s < shader->bt.size_bytes / sizeof(uint32_t)); \
    if (!pin_only) bt_map[s++] = (addr) - binder_addr;
 
-#define bt_assert(section, exists) \
-   if (!pin_only) assert(shader->bt.offsets[section] == (exists) ? s : 0xd0d0d0d0)
+#define bt_assert(section) \
+   if (!pin_only && shader->bt.used_mask[section] != 0) \
+      assert(shader->bt.offsets[section] == s);
 
 /**
  * Populate the binding table for a given shader stage.
@@ -4124,6 +4125,7 @@ iris_populate_binding_table(struct iris_context *ice,
    if (!shader)
       return;
 
+   struct iris_binding_table *bt = &shader->bt;
    UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data;
    struct iris_shader_state *shs = &ice->state.shaders[stage];
    uint32_t binder_addr = binder->bo->gtt_offset;
@@ -4168,54 +4170,45 @@ iris_populate_binding_table(struct iris_context *ice,
       }
    }
 
-   unsigned num_textures = util_last_bit(info->textures_used);
+#define foreach_surface_used(index, group) \
+   bt_assert(group); \
+   for (int index = 0; index < bt->sizes[group]; index++) \
+      if (iris_group_index_to_bti(bt, group, index) != \
+          IRIS_SURFACE_NOT_USED)
 
-   bt_assert(IRIS_SURFACE_GROUP_TEXTURE, num_textures > 0);
-
-   for (int i = 0; i < num_textures; i++) {
+   foreach_surface_used(i, IRIS_SURFACE_GROUP_TEXTURE) {
      struct iris_sampler_view *view = shs->textures[i];
      uint32_t addr = view ? use_sampler_view(ice, batch, view)
                           : use_null_surface(batch, ice);
      push_bt_entry(addr);
   }
 
-   bt_assert(IRIS_SURFACE_GROUP_IMAGE, info->num_images > 0);
-
-   for (int i = 0; i < info->num_images; i++) {
+   foreach_surface_used(i, IRIS_SURFACE_GROUP_IMAGE) {
      uint32_t addr = use_image(batch, ice, shs, i);
      push_bt_entry(addr);
   }
 
-   bt_assert(IRIS_SURFACE_GROUP_UBO, shader->num_cbufs > 0);
+   foreach_surface_used(i, IRIS_SURFACE_GROUP_UBO) {
+      uint32_t addr;
 
-   for (int i = 0; i < shader->num_cbufs; i++) {
-      uint32_t addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
-                                   &shs->constbuf_surf_state[i], false);
-      push_bt_entry(addr);
-   }
+      if ((i == bt->sizes[IRIS_SURFACE_GROUP_UBO] - 1) && ish->const_data) {
+         iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false);
+         iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
+                            false);
+         addr = ish->const_data_state.offset;
+      } else {
+         addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
+                             &shs->constbuf_surf_state[i], false);
+      }
 
-   if (ish->const_data) {
-      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false);
-      iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
-                         false);
-      uint32_t addr = ish->const_data_state.offset;
      push_bt_entry(addr);
   }
 
-   bt_assert(IRIS_SURFACE_GROUP_SSBO, info->num_abos + info->num_ssbos > 0);
-
-   /* XXX: st is wasting 16 binding table slots for ABOs.  Should add a cap
-    * for changing nir_lower_atomics_to_ssbos setting and buffer_base offset
-    * in st_atom_storagebuf.c so it'll compact them into one range, with
-    * SSBOs starting at info->num_abos.  Ideally it'd reset num_abos to 0 too
-    */
-   if (info->num_abos + info->num_ssbos > 0) {
-      for (int i = 0; i < IRIS_MAX_ABOS + info->num_ssbos; i++) {
-         uint32_t addr =
-            use_ubo_ssbo(batch, ice, &shs->ssbo[i], &shs->ssbo_surf_state[i],
-                         shs->writable_ssbos & (1u << i));
-         push_bt_entry(addr);
-      }
+   foreach_surface_used(i, IRIS_SURFACE_GROUP_SSBO) {
+      uint32_t addr =
+         use_ubo_ssbo(batch, ice, &shs->ssbo[i], &shs->ssbo_surf_state[i],
+                      shs->writable_ssbos & (1u << i));
+      push_bt_entry(addr);
    }
 
 #if 0
@@ -4336,7 +4329,9 @@ iris_restore_render_saved_bos(struct iris_context *ice,
             continue;
 
          /* Range block is a binding table index, map back to UBO index. */
-         unsigned block_index = range->block - shader->bt.offsets[IRIS_SURFACE_GROUP_UBO];
+         unsigned block_index = iris_bti_to_group_index(
+            &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
+         assert(block_index != IRIS_SURFACE_NOT_USED);
          struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
          struct iris_resource *res = (void *) cbuf->buffer;
 
@@ -4424,7 +4419,9 @@ iris_restore_compute_saved_bos(struct iris_context *ice,
 
      if (range->length > 0) {
         /* Range block is a binding table index, map back to UBO index. */
-        unsigned block_index = range->block - shader->bt.offsets[IRIS_SURFACE_GROUP_UBO];
+        unsigned block_index = iris_bti_to_group_index(
+           &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
+        assert(block_index != IRIS_SURFACE_NOT_USED);
        struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
        struct iris_resource *res = (void *) cbuf->buffer;
 
@@ -4700,7 +4697,9 @@ iris_upload_dirty_render_state(struct iris_context *ice,
              continue;
 
           /* Range block is a binding table index, map back to UBO index.
            */
-          unsigned block_index = range->block - shader->bt.offsets[IRIS_SURFACE_GROUP_UBO];
+          unsigned block_index = iris_bti_to_group_index(
+             &shader->bt, IRIS_SURFACE_GROUP_UBO, range->block);
+          assert(block_index != IRIS_SURFACE_NOT_USED);
           struct pipe_shader_buffer *cbuf = &shs->constbuf[block_index];
           struct iris_resource *res = (void *) cbuf->buffer;
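
For reference, the mapping scheme this patch introduces can be exercised outside the driver. The sketch below is illustrative only and not part of the commit: it mirrors the offset calculation at the end of iris_setup_binding_table() and the iris_group_index_to_bti() / iris_bti_to_group_index() helpers, but uses hypothetical group names and the GCC/Clang popcount/ctz builtins in place of util_bitcount64() and u_bit_scan64(), and the masks chosen are arbitrary.

/* Standalone sketch of the binding table compaction scheme (not driver code).
 * Group names and masks are made up for illustration. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NOT_USED 0xa0a0a0a0

enum { GROUP_TEXTURE, GROUP_UBO, GROUP_COUNT };

struct table {
   uint32_t offsets[GROUP_COUNT];
   uint64_t used_mask[GROUP_COUNT];
};

/* Each group starts right after the compacted (used) entries of the previous
 * groups, like the offset loop at the end of iris_setup_binding_table(). */
static void
compute_offsets(struct table *t)
{
   uint32_t next = 0;
   for (int i = 0; i < GROUP_COUNT; i++) {
      if (t->used_mask[i] != 0) {
         t->offsets[i] = next;
         next += (uint32_t) __builtin_popcountll(t->used_mask[i]);
      }
   }
}

/* <group, index> -> binding table index: count the used bits below 'index'. */
static uint32_t
group_index_to_bti(const struct table *t, int group, uint32_t index)
{
   uint64_t bit = 1ull << index;
   if (!(t->used_mask[group] & bit))
      return NOT_USED;
   return t->offsets[group] +
          (uint32_t) __builtin_popcountll((bit - 1) & t->used_mask[group]);
}

/* binding table index -> group-local index: walk the used bits in order. */
static uint32_t
bti_to_group_index(const struct table *t, int group, uint32_t bti)
{
   uint64_t mask = t->used_mask[group];
   uint32_t c = bti - t->offsets[group];
   while (mask) {
      int i = __builtin_ctzll(mask);
      mask &= mask - 1;
      if (c == 0)
         return (uint32_t) i;
      c--;
   }
   return NOT_USED;
}

int
main(void)
{
   struct table t = {
      /* Textures 0-6 used; only UBOs 0, 2 and 5 used, the rest compacted away. */
      .used_mask = { [GROUP_TEXTURE] = 0x7f,
                     [GROUP_UBO] = (1 << 0) | (1 << 2) | (1 << 5) },
   };
   compute_offsets(&t);

   assert(t.offsets[GROUP_UBO] == 7);                        /* after 7 textures  */
   assert(group_index_to_bti(&t, GROUP_UBO, 5) == 9);        /* 7 + 2 used below  */
   assert(group_index_to_bti(&t, GROUP_UBO, 1) == NOT_USED); /* hole, no BT slot  */
   assert(bti_to_group_index(&t, GROUP_UBO, 9) == 5);        /* inverse mapping   */

   printf("<UBO, 5> -> %u\n", (unsigned) group_index_to_bti(&t, GROUP_UBO, 5));
   return 0;
}

With these arbitrary masks the UBO group starts at binding table index 7, so <UBO, 5> compacts to index 9 while the unused <UBO, 1> reports NOT_USED; the concrete numbers depend entirely on which surfaces a given shader actually uses.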