Diffstat (limited to 'src')
-rw-r--r--  src/freedreno/ir3/ir3_compiler.c                |  4
-rw-r--r--  src/freedreno/ir3/ir3_compiler.h                | 10
-rw-r--r--  src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c  | 24
3 files changed, 27 insertions, 11 deletions
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 3bb71c8942d..366e2008f8b 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -73,6 +73,8 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
 		compiler->unminify_coords = false;
 		compiler->txf_ms_with_isaml = false;
 		compiler->array_index_add_half = true;
+		compiler->max_const = 1024;
+		compiler->const_upload_unit = 4;
 	} else {
 		/* no special handling for "flat" */
 		compiler->flat_bypass = false;
@@ -80,6 +82,8 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
 		compiler->unminify_coords = true;
 		compiler->txf_ms_with_isaml = true;
 		compiler->array_index_add_half = false;
+		compiler->max_const = 512;
+		compiler->const_upload_unit = 8;
 	}
 
 	return compiler;
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 9b5307ed936..a5af717471e 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -67,6 +67,16 @@ struct ir3_compiler {
 
 	/* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders: */
 	bool samgq_workaround;
+
+	/* on a3xx, the limit on const access is lower than later gens (in vec4
+	 * units):
+	 */
+	uint32_t max_const;
+
+	/* on a3xx, the unit of indirect const load is higher than later gens (in
+	 * vec4 units):
+	 */
+	uint32_t const_upload_unit;
 };
 
 struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id);
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 393b948847a..ba72e7351ef 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -28,15 +28,15 @@
 #include "util/u_math.h"
 
 static inline struct ir3_ubo_range
-get_ubo_load_range(nir_intrinsic_instr *instr)
+get_ubo_load_range(nir_intrinsic_instr *instr, uint32_t alignment)
 {
 	struct ir3_ubo_range r;
 
 	int offset = nir_src_as_uint(instr->src[1]);
 	const int bytes = nir_intrinsic_dest_components(instr) * 4;
 
-	r.start = ROUND_DOWN_TO(offset, 16 * 4);
-	r.end = ALIGN(offset + bytes, 16 * 4);
+	r.start = ROUND_DOWN_TO(offset, alignment * 16);
+	r.end = ALIGN(offset + bytes, alignment * 16);
 
 	return r;
 }
@@ -85,7 +85,7 @@ get_existing_range(nir_intrinsic_instr *instr,
 
 static void
 gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
-		  struct ir3_ubo_analysis_state *state)
+		  struct ir3_ubo_analysis_state *state, uint32_t alignment)
 {
 	struct ir3_ubo_range *old_r = get_existing_range(instr, state, true);
 	if (!old_r)
@@ -97,13 +97,13 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
 			 * load_uniform.  Set the range to cover all of UBO 0.
 			 */
 			old_r->start = 0;
-			old_r->end = ALIGN(nir->num_uniforms * 16, 16 * 4);
+			old_r->end = ALIGN(nir->num_uniforms * 16, alignment * 16);
 		}
 		return;
 	}
 
-	const struct ir3_ubo_range r = get_ubo_load_range(instr);
+	const struct ir3_ubo_range r = get_ubo_load_range(instr, alignment);
 
 	/* if UBO lowering is disabled, we still want to lower block 0
 	 * (which is normal uniforms):
@@ -207,7 +207,7 @@ lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b, int *num_u
 
 static void
 lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
-			  struct ir3_ubo_analysis_state *state, int *num_ubos)
+			  struct ir3_ubo_analysis_state *state, int *num_ubos, uint32_t alignment)
 {
 	b->cursor = nir_before_instr(&instr->instr);
 
@@ -234,7 +234,7 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
 	/* After gathering the UBO access ranges, we limit the total
 	 * upload. Reject if we're now outside the range.
 	 */
-	const struct ir3_ubo_range r = get_ubo_load_range(instr);
+	const struct ir3_ubo_range r = get_ubo_load_range(instr, alignment);
 	if (!(range->start <= r.start && r.end <= range->end)) {
 		lower_ubo_block_decrement(instr, b, num_ubos);
 		return;
 	}
@@ -325,7 +325,8 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 		nir_foreach_block (block, function->impl) {
 			nir_foreach_instr (instr, block) {
 				if (instr_is_load_ubo(instr))
-					gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state);
+					gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
+							  state, shader->compiler->const_upload_unit);
 			}
 		}
 	}
@@ -339,7 +340,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 	 * dynamically accessed ranges separately and upload static rangtes
 	 * first.
 	 */
-	const uint32_t max_upload = 16 * 1024;
+	const uint32_t max_upload = shader->compiler->max_const * 16;
 	uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
 	state->num_enabled = ARRAY_SIZE(state->range);
 	for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
@@ -370,7 +371,8 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
 			nir_foreach_instr_safe (instr, block) {
 				if (instr_is_load_ubo(instr))
 					lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
-								  &builder, state, &num_ubos);
+								  &builder, state, &num_ubos,
+								  shader->compiler->const_upload_unit);
 			}
 		}
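
A rough sketch of what the new fields change, for illustration only: it is standalone C, not Mesa code; ROUND_DOWN_TO/ALIGN are local stand-ins for the util/u_math.h macros, and the helper names (load_range, ubo_range, upload_unit_vec4) and example offsets are invented here. It mirrors the alignment * 16 rounding that get_ubo_load_range() now does, showing how the same load covers a wider range on a3xx (const_upload_unit = 8 vec4, i.e. 128 bytes) than on a4xx+ (4 vec4, i.e. 64 bytes).

/* Illustration of the rounding in get_ubo_load_range(); not Mesa code. */
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for Mesa's util macros. */
#define ROUND_DOWN_TO(x, y) ((x) - ((x) % (y)))
#define ALIGN(x, y)         (ROUND_DOWN_TO((x) + (y) - 1, (y)))

struct ubo_range { uint32_t start, end; };

static struct ubo_range
load_range(uint32_t byte_offset, uint32_t components, uint32_t upload_unit_vec4)
{
	const uint32_t bytes = components * 4;              /* 32-bit components */
	const uint32_t align_bytes = upload_unit_vec4 * 16; /* one vec4 = 16 bytes */
	struct ubo_range r = {
		.start = ROUND_DOWN_TO(byte_offset, align_bytes),
		.end   = ALIGN(byte_offset + bytes, align_bytes),
	};
	return r;
}

int main(void)
{
	/* A vec4 load at byte offset 80 (vec4 index 5): */
	struct ubo_range later = load_range(80, 4, 4); /* a4xx+: 64-byte unit  */
	struct ubo_range a3xx  = load_range(80, 4, 8); /* a3xx: 128-byte unit  */

	printf("a4xx+: [%u, %u)\n", later.start, later.end); /* [64, 128) */
	printf("a3xx:  [%u, %u)\n", a3xx.start, a3xx.end);   /* [0, 128)  */
	return 0;
}

The max_const value uses the same vec4-to-byte conversion: the previously hard-coded max_upload of 16 * 1024 bytes corresponds to 1024 vec4, which the patch keeps for a4xx and later and lowers to 512 vec4 (8 KB) on a3xx.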