about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorIlia Mirkin <[email protected]>2020-05-17 18:08:11 -0400
committerIlia Mirkin <[email protected]>2020-05-17 19:51:40 -0400
commitb5accb3ff997c073f9a9418c97ddd2365f81504f (patch)
tree9acf712ec7e97f5cdb0245ea8c0e3f142b913d58 /src
parent475fb28377fa4f5293c1a0853f2c4260418aea7f (diff)
freedreno/a3xx: parameterize ubo optimization
A3xx apparently has higher alignment requirements than later gens for indirect const uploads. It also has fewer of them. Add compiler parameters for both settings, and set accordingly for a3xx and a4xx+. This fixes all the ubo test failures caused by this optimization. Signed-off-by: Ilia Mirkin <[email protected]> Reviewed-by: Rob Clark <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5077>
Diffstat (limited to 'src')
-rw-r--r--src/freedreno/ir3/ir3_compiler.c4
-rw-r--r--src/freedreno/ir3/ir3_compiler.h10
-rw-r--r--src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c24
3 files changed, 27 insertions, 11 deletions
diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 3bb71c8942d..366e2008f8b 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -73,6 +73,8 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
compiler->unminify_coords = false;
compiler->txf_ms_with_isaml = false;
compiler->array_index_add_half = true;
+ compiler->max_const = 1024;
+ compiler->const_upload_unit = 4;
} else {
/* no special handling for "flat" */
compiler->flat_bypass = false;
@@ -80,6 +82,8 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
compiler->unminify_coords = true;
compiler->txf_ms_with_isaml = true;
compiler->array_index_add_half = false;
+ compiler->max_const = 512;
+ compiler->const_upload_unit = 8;
}
return compiler;
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 9b5307ed936..a5af717471e 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -67,6 +67,16 @@ struct ir3_compiler {
/* on a6xx, rewrite samgp to sequence of samgq0-3 in vertex shaders:
*/
bool samgq_workaround;
+
+ /* on a3xx, the limit on const access is lower than later gens (in vec4
+ * units):
+ */
+ uint32_t max_const;
+
+ /* on a3xx, the unit of indirect const load is higher than later gens (in
+ * vec4 units):
+ */
+ uint32_t const_upload_unit;
};
struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id);
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 393b948847a..ba72e7351ef 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -28,15 +28,15 @@
#include "util/u_math.h"
static inline struct ir3_ubo_range
-get_ubo_load_range(nir_intrinsic_instr *instr)
+get_ubo_load_range(nir_intrinsic_instr *instr, uint32_t alignment)
{
struct ir3_ubo_range r;
int offset = nir_src_as_uint(instr->src[1]);
const int bytes = nir_intrinsic_dest_components(instr) * 4;
- r.start = ROUND_DOWN_TO(offset, 16 * 4);
- r.end = ALIGN(offset + bytes, 16 * 4);
+ r.start = ROUND_DOWN_TO(offset, alignment * 16);
+ r.end = ALIGN(offset + bytes, alignment * 16);
return r;
}
@@ -85,7 +85,7 @@ get_existing_range(nir_intrinsic_instr *instr,
static void
gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
- struct ir3_ubo_analysis_state *state)
+ struct ir3_ubo_analysis_state *state, uint32_t alignment)
{
struct ir3_ubo_range *old_r = get_existing_range(instr, state, true);
if (!old_r)
@@ -97,13 +97,13 @@ gather_ubo_ranges(nir_shader *nir, nir_intrinsic_instr *instr,
* load_uniform. Set the range to cover all of UBO 0.
*/
old_r->start = 0;
- old_r->end = ALIGN(nir->num_uniforms * 16, 16 * 4);
+ old_r->end = ALIGN(nir->num_uniforms * 16, alignment * 16);
}
return;
}
- const struct ir3_ubo_range r = get_ubo_load_range(instr);
+ const struct ir3_ubo_range r = get_ubo_load_range(instr, alignment);
/* if UBO lowering is disabled, we still want to lower block 0
* (which is normal uniforms):
@@ -207,7 +207,7 @@ lower_ubo_block_decrement(nir_intrinsic_instr *instr, nir_builder *b, int *num_u
static void
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
- struct ir3_ubo_analysis_state *state, int *num_ubos)
+ struct ir3_ubo_analysis_state *state, int *num_ubos, uint32_t alignment)
{
b->cursor = nir_before_instr(&instr->instr);
@@ -234,7 +234,7 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
/* After gathering the UBO access ranges, we limit the total
* upload. Reject if we're now outside the range.
*/
- const struct ir3_ubo_range r = get_ubo_load_range(instr);
+ const struct ir3_ubo_range r = get_ubo_load_range(instr, alignment);
if (!(range->start <= r.start && r.end <= range->end)) {
lower_ubo_block_decrement(instr, b, num_ubos);
return;
@@ -325,7 +325,8 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
nir_foreach_block (block, function->impl) {
nir_foreach_instr (instr, block) {
if (instr_is_load_ubo(instr))
- gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr), state);
+ gather_ubo_ranges(nir, nir_instr_as_intrinsic(instr),
+ state, shader->compiler->const_upload_unit);
}
}
}
@@ -339,7 +340,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
* dynamically accessed ranges separately and upload static ranges
* first.
*/
- const uint32_t max_upload = 16 * 1024;
+ const uint32_t max_upload = shader->compiler->max_const * 16;
uint32_t offset = shader->const_state.num_reserved_user_consts * 16;
state->num_enabled = ARRAY_SIZE(state->range);
for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
@@ -370,7 +371,8 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
nir_foreach_instr_safe (instr, block) {
if (instr_is_load_ubo(instr))
lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr),
- &builder, state, &num_ubos);
+ &builder, state, &num_ubos,
+ shader->compiler->const_upload_unit);
}
}