summaryrefslogtreecommitdiffstats
path: root/src/freedreno
diff options
context:
space:
mode:
authorKristian H. Kristensen <[email protected]>2019-03-26 10:31:54 -0700
committerKristian H. Kristensen <[email protected]>2019-03-27 13:26:02 -0700
commit893425a607a63a83e8a4c13fd963367c8d174678 (patch)
tree8854bd3625f9fbf5706dbd934f7c9f3f8fe37ded /src/freedreno
parent3c8779af325965a6c200b14ab4cc44c8f0b835e8 (diff)
freedreno/ir3: Push UBOs to constant file
We have a rather big constant file and it seems that the best way to use it is to upload all UBOs and lower UBO access the load_uniform. Signed-off-by: Kristian H. Kristensen <[email protected]> Reviewed-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno')
-rw-r--r--src/freedreno/ir3/ir3_context.c2
-rw-r--r--src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c111
-rw-r--r--src/freedreno/ir3/ir3_shader.h17
3 files changed, 118 insertions, 12 deletions
diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c
index 7c35b9ba65f..d6267165ec7 100644
--- a/src/freedreno/ir3/ir3_context.c
+++ b/src/freedreno/ir3/ir3_context.c
@@ -124,7 +124,7 @@ ir3_context_init(struct ir3_compiler *compiler,
* Immediates go last mostly because they are inserted in the CP pass
* after the nir -> ir3 frontend.
*/
- unsigned constoff = align(ctx->s->num_uniforms, 4);
+ unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
unsigned ptrsz = ir3_pointer_size(ctx);
memset(&so->constbase, ~0, sizeof(so->constbase));
diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
index 35b921990a2..aaa2a8684a2 100644
--- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
+++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c
@@ -27,9 +27,38 @@
#include "util/u_dynarray.h"
#include "mesa/main/macros.h"
-struct ir3_ubo_analysis_state {
- unsigned lower_count;
-};
+static inline struct ir3_ubo_range
+get_ubo_load_range(nir_intrinsic_instr *instr)
+{
+ struct ir3_ubo_range r;
+
+ const int bytes = nir_intrinsic_dest_components(instr) *
+ (nir_dest_bit_size(instr->dest) / 8);
+
+ r.start = ROUND_DOWN_TO(nir_src_as_uint(instr->src[1]), 16 * 4);
+ r.end = ALIGN(r.start + bytes, 16 * 4);
+
+ return r;
+}
+
+static void
+gather_ubo_ranges(nir_intrinsic_instr *instr,
+ struct ir3_ubo_analysis_state *state)
+{
+ if (!nir_src_is_const(instr->src[0]))
+ return;
+
+ if (!nir_src_is_const(instr->src[1]))
+ return;
+
+ const struct ir3_ubo_range r = get_ubo_load_range(instr);
+ const uint32_t block = nir_src_as_uint(instr->src[0]);
+
+ if (r.start < state->range[block].start)
+ state->range[block].start = r.start;
+ if (state->range[block].end < r.end)
+ state->range[block].end = r.end;
+}
static void
lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
@@ -43,15 +72,37 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
return;
const uint32_t block = nir_src_as_uint(instr->src[0]);
- if (block > 0)
- return;
+
+ if (block > 0) {
+ /* We don't lower dynamic array indexing either, but we definitely should.
+ * We don't have a good way of determining the range of the dynamic
+ * access, so for now just fall back to pulling.
+ */
+ if (!nir_src_is_const(instr->src[1]))
+ return;
+
+ /* After gathering the UBO access ranges, we limit the total
+ * upload. Reject if we're now outside the range.
+ */
+ const struct ir3_ubo_range r = get_ubo_load_range(instr);
+ if (!(state->range[block].start <= r.start &&
+ r.end <= state->range[block].end))
+ return;
+ }
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *ubo_offset = nir_ssa_for_src(b, instr->src[1], 1);
- nir_ssa_def *uniform_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
- if (uniform_offset == NULL)
- uniform_offset = nir_ushr(b, ubo_offset, nir_imm_int(b, 2));
+ nir_ssa_def *new_offset = ir3_nir_try_propagate_bit_shift(b, ubo_offset, -2);
+ if (new_offset)
+ ubo_offset = new_offset;
+ else
+ ubo_offset = nir_ushr(b, ubo_offset, nir_imm_int(b, 2));
+
+ const int range_offset =
+ (state->range[block].offset - state->range[block].start) / 4;
+ nir_ssa_def *uniform_offset =
+ nir_iadd(b, ubo_offset, nir_imm_int(b, range_offset));
nir_intrinsic_instr *uniform =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
@@ -72,7 +123,45 @@ lower_ubo_load_to_uniform(nir_intrinsic_instr *instr, nir_builder *b,
bool
ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
{
- struct ir3_ubo_analysis_state state = { 0 };
+ struct ir3_ubo_analysis_state *state = &shader->ubo_state;
+
+ memset(state, 0, sizeof(*state));
+ state->range[0].end = nir->num_uniforms * 16;
+
+ nir_foreach_function(function, nir) {
+ if (function->impl) {
+ nir_foreach_block(block, function->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type == nir_instr_type_intrinsic &&
+ nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
+ gather_ubo_ranges(nir_instr_as_intrinsic(instr), state);
+ }
+ }
+ }
+ }
+
+ /* For now, everything we upload is accessed statically and thus will be
+ * used by the shader. Once we can upload dynamically indexed data, we may
+ * upload sparsely accessed arrays, at which point we probably want to
+ * give priority to smaller UBOs, on the assumption that big UBOs will be
+ * accessed dynamically. Alternatively, we can track statically and
+ * dynamically accessed ranges separately and upload static rangtes
+ * first.
+ */
+ const uint32_t max_upload = 16 * 1024;
+ uint32_t offset = 0;
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->range); i++) {
+ uint32_t range_size = state->range[i].end - state->range[i].start;
+
+ debug_assert(offset <= max_upload);
+ state->range[i].offset = offset;
+ if (offset + range_size > max_upload) {
+ range_size = max_upload - offset;
+ state->range[i].end = state->range[i].start + range_size;
+ }
+ offset += range_size;
+ }
+ state->size = offset;
nir_foreach_function(function, nir) {
if (function->impl) {
@@ -82,7 +171,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
nir_foreach_instr_safe(instr, block) {
if (instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_ubo)
- lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, &state);
+ lower_ubo_load_to_uniform(nir_instr_as_intrinsic(instr), &builder, state);
}
}
@@ -91,5 +180,5 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader *shader)
}
}
- return state.lower_count > 0;
+ return state->lower_count > 0;
}
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 647651c03b0..58d14197879 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -67,6 +67,8 @@ enum ir3_driver_param {
#define IR3_MAX_SHADER_IMAGES 32
#define IR3_MAX_SO_BUFFERS 4
#define IR3_MAX_SO_OUTPUTS 64
+#define IR3_MAX_CONSTANT_BUFFERS 32
+
/**
* For consts needed to pass internal values to shader which may or may not
@@ -474,6 +476,19 @@ struct ir3_shader_variant {
struct ir3_shader *shader;
};
+struct ir3_ubo_range {
+ uint32_t offset; /* start offset of this block in const register file */
+ uint32_t start, end; /* range of block that's actually used */
+};
+
+struct ir3_ubo_analysis_state
+{
+ struct ir3_ubo_range range[IR3_MAX_CONSTANT_BUFFERS];
+ uint32_t size;
+ uint32_t lower_count;
+};
+
+
struct ir3_shader {
gl_shader_stage type;
@@ -486,6 +501,8 @@ struct ir3_shader {
struct ir3_compiler *compiler;
+ struct ir3_ubo_analysis_state ubo_state;
+
struct nir_shader *nir;
struct ir3_stream_output_info stream_output;