diff options
author | Rob Clark <[email protected]> | 2019-05-06 14:52:27 -0700 |
---|---|---|
committer | Rob Clark <[email protected]> | 2019-05-07 07:26:00 -0700 |
commit | 23e7a34466c448c4c7c9a2c2e4d200dedf2584f7 (patch) | |
tree | 9feaa00be35bce3642c01a29588956ae237c6b09 /src/freedreno | |
parent | ef3eecd66bdcaa3991dd2b53cb3e7285bed6d718 (diff) |
freedreno/ir3: consolidate const state
Combine the offsets of different parts of the constant space with (what
was formerly known as) ir3_driver_const_layout. Bunch of churn, but no
functional change.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- | src/freedreno/ir3/ir3_a4xx.c | 7 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 22 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_context.c | 53 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_cp.c | 4 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_nir.c | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_nir.h | 2 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.c | 3 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_shader.h | 72 |
8 files changed, 90 insertions, 75 deletions
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c index 426a143acfb..5fe15cf8e27 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -217,10 +217,11 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var, /* to calculate the byte offset (yes, uggg) we need (up to) three * const values to know the bytes per pixel, and y and z stride: */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned cb = regid(const_state->offsets.image_dims, 0) + + const_state->image_dims.off[var->data.driver_location]; - debug_assert(ctx->so->const_layout.image_dims.mask & + debug_assert(const_state->image_dims.mask & (1 << var->data.driver_location)); /* offset = coords.x * bytes_per_pixel: */ diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 7a3b4a19ad7..3eb34f44b14 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -107,7 +107,8 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->constbase.driver_param; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned n = const_state->offsets.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx->block, r); } @@ -683,7 +684,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr, /* UBO addresses are the first driver params, but subtract 2 here to * account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0 * is the uniforms: */ - unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned ubo = regid(const_state->offsets.ubo, 0) - 2; const unsigned 
ptrsz = ir3_pointer_size(ctx->compiler); int off = 0; @@ -751,11 +753,12 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { /* SSBO size stored as a const starting at ssbo_sizes: */ + struct ir3_const_state *const_state = &ctx->so->const_state; unsigned blk_idx = nir_src_as_uint(intr->src[0]); - unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + - ctx->so->const_layout.ssbo_size.off[blk_idx]; + unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + + const_state->ssbo_size.off[blk_idx]; - debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); + debug_assert(const_state->ssbo_size.mask & (1 << blk_idx)); dst[0] = create_uniform(ctx->block, idx); } @@ -1006,8 +1009,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, * bytes-per-pixel should have been emitted in 2nd slot of * image_dims. See ir3_shader::emit_image_dims(). */ - unsigned cb = regid(ctx->so->constbase.image_dims, 0) + - ctx->so->const_layout.image_dims.off[var->data.driver_location]; + struct ir3_const_state *const_state = &ctx->so->const_state; + unsigned cb = regid(const_state->offsets.image_dims, 0) + + const_state->image_dims.off[var->data.driver_location]; struct ir3_instruction *aux = create_uniform(b, cb + 1); tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0); @@ -2225,7 +2229,6 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list) static void emit_stream_out(struct ir3_context *ctx) { - struct ir3_shader_variant *v = ctx->so; struct ir3 *ir = ctx->ir; struct ir3_stream_output_info *strmout = &ctx->so->shader->stream_output; @@ -2283,10 +2286,11 @@ emit_stream_out(struct ir3_context *ctx) * stripped out in the backend. 
*/ for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) { + struct ir3_const_state *const_state = &ctx->so->const_state; unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; - base = create_uniform(ctx->block, regid(v->constbase.tfbo, i)); + base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i)); /* 24-bit should be enough: */ off = ir3_MUL_U(ctx->block, vtxcnt, 0, diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 8c7d9a33f3a..d2210184a60 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -101,51 +101,34 @@ ir3_context_init(struct ir3_compiler *compiler, nir_print_shader(ctx->s, stderr); } - ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); + ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); - so->num_uniforms = ctx->s->num_uniforms; - so->num_ubos = ctx->s->info.num_ubos; + struct ir3_const_state *const_state = &so->const_state; + memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); - ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures); + ir3_nir_scan_driver_consts(ctx->s, const_state); + + const_state->num_uniforms = ctx->s->num_uniforms; + const_state->num_ubos = ctx->s->info.num_ubos; - /* Layout of constant registers, each section aligned to vec4. Note - * that pointer size (ubo, etc) changes depending on generation. - * - * user consts - * UBO addresses - * SSBO sizes - * if (vertex shader) { - * driver params (IR3_DP_*) - * if (stream_output.num_outputs > 0) - * stream-out addresses - * } - * immediates - * - * Immediates go last mostly because they are inserted in the CP pass - * after the nir -> ir3 frontend. 
- * - * Note UBO size in bytes should be aligned to vec4 - */ debug_assert((ctx->so->shader->ubo_state.size % 16) == 0); unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4); unsigned ptrsz = ir3_pointer_size(ctx->compiler); - memset(&so->constbase, ~0, sizeof(so->constbase)); - - if (so->num_ubos > 0) { - so->constbase.ubo = constoff; + if (const_state->num_ubos > 0) { + const_state->offsets.ubo = constoff; constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; } - if (so->const_layout.ssbo_size.count > 0) { - unsigned cnt = so->const_layout.ssbo_size.count; - so->constbase.ssbo_sizes = constoff; + if (const_state->ssbo_size.count > 0) { + unsigned cnt = const_state->ssbo_size.count; + const_state->offsets.ssbo_sizes = constoff; constoff += align(cnt, 4) / 4; } - if (so->const_layout.image_dims.count > 0) { - unsigned cnt = so->const_layout.image_dims.count; - so->constbase.image_dims = constoff; + if (const_state->image_dims.count > 0) { + unsigned cnt = const_state->image_dims.count; + const_state->offsets.image_dims = constoff; constoff += align(cnt, 4) / 4; } @@ -156,17 +139,17 @@ ir3_context_init(struct ir3_compiler *compiler, num_driver_params = IR3_DP_CS_COUNT; } - so->constbase.driver_param = constoff; + const_state->offsets.driver_param = constoff; constoff += align(num_driver_params, 4) / 4; if ((so->type == MESA_SHADER_VERTEX) && (compiler->gpu_id < 500) && so->shader->stream_output.num_outputs > 0) { - so->constbase.tfbo = constoff; + const_state->offsets.tfbo = constoff; constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4; } - so->constbase.immediate = constoff; + const_state->offsets.immediate = constoff; return ctx; } diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 28ba43f09ee..983c5fa61f2 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -323,10 +323,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags ctx->immediate_idx++; } + struct 
ir3_const_state *const_state = &ctx->so->const_state; + new_flags &= ~IR3_REG_IMMED; new_flags |= IR3_REG_CONST; reg->flags = new_flags; - reg->num = i + (4 * ctx->so->constbase.immediate); + reg->num = i + (4 * const_state->offsets.immediate); return reg; } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 744fd958fc6..804196f63e9 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -278,7 +278,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, void ir3_nir_scan_driver_consts(nir_shader *shader, - struct ir3_driver_const_layout *layout) + struct ir3_const_state *layout) { nir_foreach_function(function, shader) { if (!function->impl) diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index b60374410bc..bc0d496adfb 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -33,7 +33,7 @@ #include "ir3_shader.h" -void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); +void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout); bool ir3_nir_apply_trig_workarounds(nir_shader *shader); bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 92e3e7b251d..63cad3ee414 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -350,8 +350,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) (regid >> 2), "xyzw"[regid & 0x3], i); } + struct ir3_const_state *const_state = &so->const_state; for (i = 0; i < so->immediates_count; i++) { - fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i); + fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i); fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", so->immediates[i].val[0], so->immediates[i].val[1], diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 7c1dc38de23..448f6052194 100644 --- 
a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -71,6 +71,14 @@ enum ir3_driver_param { /** + * Describes the layout of shader consts. This includes: + * + Driver lowered UBO ranges + * + SSBO sizes + * + Image sizes/dimensions + * + Driver params (ie. IR3_DP_*) + * + TFBO addresses (for generations that do not have hardware streamout) + * + Lowered immediates + * * For consts needed to pass internal values to shader which may or may not * be required, rather than allocating worst-case const space, we scan the * shader and allocate consts as-needed: @@ -80,8 +88,46 @@ enum ir3_driver_param { * * + Image dimensions: needed to calculate pixel offset, but only for * images that have a image_store intrinsic + * + * Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. + * + * user consts + * UBO addresses + * SSBO sizes + * if (vertex shader) { + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses + * } else if (compute_shader) { + * driver params (IR3_DP_*) + * } + * immediates + * + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. + * + * Note UBO size in bytes should be aligned to vec4 */ -struct ir3_driver_const_layout { +struct ir3_const_state { + /* number of uniforms (in vec4), not including built-in compiler + * constants, etc. 
+ */ + unsigned num_uniforms; + + unsigned num_ubos; + + struct { + /* user const start at zero */ + unsigned ubo; + /* NOTE that a3xx might need a section for SSBO addresses too */ + unsigned ssbo_sizes; + unsigned image_dims; + unsigned driver_param; + unsigned tfbo; + unsigned immediate; + } offsets; + struct { uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ uint32_t count; /* number of consts allocated */ @@ -340,7 +386,7 @@ struct ir3_shader_variant { bool binning_pass; struct ir3_shader_variant *binning; - struct ir3_driver_const_layout const_layout; + struct ir3_const_state const_state; struct ir3_info info; struct ir3 *ir; @@ -361,13 +407,6 @@ struct ir3_shader_variant { */ unsigned constlen; - /* number of uniforms (in vec4), not including built-in compiler - * constants, etc. - */ - unsigned num_uniforms; - - unsigned num_ubos; - /* About Linkage: * + Let the frag shader determine the position/compmask for the * varyings, since it is the place where we know if the varying @@ -451,21 +490,6 @@ struct ir3_shader_variant { bool per_samp; - /* Layout of constant registers, each section (in vec4). Pointer size - * is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the - * UBO and stream-out consts. - */ - struct { - /* user const start at zero */ - unsigned ubo; - /* NOTE that a3xx might need a section for SSBO addresses too */ - unsigned ssbo_sizes; - unsigned image_dims; - unsigned driver_param; - unsigned tfbo; - unsigned immediate; - } constbase; - unsigned immediates_count; unsigned immediates_size; struct { |