diff options
author | Rob Clark <[email protected]> | 2017-10-30 13:23:37 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2017-11-12 12:28:59 -0500 |
commit | 33f5f63b8fc157fa2fd2a142783f31db987c9d55 (patch) | |
tree | 473de2c3b3ca740ecd72ebf9fa47fd46aca6008c /src/gallium | |
parent | b267a0840443fbccee0b46653a14f3d278490761 (diff) | |
freedreno/ir3: add SSBO get_buffer_size() support
Somehow I overlooked this when adding initial SSBO support.
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 27 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_nir.c | 35 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_nir.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 24 |
6 files changed, 122 insertions, 11 deletions
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 21931e9dfbe..3d8e43ad3e4 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -379,14 +379,8 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); for (unsigned i = 0; i < count; i++) { - struct pipe_shader_buffer *buf = &so->sb[i]; - if (buf->buffer) { - struct fd_resource *rsc = fd_resource(buf->buffer); - OUT_RELOCW(ring, rsc->bo, 0, 0, 0); - } else { - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000000); - } + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); } @@ -401,10 +395,13 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, CP_LOAD_STATE4_2_EXT_SRC_ADDR_HI(0)); for (unsigned i = 0; i < count; i++) { struct pipe_shader_buffer *buf = &so->sb[i]; + unsigned sz = buf->buffer_size; - // TODO maybe offset encoded somewhere here?? 
- OUT_RING(ring, (buf->buffer_size << 16)); - OUT_RING(ring, 0x00000000); + /* width is in dwords, overflows into height: */ + sz /= 4; + + OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz)); + OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); } OUT_PKT7(ring, CP_LOAD_STATE4, 3 + (2 * count)); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 6b352de3e3d..ac38ec61311 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -217,6 +217,8 @@ compile_init(struct ir3_compiler *compiler, nir_print_shader(ctx->s, stdout); } + ir3_nir_scan_driver_consts(ctx->s, &so->const_layout); + so->num_uniforms = ctx->s->num_uniforms; so->num_ubos = ctx->s->info.num_ubos; @@ -225,6 +227,7 @@ compile_init(struct ir3_compiler *compiler, * * user consts * UBO addresses + * SSBO sizes * if (vertex shader) { * driver params (IR3_DP_*) * if (stream_output.num_outputs > 0) @@ -245,6 +248,12 @@ compile_init(struct ir3_compiler *compiler, constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4; } + if (so->const_layout.ssbo_size.count > 0) { + unsigned cnt = so->const_layout.ssbo_size.count; + so->constbase.ssbo_sizes = constoff; + constoff += align(cnt, 4) / 4; + } + unsigned num_driver_params = 0; if (so->type == SHADER_VERTEX) { num_driver_params = IR3_DP_VS_COUNT; @@ -1302,6 +1311,21 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) array_insert(b, b->keeps, stgb); } +/* src[] = { block_index } */ +static void +emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, + struct ir3_instruction **dst) +{ + /* SSBO size stored as a const starting at ssbo_sizes: */ + unsigned blk_idx = nir_src_as_const_value(intr->src[0])->u32[0]; + unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) + + ctx->so->const_layout.ssbo_size.off[blk_idx]; + + debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx)); + + 
dst[0] = create_uniform(ctx, idx); +} + static struct ir3_instruction * emit_intrinsic_atomic(struct ir3_context *ctx, nir_intrinsic_instr *intr) { @@ -1483,6 +1507,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_store_ssbo: emit_intrinsic_store_ssbo(ctx, intr); break; + case nir_intrinsic_get_buffer_size: + emit_intrinsic_ssbo_size(ctx, intr, dst); + break; case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: case nir_intrinsic_ssbo_atomic_umin: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index ef7facff332..19d05b462e5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -209,3 +209,38 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, return s; } + +void +ir3_nir_scan_driver_consts(nir_shader *shader, + struct ir3_driver_const_layout *layout) +{ + nir_foreach_function(function, shader) { + if (!function->impl) + continue; + + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = + nir_instr_as_intrinsic(instr); + unsigned idx; + + switch (intr->intrinsic) { + case nir_intrinsic_get_buffer_size: + idx = nir_src_as_const_value(intr->src[0])->u32[0]; + if (layout->ssbo_size.mask & (1 << idx)) + break; + layout->ssbo_size.mask |= (1 << idx); + layout->ssbo_size.off[idx] = + layout->ssbo_size.count; + layout->ssbo_size.count += 1; /* one const per */ + break; + default: + break; + } + } + } + } +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h index 2e2e093b098..e0e3108e328 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h @@ -34,6 +34,8 @@ #include "ir3_shader.h" +void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout); 
+ bool ir3_nir_lower_if_else(nir_shader *shader); bool ir3_nir_apply_trig_workarounds(nir_shader *shader); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 557cd5613a4..26f291de894 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -607,6 +607,27 @@ emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, } static void +emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v, + struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb) +{ + uint32_t offset = v->constbase.ssbo_sizes; + if (v->constlen > offset) { + uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)]; + unsigned mask = v->const_layout.ssbo_size.mask; + + while (mask) { + unsigned index = u_bit_scan(&mask); + unsigned off = v->const_layout.ssbo_size.off[index]; + sizes[off] = sb->sb[index].buffer_size; + } + + fd_wfi(ctx->batch, ring); + ctx->emit_const(ring, v->type, offset * 4, + 0, ARRAY_SIZE(sizes), sizes, NULL); + } +} + +static void emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { @@ -726,6 +747,11 @@ emit_common_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin if (shader_dirty) emit_immediates(ctx, v, ring); } + + if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { + struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; + emit_ssbo_sizes(ctx, v, ring, sb); + } } void diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 9984809ea24..dd68e69d16c 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -56,6 +56,26 @@ enum ir3_driver_param { IR3_DP_VS_COUNT = 36 /* must be aligned to vec4 */ }; +/** + * For consts needed to pass internal values to shader which may or may not + * be required, rather than allocating worst-case 
const space, we scan the + * shader and allocate consts as-needed: + * + * + SSBO sizes: only needed if shader has a get_buffer_size intrinsic + * for a given SSBO + */ +struct ir3_driver_const_layout { + struct { + uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */ + uint32_t count; /* number of consts allocated */ + /* one const allocated per SSBO which has get_buffer_size, + * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes + * consts: + */ + uint32_t off[PIPE_MAX_SHADER_BUFFERS]; + } ssbo_size; +}; + /* Configuration key used to identify a shader variant.. different * shader variants can be used to implement features not supported * in hw (two sided color), binning-pass vertex shader, etc. @@ -173,6 +193,7 @@ struct ir3_shader_variant { struct ir3_shader_key key; + struct ir3_driver_const_layout const_layout; struct ir3_info info; struct ir3 *ir; @@ -191,6 +212,7 @@ struct ir3_shader_variant { * constants, etc. */ unsigned num_uniforms; + unsigned num_ubos; /* About Linkage: @@ -271,6 +293,8 @@ struct ir3_shader_variant { struct { /* user const start at zero */ unsigned ubo; + /* NOTE that a3xx might need a section for SSBO addresses too */ + unsigned ssbo_sizes; unsigned driver_param; unsigned tfbo; unsigned immediate; |