diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 54 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 38 |
6 files changed, 75 insertions, 53 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 95e6d26591c..6c3458a3b08 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -96,16 +96,16 @@ static void fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { + uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); - debug_assert((num % 4) == 0); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(num/2)); + CP_LOAD_STATE_0_NUM_UNIT(anum/2)); OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); @@ -120,6 +120,9 @@ fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, OUT_RING(ring, 0xbad00000 | (i << 16)); } } + + for (; i < anum; i++) + OUT_RING(ring, 0xffffffff); } #define VERT_TEX_OFF 0 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 9231823cb7f..2f3e0a6981b 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -96,16 +96,16 @@ static void fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets) { + uint32_t anum = align(num, 4); uint32_t i; debug_assert((regid % 4) == 0); - debug_assert((num % 4) == 0); - OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum); OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) | CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) | - CP_LOAD_STATE_0_NUM_UNIT(num/4)); + CP_LOAD_STATE_0_NUM_UNIT(anum/4)); 
OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); @@ -120,6 +120,9 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write, OUT_RING(ring, 0xbad00000 | (i << 16)); } } + + for (; i < anum; i++) + OUT_RING(ring, 0xffffffff); } static void diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index ac6840cd609..e0fc2aa49bd 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -119,6 +119,11 @@ struct ir3_compile { bool error; }; +/* gpu pointer size in units of 32bit registers/slots */ +static unsigned pointer_size(struct ir3_compile *ctx) +{ + return (ctx->compiler->gpu_id >= 500) ? 2 : 1; +} static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val); static struct ir3_block * get_block(struct ir3_compile *ctx, nir_block *nblock); @@ -181,31 +186,46 @@ compile_init(struct ir3_compiler *compiler, nir_print_shader(ctx->s, stdout); } - so->first_driver_param = so->first_immediate = align(ctx->s->num_uniforms, 4); + so->num_uniforms = ctx->s->num_uniforms; + so->num_ubos = ctx->s->info->num_ubos; - /* Layout of constant registers: + /* Layout of constant registers, each section aligned to vec4. Note + * that pointer size (ubo, etc) changes depending on generation. * - * num_uniform * vec4 - user consts - * 4 * vec4 - UBO addresses + * user consts + * UBO addresses * if (vertex shader) { - * N * vec4 - driver params (IR3_DP_*) - * 1 * vec4 - stream-out addresses + * driver params (IR3_DP_*) + * if (stream_output.num_outputs > 0) + * stream-out addresses * } + * immediates * - * TODO this could be made more dynamic, to at least skip sections - * that we don't need.. + * Immediates go last mostly because they are inserted in the CP pass + * after the nir -> ir3 frontend. 
*/ + unsigned constoff = align(ctx->s->num_uniforms, 4); + unsigned ptrsz = pointer_size(ctx); - /* reserve 4 (vec4) slots for ubo base addresses: */ - so->first_immediate += 4; + memset(&so->constbase, ~0, sizeof(so->constbase)); + + if (so->num_ubos > 0) { + so->constbase.ubo = constoff; + constoff += align(ctx->s->info->num_ubos * ptrsz, 4) / 4; + } if (so->type == SHADER_VERTEX) { - /* driver params (see ir3_driver_param): */ - so->first_immediate += IR3_DP_COUNT/4; /* convert to vec4 */ - /* one (vec4) slot for stream-output base addresses: */ - so->first_immediate++; + so->constbase.driver_param = constoff; + constoff += align(IR3_DP_COUNT, 4) / 4; + + if (so->shader->stream_output.num_outputs > 0) { + so->constbase.tfbo = constoff; + constoff += align(PIPE_MAX_SO_BUFFERS * ptrsz, 4) / 4; + } } + so->constbase.immediate = constoff; + return ctx; } @@ -576,7 +596,7 @@ create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp) { /* first four vec4 sysval's reserved for UBOs: */ /* NOTE: dp is in scalar, but there can be >4 dp components: */ - unsigned n = ctx->so->first_driver_param + IR3_DRIVER_PARAM_OFF; + unsigned n = ctx->so->constbase.driver_param; unsigned r = regid(n + dp / 4, dp % 4); return create_uniform(ctx, r); } @@ -975,7 +995,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, struct ir3_instruction *addr, *src0, *src1; nir_const_value *const_offset; /* UBO addresses are the first driver params: */ - unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); + unsigned ubo = regid(ctx->so->constbase.ubo, 0); int off = 0; /* First src is ubo index, which could either be an immed or not: */ @@ -1905,7 +1925,7 @@ emit_stream_out(struct ir3_compile *ctx) unsigned stride = strmout->stride[i]; struct ir3_instruction *base, *off; - base = create_uniform(ctx, regid(v->first_driver_param + IR3_TFBOS_OFF, i)); + base = create_uniform(ctx, regid(v->constbase.tfbo, i)); /* 24-bit should be enough: */ off = 
ir3_MUL_U(ctx->block, vtxcnt, 0, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 57c37e26372..71e02615c75 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -296,7 +296,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags new_flags &= ~IR3_REG_IMMED; new_flags |= IR3_REG_CONST; reg->flags = new_flags; - reg->num = i + (4 * ctx->so->first_immediate); + reg->num = i + (4 * ctx->so->constbase.immediate); return reg; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 8920225be13..4da7246a0cf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -366,7 +366,7 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin) } for (i = 0; i < so->immediates_count; i++) { - debug_printf("@const(c%d.x)\t", so->first_immediate + i); + debug_printf("@const(c%d.x)\t", so->constbase.immediate + i); debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n", so->immediates[i].val[0], so->immediates[i].val[1], @@ -503,7 +503,7 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v, * the user consts early to avoid HLSQ lockup caused by * writing too many consts */ - uint32_t max_const = MIN2(v->first_driver_param, v->constlen); + uint32_t max_const = MIN2(v->num_uniforms, v->constlen); // I expect that size should be a multiple of vec4's: assert(size == align(size, 4)); @@ -527,9 +527,9 @@ static void emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf) { - uint32_t offset = v->first_driver_param + IR3_UBOS_OFF; + uint32_t offset = v->constbase.ubo; if (v->constlen > offset) { - uint32_t params = MIN2(4, v->constlen - offset) * 4; + uint32_t params = v->num_ubos; uint32_t offsets[params]; struct pipe_resource 
*prscs[params]; @@ -557,7 +557,7 @@ emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { int size = v->immediates_count; - uint32_t base = v->first_immediate; + uint32_t base = v->constbase.immediate; /* truncate size to avoid writing constants that shader * does not use: @@ -581,7 +581,7 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v, struct fd_ringbuffer *ring) { /* streamout addresses after driver-params: */ - uint32_t offset = v->first_driver_param + IR3_TFBOS_OFF; + uint32_t offset = v->constbase.tfbo; if (v->constlen > offset) { struct fd_streamout_stateobj *so = &ctx->streamout; struct pipe_stream_output_info *info = &v->shader->stream_output; @@ -680,8 +680,8 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring, /* emit driver params every time: */ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */ if (info && (v->type == SHADER_VERTEX)) { - uint32_t offset = v->first_driver_param + IR3_DRIVER_PARAM_OFF; - if (v->constlen >= offset) { + uint32_t offset = v->constbase.driver_param; + if (v->constlen > offset) { uint32_t vertex_params[IR3_DP_COUNT] = { [IR3_DP_VTXID_BASE] = info->indexed ? info->index_bias : info->start, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index c603168a04b..7a0ff982e24 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -47,22 +47,6 @@ enum ir3_driver_param { IR3_DP_COUNT = 36 /* must be aligned to vec4 */ }; -/* Layout of constant registers: - * - * num_uniform * vec4 - user consts - * 4 * vec4 - UBO addresses - * if (vertex shader) { - * N * vec4 - driver params (IR3_DP_*) - * 1 * vec4 - stream-out addresses - * } - * - * TODO this could be made more dynamic, to at least skip sections - * that we don't need.. 
- */ -#define IR3_UBOS_OFF 0 /* UBOs after user consts */ -#define IR3_DRIVER_PARAM_OFF 4 /* driver params after UBOs */ -#define IR3_TFBOS_OFF (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4) - /* Configuration key used to identify a shader variant.. different * shader variants can be used to implement features not supported * in hw (two sided color), binning-pass vertex shader, etc. @@ -143,6 +127,12 @@ struct ir3_shader_variant { */ unsigned constlen; + /* number of uniforms (in vec4), not including built-in compiler + * constants, etc. + */ + unsigned num_uniforms; + unsigned num_ubos; + /* About Linkage: * + Let the frag shader determine the position/compmask for the * varyings, since it is the place where we know if the varying @@ -211,12 +201,18 @@ struct ir3_shader_variant { /* do we have kill instructions: */ bool has_kill; - /* const reg # of first immediate, ie. 1 == c1 - * (not regid, because TGSI thinks in terms of vec4 registers, - * not scalar registers) + /* Layout of constant registers, each section (in vec4). Pointer size + * is 32b (a3xx, a4xx), or 64b (a5xx+), which affects the size of the + * UBO and stream-out consts. */ - unsigned first_driver_param; - unsigned first_immediate; + struct { + /* user consts start at zero */ + unsigned ubo; + unsigned driver_param; + unsigned tfbo; + unsigned immediate; + } constbase; + unsigned immediates_count; struct { uint32_t val[4]; |