From 78b81be627734ea7fa50ea246c07b0d4a3a1638a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Wed, 25 Nov 2015 14:14:05 -0800 Subject: nir: Get rid of *_indirect variants of input/output load/store intrinsics There is some special-casing needed in a competent back-end. However, they can do their special-casing easily enough based on whether or not the offset is a constant. In the mean time, having the *_indirect variants adds special cases a number of places where they don't need to be and, in general, only complicates things. To complicate matters, NIR had no way to convdert an indirect load/store to a direct one in the case that the indirect was a constant so we would still not really get what the back-ends wanted. The best solution seems to be to get rid of the *_indirect variants entirely. This commit is a bunch of different changes squashed together: - nir: Get rid of *_indirect variants of input/output load/store intrinsics - nir/glsl: Stop handling UBO/SSBO load/stores differently depending on indirect - nir/lower_io: Get rid of load/store_foo_indirect - i965/fs: Get rid of load/store_foo_indirect - i965/vec4: Get rid of load/store_foo_indirect - tgsi_to_nir: Get rid of load/store_foo_indirect - ir3/nir: Use the new unified io intrinsics - vc4: Do all uniform loads with byte offsets - vc4/nir: Use the new unified io intrinsics - vc4: Fix load_user_clip_plane crash - vc4: add missing src for store outputs - vc4: Fix state uniforms - nir/lower_clip: Update to the new load/store intrinsics - nir/lower_two_sided_color: Update to the new load intrinsic NIR and i965 changes are Reviewed-by: Kenneth Graunke NIR indirect declarations and vc4 changes are Reviewed-by: Eric Anholt ir3 changes are Reviewed-by: Rob Clark NIR changes are Acked-by: Rob Clark --- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 79 +++++++++++++--------- 1 file changed, 47 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers/freedreno') diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 2723959cb5f..eea5c5e28db 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1218,6 +1218,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *addr, *src0, *src1; + nir_const_value *const_offset; /* UBO addresses are the first driver params: */ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); unsigned off = intr->const_index[0]; @@ -1231,7 +1232,10 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0)); } - if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) { + const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + off += const_offset->u[0]; + } else { /* For load_ubo_indirect, second src is indirect offset: */ src1 = get_src(ctx, &intr->src[1])[0]; @@ -1394,6 +1398,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) struct ir3_instruction **dst, **src; struct ir3_block *b = ctx->block; unsigned idx = intr->const_index[0]; + nir_const_value *const_offset; if (info->has_dest) { dst = get_dst(ctx, &intr->dest, intr->num_components); @@ -1403,43 +1408,49 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform(ctx, n); - } - break; - case nir_intrinsic_load_uniform_indirect: - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0])); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform(ctx, n); + } + } else { + src = get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform_indirect(ctx, n, + get_addr(ctx, src[0])); + } + /* NOTE: if relative addressing is used, we set + * constlen in the compiler (to worst-case value) + * since we don't know in the assembler what the max + * addr reg value can be: + */ + ctx->so->constlen = ctx->s->num_uniforms; } - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - ctx->so->constlen = ctx->s->num_uniforms; break; case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = ctx->ir->inputs[n]; - } - break; - case nir_intrinsic_load_input_indirect: - src = get_src(ctx, &intr->src[0]); - struct ir3_instruction *collect = - create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, - n, addr, collect); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = ctx->ir->inputs[n]; + } + } else { + src = get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); + struct ir3_instruction *addr = get_addr(ctx, src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); + } } break; case nir_intrinsic_load_var: @@ -1449,6 +1460,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinisic_store_var(ctx, intr); break; case nir_intrinsic_store_output: + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset != NULL); + idx += const_offset->u[0]; + src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; -- cgit v1.2.3