diff options
author | Jason Ekstrand <[email protected]> | 2015-11-25 14:14:05 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-12-10 12:25:16 -0800 |
commit | 78b81be627734ea7fa50ea246c07b0d4a3a1638a (patch) | |
tree | 10b0b098de5b3a111d076e9d8c5fca440fad45ad /src/gallium/drivers/freedreno | |
parent | f3970fad9e5b04e04de366a65fed5a30da618f9d (diff) |
nir: Get rid of *_indirect variants of input/output load/store intrinsics
There is some special-casing needed in a competent back-end. However, they
can do their special-casing easily enough based on whether or not the
offset is a constant. In the meantime, having the *_indirect variants
adds special cases in a number of places where they don't need to be and, in
general, only complicates things. To complicate matters, NIR had no way to
convert an indirect load/store to a direct one in the case that the
indirect was a constant so we would still not really get what the back-ends
wanted. The best solution seems to be to get rid of the *_indirect
variants entirely.
This commit is a bunch of different changes squashed together:
- nir: Get rid of *_indirect variants of input/output load/store intrinsics
- nir/glsl: Stop handling UBO/SSBO load/stores differently depending on indirect
- nir/lower_io: Get rid of load/store_foo_indirect
- i965/fs: Get rid of load/store_foo_indirect
- i965/vec4: Get rid of load/store_foo_indirect
- tgsi_to_nir: Get rid of load/store_foo_indirect
- ir3/nir: Use the new unified io intrinsics
- vc4: Do all uniform loads with byte offsets
- vc4/nir: Use the new unified io intrinsics
- vc4: Fix load_user_clip_plane crash
- vc4: add missing src for store outputs
- vc4: Fix state uniforms
- nir/lower_clip: Update to the new load/store intrinsics
- nir/lower_two_sided_color: Update to the new load intrinsic
NIR and i965 changes are
Reviewed-by: Kenneth Graunke <[email protected]>
NIR indirect declarations and vc4 changes are
Reviewed-by: Eric Anholt <[email protected]>
ir3 changes are
Reviewed-by: Rob Clark <[email protected]>
NIR changes are
Acked-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 79 |
1 files changed, 47 insertions, 32 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 2723959cb5f..eea5c5e28db 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1218,6 +1218,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *addr, *src0, *src1; + nir_const_value *const_offset; /* UBO addresses are the first driver params: */ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); unsigned off = intr->const_index[0]; @@ -1231,7 +1232,10 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0)); } - if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) { + const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + off += const_offset->u[0]; + } else { /* For load_ubo_indirect, second src is indirect offset: */ src1 = get_src(ctx, &intr->src[1])[0]; @@ -1394,6 +1398,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) struct ir3_instruction **dst, **src; struct ir3_block *b = ctx->block; unsigned idx = intr->const_index[0]; + nir_const_value *const_offset; if (info->has_dest) { dst = get_dst(ctx, &intr->dest, intr->num_components); @@ -1403,43 +1408,49 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform(ctx, n); - } - break; - case nir_intrinsic_load_uniform_indirect: - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0])); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += 
const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform(ctx, n); + } + } else { + src = get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform_indirect(ctx, n, + get_addr(ctx, src[0])); + } + /* NOTE: if relative addressing is used, we set + * constlen in the compiler (to worst-case value) + * since we don't know in the assembler what the max + * addr reg value can be: + */ + ctx->so->constlen = ctx->s->num_uniforms; } - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - ctx->so->constlen = ctx->s->num_uniforms; break; case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = ctx->ir->inputs[n]; - } - break; - case nir_intrinsic_load_input_indirect: - src = get_src(ctx, &intr->src[0]); - struct ir3_instruction *collect = - create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, - n, addr, collect); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = ctx->ir->inputs[n]; + } + } else { + src = get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); + struct ir3_instruction *addr = get_addr(ctx, src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_indirect_load(ctx, 
ctx->ir->ninputs, + n, addr, collect); + } } break; case nir_intrinsic_load_var: @@ -1449,6 +1460,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinisic_store_var(ctx, intr); break; case nir_intrinsic_store_output: + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset != NULL); + idx += const_offset->u[0]; + src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; |