diff options
author | Jason Ekstrand <[email protected]> | 2015-11-25 14:14:05 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-12-10 12:25:16 -0800 |
commit | 78b81be627734ea7fa50ea246c07b0d4a3a1638a (patch) | |
tree | 10b0b098de5b3a111d076e9d8c5fca440fad45ad | |
parent | f3970fad9e5b04e04de366a65fed5a30da618f9d (diff) |
nir: Get rid of *_indirect variants of input/output load/store intrinsics
There is some special-casing needed in a competent back-end. However, they
can do their special-casing easily enough based on whether or not the
offset is a constant. In the mean time, having the *_indirect variants
adds special cases a number of places where they don't need to be and, in
general, only complicates things. To complicate matters, NIR had no way to
convdert an indirect load/store to a direct one in the case that the
indirect was a constant so we would still not really get what the back-ends
wanted. The best solution seems to be to get rid of the *_indirect
variants entirely.
This commit is a bunch of different changes squashed together:
- nir: Get rid of *_indirect variants of input/output load/store intrinsics
- nir/glsl: Stop handling UBO/SSBO load/stores differently depending on indirect
- nir/lower_io: Get rid of load/store_foo_indirect
- i965/fs: Get rid of load/store_foo_indirect
- i965/vec4: Get rid of load/store_foo_indirect
- tgsi_to_nir: Get rid of load/store_foo_indirect
- ir3/nir: Use the new unified io intrinsics
- vc4: Do all uniform loads with byte offsets
- vc4/nir: Use the new unified io intrinsics
- vc4: Fix load_user_clip_plane crash
- vc4: add missing src for store outputs
- vc4: Fix state uniforms
- nir/lower_clip: Update to the new load/store intrinsics
- nir/lower_two_sided_color: Update to the new load intrinsic
NIR and i965 changes are
Reviewed-by: Kenneth Graunke <[email protected]>
NIR indirect declarations and vc4 changes are
Reviewed-by: Eric Anholt <[email protected]>
ir3 changes are
Reviewed-by: Rob Clark <[email protected]>
NIR changes are
Acked-by: Rob Clark <[email protected]>
-rw-r--r-- | src/gallium/auxiliary/nir/tgsi_to_nir.c | 52 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 79 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_io.c | 54 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 2 | ||||
-rw-r--r-- | src/glsl/nir/glsl_to_nir.cpp | 74 | ||||
-rw-r--r-- | src/glsl/nir/nir.h | 2 | ||||
-rw-r--r-- | src/glsl/nir/nir_intrinsics.h | 88 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_clip.c | 3 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_io.c | 113 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_phis_to_scalar.c | 4 | ||||
-rw-r--r-- | src/glsl/nir/nir_lower_two_sided_color.c | 2 | ||||
-rw-r--r-- | src/glsl/nir/nir_print.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 128 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 45 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 95 |
19 files changed, 391 insertions, 414 deletions
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 5fef5423f82..5def6d3f32a 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -468,7 +468,7 @@ ttn_emit_immediate(struct ttn_compile *c) nir_builder_instr_insert(b, &load_const->instr); } -static nir_src +static nir_ssa_def * ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect); /* generate either a constant or indirect deref chain for accessing an @@ -487,7 +487,7 @@ ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr, if (indirect) { arr->deref_array_type = nir_deref_array_type_indirect; - arr->indirect = ttn_src_for_indirect(c, indirect); + arr->indirect = nir_src_for_ssa(ttn_src_for_indirect(c, indirect)); } else { arr->deref_array_type = nir_deref_array_type_direct; } @@ -586,19 +586,14 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, switch (file) { case TGSI_FILE_INPUT: - op = indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; + op = nir_intrinsic_load_input; assert(!dim); break; case TGSI_FILE_CONSTANT: if (dim) { - op = indirect ? nir_intrinsic_load_ubo_indirect : - nir_intrinsic_load_ubo; - /* convert index from vec4 to byte: */ - index *= 16; + op = nir_intrinsic_load_ubo; } else { - op = indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + op = nir_intrinsic_load_uniform; } break; default: @@ -609,7 +604,6 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load = nir_intrinsic_instr_create(b->shader, op); load->num_components = 4; - load->const_index[0] = index; if (dim) { if (dimind) { load->src[srcn] = @@ -622,17 +616,26 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, } srcn++; } - if (indirect) { - load->src[srcn] = ttn_src_for_indirect(c, indirect); - if (dim) { - assert(load->src[srcn].is_ssa); - /* we also need to covert vec4 to byte here too: */ - load->src[srcn] = - nir_src_for_ssa(nir_ishl(b, load->src[srcn].ssa, - nir_imm_int(b, 4))); + + nir_ssa_def *offset; + if (dim) { + /* UBO loads don't have a const_index[0] base offset. */ + offset = nir_imm_int(b, index); + if (indirect) { + offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect)); + } + /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */ + offset = nir_ishl(b, offset, nir_imm_int(b, 4)); + } else { + load->const_index[0] = index; + if (indirect) { + offset = ttn_src_for_indirect(c, indirect); + } else { + offset = nir_imm_int(b, 0); } - srcn++; } + load->src[srcn++] = nir_src_for_ssa(offset); + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -648,7 +651,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, return src; } -static nir_src +static nir_ssa_def * ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) { nir_builder *b = &c->build; @@ -660,7 +663,7 @@ ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect) indirect->File, indirect->Index, NULL, NULL, NULL); - return nir_src_for_ssa(nir_imov_alu(b, src, 1)); + return nir_imov_alu(b, src, 1); } static nir_alu_dest @@ -729,7 +732,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) { nir_src *indirect = ralloc(c->build.shader, nir_src); - *indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); + *indirect = nir_src_for_ssa(ttn_src_for_indirect(c, &tgsi_fdst->Indirect)); dest.dest.reg.indirect = indirect; } @@ -1927,9 +1930,10 @@ ttn_add_output_stores(struct ttn_compile *c) nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); unsigned loc = var->data.driver_location + i; store->num_components = 4; - store->const_index[0] = loc; store->src[0].reg.reg = c->output_regs[loc].reg; store->src[0].reg.base_offset = c->output_regs[loc].offset; + store->const_index[0] = loc; + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &store->instr); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 2723959cb5f..eea5c5e28db 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1218,6 +1218,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, { struct ir3_block *b = ctx->block; struct ir3_instruction *addr, *src0, *src1; + nir_const_value *const_offset; /* UBO addresses are the first driver params: */ unsigned ubo = regid(ctx->so->first_driver_param + IR3_UBOS_OFF, 0); unsigned off = intr->const_index[0]; @@ -1231,7 +1232,10 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0)); } - if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) { + const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + off += const_offset->u[0]; + } else { /* For load_ubo_indirect, second src is indirect offset: */ src1 = get_src(ctx, &intr->src[1])[0]; @@ -1394,6 +1398,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) struct ir3_instruction **dst, **src; struct ir3_block *b = ctx->block; unsigned idx = intr->const_index[0]; + nir_const_value *const_offset; if (info->has_dest) { dst = get_dst(ctx, &intr->dest, intr->num_components); @@ -1403,43 +1408,49 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_load_uniform: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform(ctx, n); - } - break; - case nir_intrinsic_load_uniform_indirect: - src = get_src(ctx, &intr->src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_uniform_indirect(ctx, n, - get_addr(ctx, src[0])); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform(ctx, n); + } + } else { + src = get_src(ctx, &intr->src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_uniform_indirect(ctx, n, + get_addr(ctx, src[0])); + } + /* NOTE: if relative addressing is used, we set + * constlen in the compiler (to worst-case value) + * since we don't know in the assembler what the max + * addr reg value can be: + */ + ctx->so->constlen = ctx->s->num_uniforms; } - /* NOTE: if relative addressing is used, we set constlen in - * the compiler (to worst-case value) since we don't know in - * the assembler what the max addr reg value can be: - */ - ctx->so->constlen = ctx->s->num_uniforms; break; case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: emit_intrinsic_load_ubo(ctx, intr, dst); break; case nir_intrinsic_load_input: - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = ctx->ir->inputs[n]; - } - break; - case nir_intrinsic_load_input_indirect: - src = get_src(ctx, &intr->src[0]); - struct ir3_instruction *collect = - create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); - struct ir3_instruction *addr = get_addr(ctx, src[0]); - for (int i = 0; i < intr->num_components; i++) { - unsigned n = idx * 4 + i; - dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, - n, addr, collect); + const_offset = nir_src_as_const_value(intr->src[0]); + if (const_offset) { + idx += const_offset->u[0]; + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = ctx->ir->inputs[n]; + } + } else { + src = get_src(ctx, &intr->src[0]); + struct ir3_instruction *collect = + create_collect(b, ctx->ir->inputs, ctx->ir->ninputs); + struct ir3_instruction *addr = get_addr(ctx, src[0]); + for (int i = 0; i < intr->num_components; i++) { + unsigned n = idx * 4 + i; + dst[i] = create_indirect_load(ctx, ctx->ir->ninputs, + n, addr, collect); + } } break; case nir_intrinsic_load_var: @@ -1449,6 +1460,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) emit_intrinisic_store_var(ctx, intr); break; case nir_intrinsic_store_output: + const_offset = nir_src_as_const_value(intr->src[1]); + compile_assert(ctx, const_offset != NULL); + idx += const_offset->u[0]; + src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 38676cff6b7..4b10cb7fe56 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -61,6 +61,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample) nir_intrinsic_load_input); load->num_components = 1; load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL); nir_builder_instr_insert(b, &load->instr); return &load->dest.ssa; @@ -612,6 +613,7 @@ vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b, intr->const_index[0] = sample_mask->data.location; intr->src[0] = nir_src_for_ssa(val); + intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &intr->instr); } diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 72a514756fd..a46af77f370 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -179,6 +179,12 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR inputs are vec4. */ assert(intr->num_components == 4); + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u[0] == 0); + /* Generate dword loads for the VPM values (Since these intrinsics may * be reordered, the actual reads will be generated at the top of the * shader by ntq_setup_inputs(). @@ -190,6 +196,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_load_input); intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); nir_builder_instr_insert(b, &intr_comp->instr); @@ -245,6 +252,12 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR inputs are vec4. */ assert(intr->num_components == 4); + /* We only accept direct inputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u[0] == 0); + /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; for (unsigned i = 0; i < intr->num_components; i++) { @@ -252,6 +265,8 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input); intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); nir_builder_instr_insert(b, &intr_comp->instr); @@ -319,6 +334,12 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR outputs are VEC4. */ assert(intr->num_components == 4); + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[1]) && + nir_src_as_const_value(intr->src[1])->u[0] == 0); + b->cursor = nir_before_instr(&intr->instr); for (unsigned i = 0; i < intr->num_components; i++) { @@ -330,6 +351,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, assert(intr->src[0].is_ssa); intr_comp->src[0] = nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); + intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &intr_comp->instr); } @@ -340,8 +362,8 @@ static void vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - /* All TGSI-to-NIR uniform loads are vec4, but we may create dword - * loads in our lowering passes. + /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets + * in the backend. */ if (intr->num_components == 1) return; @@ -357,24 +379,23 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, intr_comp->num_components = 1; nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); - if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) { - /* Convert the variable TGSI register index to a byte - * offset. + /* Convert the uniform (not user_clip_plane) offset to bytes. + * If it happens to be a constant, constant-folding will clean + * up the shift for us. + */ + if (intr->intrinsic == nir_intrinsic_load_uniform) { + /* Convert the base offset to bytes and add the + * component */ + intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4); + intr_comp->src[0] = - nir_src_for_ssa(nir_ishl(b, - intr->src[0].ssa, + nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, nir_imm_int(b, 4))); - - /* Convert the offset to be a byte index, too. */ - intr_comp->const_index[0] = (intr->const_index[0] * 16 + - i * 4); } else { - /* We want a dword index for non-indirect uniform - * loads. - */ - intr_comp->const_index[0] = (intr->const_index[0] * 4 + - i); + assert(intr->intrinsic == + nir_intrinsic_load_user_clip_plane); + intr_comp->const_index[0] = intr->const_index[0] * 4 + i; } dests[i] = &intr_comp->dest.ssa; @@ -406,7 +427,6 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, break; case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: case nir_intrinsic_load_user_clip_plane: vc4_nir_lower_uniform(c, b, intr); break; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 31968bb5db9..caad05cb9f7 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -115,8 +115,9 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b, nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); - intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents; + intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4; intr->num_components = 1; + intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL); nir_builder_instr_insert(b, &intr->instr); return &intr->dest.ssa; @@ -1516,6 +1517,8 @@ static void ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) { const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + nir_const_value *const_offset; + unsigned offset; struct qreg *dest = NULL; if (info->has_dest) { @@ -1525,21 +1528,25 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: assert(instr->num_components == 1); - if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) { - *dest = qir_uniform(c, QUNIFORM_UNIFORM, - instr->const_index[0]); + const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = instr->const_index[0] + const_offset->u[0]; + assert(offset % 4 == 0); + /* We need dwords */ + offset = offset / 4; + if (offset < VC4_NIR_STATE_UNIFORM_OFFSET) { + *dest = qir_uniform(c, QUNIFORM_UNIFORM, + offset); + } else { + *dest = qir_uniform(c, offset - + VC4_NIR_STATE_UNIFORM_OFFSET, + 0); + } } else { - *dest = qir_uniform(c, instr->const_index[0] - - VC4_NIR_STATE_UNIFORM_OFFSET, - 0); + *dest = indirect_uniform_load(c, instr); } break; - case nir_intrinsic_load_uniform_indirect: - *dest = indirect_uniform_load(c, instr); - - break; - case nir_intrinsic_load_user_clip_plane: *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, instr->const_index[0]); @@ -1551,7 +1558,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_input: assert(instr->num_components == 1); + const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset && "vc4 doesn't support indirect inputs"); if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) { + assert(const_offset->u[0] == 0); /* Reads of the per-sample color need to be done in * order. */ @@ -1565,17 +1575,22 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } *dest = c->color_reads[sample_index]; } else { - *dest = c->inputs[instr->const_index[0]]; + offset = instr->const_index[0] + const_offset->u[0]; + *dest = c->inputs[offset]; } break; case nir_intrinsic_store_output: + const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset && "vc4 doesn't support indirect outputs"); + offset = instr->const_index[0] + const_offset->u[0]; + /* MSAA color outputs are the only case where we have an * output that's not lowered to being a store of a single 32 * bit value. */ if (c->stage == QSTAGE_FRAG && instr->num_components == 4) { - assert(instr->const_index[0] == c->output_color_index); + assert(offset == c->output_color_index); for (int i = 0; i < 4; i++) { c->sample_colors[i] = qir_MOV(c, ntq_get_src(c, instr->src[0], @@ -1583,9 +1598,9 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } } else { assert(instr->num_components == 1); - c->outputs[instr->const_index[0]] = + c->outputs[offset] = qir_MOV(c, ntq_get_src(c, instr->src[0], 0)); - c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1); + c->num_outputs = MAX2(c->num_outputs, offset + 1); } break; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index d53095ed222..b875760a2ca 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -444,7 +444,7 @@ struct vc4_compile { /* Special offset for nir_load_uniform values to get a QUNIFORM_* * state-dependent value. */ -#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000 +#define VC4_NIR_STATE_UNIFORM_OFFSET 1000000000 struct vc4_compile *qir_compile_init(void); void qir_compile_destroy(struct vc4_compile *c); diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index fc0f4049941..db8b0cae814 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -885,24 +885,12 @@ nir_visitor::visit(ir_call *ir) ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_store_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - - instr->const_index[1] = write_mask->value.u[0]; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = write_mask->value.u[0]; instr->num_components = val->type->vector_elements; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -913,20 +901,8 @@ nir_visitor::visit(ir_call *ir) param = param->get_next(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_load_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - dest = &instr->dest; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; instr->num_components = type->vector_elements; @@ -1010,18 +986,8 @@ nir_visitor::visit(ir_call *ir) exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_load_shared_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - dest = &instr->dest; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; instr->num_components = type->vector_elements; @@ -1044,17 +1010,8 @@ nir_visitor::visit(ir_call *ir) ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_store_shared_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); instr->const_index[1] = write_mask->value.u[0]; @@ -1303,20 +1260,11 @@ nir_visitor::visit(ir_expression *ir) /* Some special cases */ switch (ir->operation) { case ir_binop_ubo_load: { - ir_constant *const_index = ir->operands[1]->as_constant(); - - nir_intrinsic_op op; - if (const_index) { - op = nir_intrinsic_load_ubo; - } else { - op = nir_intrinsic_load_ubo_indirect; - } - nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - if (!const_index) - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); add_instr(&load->instr, ir->type->vector_elements); /* diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e161b70fa18..2e72e66699c 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1969,7 +1969,7 @@ void nir_assign_var_locations(struct exec_list *var_list, void nir_lower_io(nir_shader *shader, nir_variable_mode mode, int (*type_size)(const struct glsl_type *)); -nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr); +nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 6b6cb32096b..9811fb391de 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -255,56 +255,60 @@ SYSTEM_VALUE(num_work_groups, 3, 0) SYSTEM_VALUE(helper_invocation, 1, 0) /* - * The format of the indices depends on the type of the load. For uniforms, - * the first index is the base address and the second index is an offset that - * should be added to the base address. (This way you can determine in the - * back-end which variable is being accessed even in an array.) For inputs, - * the one and only index corresponds to the attribute slot. UBO loads also - * have a single index which is the base address to load from. + * Load operations pull data from some piece of GPU memory. All load + * operations operate in terms of offsets into some piece of theoretical + * memory. Loads from externally visible memory (UBO and SSBO) simply take a + * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) + * take a base+offset pair where the base (const_index[0]) gives the location + * of the start of the variable being loaded and and the offset source is a + * offset into that variable. * - * UBO loads have a (possibly constant) source which is the UBO buffer index. - * For each type of load, the _indirect variant has one additional source - * (the second in the case of UBO's) that is the is an indirect to be added to - * the constant address or base offset to compute the final offset. + * Some load operations such as UBO/SSBO load and per_vertex loads take an + * additional source to specify which UBO/SSBO/vertex to load from. * - * For vector backends, the address is in terms of one vec4, and so each array - * element is +4 scalar components from the previous array element. For scalar - * backends, the address is in terms of a single 4-byte float/int and arrays - * elements begin immediately after the previous array element. + * The exact address type depends on the lowering pass that generates the + * load/store intrinsics. Typically, this is vec4 units for things such as + * varying slots and float units for fragment shader inputs. UBO and SSBO + * offsets are always in bytes. */ -#define LOAD(name, extra_srcs, indices, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \ - INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, indices, flags) +#define LOAD(name, srcs, indices, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) -LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(shared, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) /* - * Stores work the same way as loads, except now the first register input is - * the value or array to store and the optional second input is the indirect - * offset. SSBO stores are similar, but they accept an extra source for the - * block index and an extra index with the writemask to use. + * Stores work the same way as loads, except now the first source is the value + * to store and the second (and possibly third) source specify where to store + * the value. SSBO and shared memory stores also have a write mask as + * const_index[0]. */ -#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ - INTRINSIC(store_##name, 1 + extra_srcs, \ - ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) \ - INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ - ARR(0, 1, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) +#define STORE(name, srcs, indices, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) -STORE(output, 0, 0, 0, 0) -STORE(per_vertex_output, 1, 1, 0, 0) -STORE(ssbo, 1, 1, 1, 0) -STORE(shared, 0, 0, 1, 0) +/* src[] = { value, offset }. const_index[] = { base } */ +STORE(output, 2, 1, 0) +/* src[] = { value, vertex, offset }. const_index[] = { base } */ +STORE(per_vertex_output, 3, 1, 0) +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ +STORE(ssbo, 3, 1, 0) +/* src[] = { value, offset }. const_index[] = { base, write_mask } */ +STORE(shared, 2, 1, 0) -LAST_INTRINSIC(store_shared_indirect) +LAST_INTRINSIC(store_shared) diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index c58c7785b3f..e2a2bb689a8 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -74,6 +74,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) store->const_index[0] = out->data.driver_location; store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); store->src[0].is_ssa = true; + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &store->instr); } @@ -85,6 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -112,6 +114,7 @@ find_output_in_block(nir_block *block, void *void_state) intr->const_index[0] == state->drvloc) { assert(state->def == NULL); assert(intr->src[0].is_ssa); + assert(nir_src_as_const_value(intr->src[1])); state->def = intr->src[0].ssa; #if !defined(DEBUG) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index f64ac696fa2..3d646eb14b4 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -86,10 +86,9 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var) stage == MESA_SHADER_TESS_CTRL; } -static unsigned +static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - nir_ssa_def **out_indirect, int (*type_size)(const struct glsl_type *)) { nir_deref *tail = &deref->deref; @@ -109,8 +108,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, *vertex_index = vtx; } - nir_ssa_def *indirect = NULL; - unsigned base_offset = 0; + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; @@ -120,55 +119,46 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, nir_deref_array *deref_array = nir_deref_as_array(tail); unsigned size = type_size(tail->type); - base_offset += size * deref_array->base_offset; + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_ssa_def *mul = nir_imul(b, nir_imm_int(b, size), nir_ssa_for_src(b, deref_array->indirect, 1)); - indirect = indirect ? nir_iadd(b, indirect, mul) : mul; + offset = nir_iadd(b, offset, mul); } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + unsigned field_offset = 0; for (unsigned i = 0; i < deref_struct->index; i++) { - base_offset += type_size(glsl_get_struct_field(parent_type, i)); + field_offset += type_size(glsl_get_struct_field(parent_type, i)); } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); } } - *out_indirect = indirect; - return base_offset; + return offset; } static nir_intrinsic_op load_op(struct lower_io_state *state, - nir_variable_mode mode, bool per_vertex, bool has_indirect) + nir_variable_mode mode, bool per_vertex) { nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect : - nir_intrinsic_load_per_vertex_input; - } else { - op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; break; case nir_var_shader_out: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect : - nir_intrinsic_load_per_vertex_output; - } else { - op = has_indirect ? nir_intrinsic_load_output_indirect : - nir_intrinsic_load_output; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; break; case nir_var_uniform: - op = has_indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + op = nir_intrinsic_load_uniform; break; default: unreachable("Unknown variable mode"); @@ -211,32 +201,25 @@ nir_lower_io_block(nir_block *block, void *void_state) is_per_vertex_input(state, intrin->variables[0]->var) || is_per_vertex_output(state, intrin->variables[0]->var); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; - unsigned offset = get_io_offset(b, intrin->variables[0], - per_vertex ? &vertex_index : NULL, - &indirect, state->type_size); + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(state, mode, per_vertex, - indirect)); + load_op(state, mode, per_vertex)); load->num_components = intrin->num_components; - unsigned location = intrin->variables[0]->var->data.driver_location; - if (mode == nir_var_uniform) { - load->const_index[0] = location; - load->const_index[1] = offset; - } else { - load->const_index[0] = location + offset; - } + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); - if (indirect) - load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@ -255,38 +238,33 @@ nir_lower_io_block(nir_block *block, void *void_state) case nir_intrinsic_store_var: { assert(mode == nir_var_shader_out); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; bool per_vertex = is_per_vertex_output(state, intrin->variables[0]->var); - unsigned offset = get_io_offset(b, intrin->variables[0], - per_vertex ? &vertex_index : NULL, - &indirect, state->type_size); - offset += intrin->variables[0]->var->data.driver_location; + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); - nir_intrinsic_op store_op; - if (per_vertex) { - store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect - : nir_intrinsic_store_per_vertex_output; - } else { - store_op = indirect ? nir_intrinsic_store_output_indirect - : nir_intrinsic_store_output; - } + nir_intrinsic_op store_op = + per_vertex ? nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, store_op); store->num_components = intrin->num_components; - store->const_index[0] = offset; nir_src_copy(&store->src[0], &intrin->src[0], store); + store->const_index[0] = + intrin->variables[0]->var->data.driver_location; + if (per_vertex) store->src[1] = nir_src_for_ssa(vertex_index); - if (indirect) - store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect); + store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); nir_instr_insert_before(&intrin->instr, &store->instr); nir_instr_remove(&intrin->instr); @@ -330,21 +308,21 @@ nir_lower_io(nir_shader *shader, nir_variable_mode mode, } /** - * Return the indirect source for a load/store indirect intrinsic. + * Return the offset soruce for a load/store intrinsic. */ nir_src * -nir_get_io_indirect_src(nir_intrinsic_instr *instr) +nir_get_io_offset_src(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - case nir_intrinsic_load_output_indirect: - case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_input: + case nir_intrinsic_load_output: + case nir_intrinsic_load_uniform: return &instr->src[0]; - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: - case nir_intrinsic_store_output_indirect: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: return &instr->src[1]; - case nir_intrinsic_store_per_vertex_output_indirect: + case nir_intrinsic_store_per_vertex_output: return &instr->src[2]; default: return NULL; @@ -360,11 +338,8 @@ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: return &instr->src[0]; case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: return &instr->src[1]; default: return NULL; diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index aa124d9e6cc..2f5927f6406 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -91,13 +91,9 @@ is_phi_src_scalarizable(nir_phi_src *src, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_ssbo_indirect: case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: return true; default: break; diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index 6995b9d6bc1..7df12e070f1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -73,6 +73,7 @@ load_input(nir_builder *b, nir_variable *in) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -151,6 +152,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state) unsigned drvloc = state->colors[idx].front->data.driver_location; if (intr->const_index[0] == drvloc) { + assert(nir_src_as_const_value(intr->src[0])); break; } } diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index c98a0476ef9..1a4cc695d5a 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -439,21 +439,15 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: var_list = &state->shader->uniforms; break; case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_input_indirect: var_list = &state->shader->inputs; break; case nir_intrinsic_load_output: - case nir_intrinsic_load_output_indirect: case nir_intrinsic_store_output: - case nir_intrinsic_store_output_indirect: case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: var_list = &state->shader->outputs; break; default: diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index cead99155f4..f2e384129cb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -280,7 +280,7 @@ public: unsigned stream_id); void emit_gs_thread_end(); void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - const fs_reg &indirect_offset, unsigned imm_offset, + unsigned base_offset, const nir_src &offset_src, unsigned num_components); void emit_cs_terminate(); fs_reg *emit_cs_local_invocation_id_setup(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 13059999e7d..db38f619272 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1603,28 +1603,30 @@ fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src, void fs_visitor::emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src, - const fs_reg &indirect_offset, - unsigned imm_offset, + unsigned base_offset, + const nir_src &offset_src, unsigned num_components) { struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data; + nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); + nir_const_value *offset_const = nir_src_as_const_value(offset_src); + const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; + /* Offset 0 is the VUE header, which contains VARYING_SLOT_LAYER [.y], * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w]. Only * gl_PointSize is available as a GS input, however, so it must be that. */ - const bool is_point_size = - indirect_offset.file == BAD_FILE && imm_offset == 0; - - nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); - const unsigned push_reg_count = gs_prog_data->base.urb_read_length * 8; + const bool is_point_size = (base_offset == 0); - if (indirect_offset.file == BAD_FILE && vertex_const != NULL && - 4 * imm_offset < push_reg_count) { - imm_offset = 4 * imm_offset + vertex_const->u[0] * push_reg_count; + if (offset_const != NULL && vertex_const != NULL && + 4 * (base_offset + offset_const->u[0]) < push_reg_count) { + int imm_offset = (base_offset + offset_const->u[0]) * 4 + + vertex_const->u[0] * push_reg_count; /* This input was pushed into registers. */ if (is_point_size) { /* gl_PointSize comes in .w */ + assert(imm_offset == 0); bld.MOV(dst, fs_reg(ATTR, imm_offset + 3, dst.type)); } else { for (unsigned i = 0; i < num_components; i++) { @@ -1683,21 +1685,21 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, } fs_inst *inst; - if (indirect_offset.file == BAD_FILE) { + if (offset_const) { /* Constant indexing - use global offset. */ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); - inst->offset = imm_offset; + inst->offset = base_offset + offset_const->u[0]; inst->base_mrf = -1; inst->mlen = 1; inst->regs_written = num_components; } else { /* Indirect indexing - use per-slot offsets as well. */ - const fs_reg srcs[] = { icp_handle, indirect_offset }; + const fs_reg srcs[] = { icp_handle, get_nir_src(offset_src) }; fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); - inst->offset = imm_offset; + inst->offset = base_offset; inst->base_mrf = -1; inst->mlen = 2; inst->regs_written = num_components; @@ -1763,17 +1765,12 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD)); break; - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_input: unreachable("load_input intrinsics are invalid for the GS stage"); - case nir_intrinsic_load_per_vertex_input_indirect: - indirect_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D); - /* fallthrough */ case nir_intrinsic_load_per_vertex_input: - emit_gs_input_load(dest, instr->src[0], - indirect_offset, instr->const_index[0], - instr->num_components); + emit_gs_input_load(dest, instr->src[0], instr->const_index[0], + instr->src[1], instr->num_components); break; case nir_intrinsic_emit_vertex_with_counter: @@ -2137,8 +2134,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (nir_intrinsic_infos[instr->intrinsic].has_dest) dest = get_nir_dest(instr->dest); - bool has_indirect = false; - switch (instr->intrinsic) { case nir_intrinsic_atomic_counter_inc: case nir_intrinsic_atomic_counter_dec: @@ -2327,19 +2322,20 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(1)); break; - case nir_intrinsic_load_uniform_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_uniform: { /* Offsets are in bytes but they should always be multiples of 4 */ assert(instr->const_index[0] % 4 == 0); - assert(instr->const_index[1] % 4 == 0); fs_reg src(UNIFORM, instr->const_index[0] / 4, dest.type); - src.reg_offset = instr->const_index[1] / 4; - if (has_indirect) + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + /* Offsets are in bytes but they should always be multiples of 4 */ + assert(const_offset->u[0] % 4 == 0); + src.reg_offset = const_offset->u[0] / 4; + } else { src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0])); + } for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); @@ -2347,9 +2343,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_ubo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ubo: { nir_const_value *const_index = nir_src_as_const_value(instr->src[0]); fs_reg surf_index; @@ -2377,24 +2370,24 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir->info.num_ubos - 1); } - if (has_indirect) { + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset == NULL) { fs_reg base_offset = retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_D); - unsigned vec4_offset = instr->const_index[0]; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, - base_offset, vec4_offset + i * 4); + base_offset, i * 4); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; - struct brw_reg const_offset_reg = brw_imm_ud(instr->const_index[0] & ~15); + struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15); bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts, surf_index, const_offset_reg); for (unsigned i = 0; i < instr->num_components; i++) { - packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i); + packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i); /* The std140 packing rules don't allow vectors to cross 16-byte * boundaries, and a reg is 32 bytes. @@ -2408,9 +2401,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); @@ -2436,12 +2426,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir->info.num_ssbos - 1); } - /* Get the offset to read from */ fs_reg offset_reg; - if (has_indirect) { - offset_reg = get_nir_src(instr->src[1]); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); } else { - offset_reg = brw_imm_ud(instr->const_index[0]); + offset_reg = get_nir_src(instr->src[1]); } /* Read the vector */ @@ -2456,9 +2446,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_shared_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_shared: { assert(devinfo->gen >= 7); @@ -2466,10 +2453,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr /* Get the offset to read from */ fs_reg offset_reg; - if (has_indirect) { - offset_reg = get_nir_src(instr->src[0]); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]); } else { - offset_reg = brw_imm_ud(instr->const_index[0]); + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), + brw_imm_ud(instr->const_index[0])); } /* Read the vector */ @@ -2484,9 +2475,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_store_shared_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_shared: { assert(devinfo->gen >= 7); @@ -2509,13 +2497,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned length = ffs(~(writemask >> first_component)) - 1; fs_reg offset_reg; - if (!has_indirect) { - offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] + + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(4 * first_component)); + brw_imm_ud(instr->const_index[0] + 4 * first_component)); } emit_untyped_write(bld, surf_index, offset_reg, @@ -2532,9 +2522,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_load_input_indirect: - unreachable("Not allowed"); - /* fallthrough */ case nir_intrinsic_load_input: { fs_reg src; if (stage == MESA_SHADER_VERTEX) { @@ -2544,15 +2531,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr instr->const_index[0]); } + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset && "Indirect input loads not allowed"); + src = offset(src, bld, const_offset->u[0]); + for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(dest, bld, j), offset(src, bld, j)); } break; } - case nir_intrinsic_store_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); @@ -2579,7 +2567,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fs_reg val_reg = get_nir_src(instr->src[0]); /* Writemask */ - unsigned writemask = instr->const_index[1]; + unsigned writemask = instr->const_index[0]; /* Combine groups of consecutive enabled channels in one write * message. We use ffs to find the first enabled channel and then ffs on @@ -2589,10 +2577,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr while (writemask) { unsigned first_component = ffs(writemask) - 1; unsigned length = ffs(~(writemask >> first_component)) - 1; - fs_reg offset_reg; - if (!has_indirect) { - offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component); + fs_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component); } else { offset_reg = vgrf(glsl_type::uint_type); bld.ADD(offset_reg, @@ -2613,14 +2602,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_store_output_indirect: - unreachable("Not allowed"); - /* fallthrough */ case nir_intrinsic_store_output: { fs_reg src = get_nir_src(instr->src[0]); fs_reg new_dest = offset(retype(nir_outputs, src.type), bld, instr->const_index[0]); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset && "Indirect output stores not allowed"); + new_dest = offset(new_dest, bld, const_offset->u[0]); + for (unsigned j = 0; j < instr->num_components; j++) { bld.MOV(offset(new_dest, bld, j), offset(src, bld, j)); } diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index d62470379ee..14ad172a2c3 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -24,31 +24,49 @@ #include "brw_nir.h" #include "brw_shader.h" #include "glsl/nir/glsl_to_nir.h" +#include "glsl/nir/nir_builder.h" #include "program/prog_to_nir.h" +struct remap_vs_attrs_state { + nir_builder b; + uint64_t inputs_read; +}; + static bool -remap_vs_attrs(nir_block *block, void *closure) +remap_vs_attrs(nir_block *block, void *void_state) { - GLbitfield64 inputs_read = *((GLbitfield64 *) closure); + struct remap_vs_attrs_state *state = void_state; - nir_foreach_instr(block, instr) { + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ - assert(intrin->intrinsic != nir_intrinsic_load_input_indirect); - if (intrin->intrinsic == nir_intrinsic_load_input) { /* Attributes come in a contiguous block, ordered by their * gl_vert_attrib value. That means we can compute the slot * number for an attribute by masking out the enabled attributes * before it and counting the bits. */ - int attr = intrin->const_index[0]; - int slot = _mesa_bitcount_64(inputs_read & BITFIELD64_MASK(attr)); + nir_const_value *const_offset = nir_src_as_const_value(intrin->src[0]); + + /* We set EmitNoIndirect for VS inputs, so there are no indirects. */ + assert(const_offset); + + int attr = intrin->const_index[0] + const_offset->u[0]; + int slot = _mesa_bitcount_64(state->inputs_read & + BITFIELD64_MASK(attr)); + + /* The NIR -> FS pass will just add the base and offset together, so + * there's no reason to keep them separate. Just put it all in + * const_index[0] and set the offset src[0] to load_const(0). + */ intrin->const_index[0] = 4 * slot; + + state->b.cursor = nir_before_instr(&intrin->instr); + nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], + nir_src_for_ssa(nir_imm_int(&state->b, 0))); } } return true; @@ -79,10 +97,17 @@ brw_nir_lower_inputs(nir_shader *nir, * key->inputs_read since the two are identical aside from Gen4-5 * edge flag differences. */ - GLbitfield64 inputs_read = nir->info.inputs_read; + struct remap_vs_attrs_state remap_state = { + .inputs_read = nir->info.inputs_read, + }; + + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); + nir_foreach_overload(nir, overload) { if (overload->impl) { - nir_foreach_block(overload->impl, remap_vs_attrs, &inputs_read); + nir_builder_init(&remap_state.b, overload->impl); + nir_foreach_block(overload->impl, remap_vs_attrs, &remap_state); } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index e51ef4b37d5..6f66978f8e1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -60,19 +60,19 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src_reg src; switch (instr->intrinsic) { - case nir_intrinsic_load_per_vertex_input_indirect: - assert(!"EmitNoIndirectInput should prevent this."); case nir_intrinsic_load_per_vertex_input: { /* The EmitNoIndirectInput flag guarantees our vertex index will * be constant. We should handle indirects someday. */ nir_const_value *vertex = nir_src_as_const_value(instr->src[0]); + nir_const_value *offset = nir_src_as_const_value(instr->src[1]); /* Make up a type...we have no way of knowing... */ const glsl_type *const type = glsl_type::ivec(instr->num_components); src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u[0] + - instr->const_index[0], type); + instr->const_index[0] + offset->u[0], + type); dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); emit(MOV(dest, src)); @@ -80,7 +80,6 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: unreachable("nir_lower_io should have produced per_vertex intrinsics"); case nir_intrinsic_emit_vertex_with_counter: { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 50570cd7703..f965b39360f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -369,22 +369,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) dst_reg dest; src_reg src; - bool has_indirect = false; - switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_input: { - int offset = instr->const_index[0]; - src = src_reg(ATTR, offset, glsl_type::uvec4_type); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + + /* We set EmitNoIndirectInput for VS */ + assert(const_offset); + + src = src_reg(ATTR, instr->const_index[0] + const_offset->u[0], + glsl_type::uvec4_type); - if (has_indirect) { - dest.reladdr = new(mem_ctx) src_reg(get_nir_src(instr->src[0], - BRW_REGISTER_TYPE_D, - 1)); - } dest = get_nir_dest(instr->dest, src.type); dest.writemask = brw_writemask_for_size(instr->num_components); @@ -392,11 +387,11 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_store_output_indirect: - unreachable("nir_lower_outputs_to_temporaries should prevent this"); - case nir_intrinsic_store_output: { - int varying = instr->const_index[0]; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset); + + int varying = instr->const_index[0] + const_offset->u[0]; src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); @@ -431,9 +426,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_store_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_store_ssbo: { assert(devinfo->gen >= 7); @@ -458,20 +450,19 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) } /* Offset */ - src_reg offset_reg = src_reg(this, glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[2], 1))); + src_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); } else { - const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), brw_imm_ud(const_offset_bytes))); + offset_reg = get_nir_src(instr->src[2], 1); } /* Value */ src_reg val_reg = get_nir_src(instr->src[0], 4); /* Writemask */ - unsigned write_mask = instr->const_index[1]; + unsigned write_mask = instr->const_index[0]; /* IvyBridge does not have a native SIMD4x2 untyped write message so untyped * writes will use SIMD8 mode. In order to hide this and keep symmetry across @@ -537,9 +528,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) * write at to skip the channels we skipped, if any. */ if (skipped_channels > 0) { - if (!has_indirect) { - const_offset_bytes += 4 * skipped_channels; - offset_reg = brw_imm_ud(const_offset_bytes); + if (offset_reg.file == IMM) { + offset_reg.ud += 4 * skipped_channels; } else { emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(4 * skipped_channels))); @@ -574,9 +564,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_load_ssbo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ssbo: { assert(devinfo->gen >= 7); @@ -604,13 +591,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir->info.num_ssbos - 1); } - src_reg offset_reg = src_reg(this, glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - emit(MOV(dst_reg(offset_reg), get_nir_src(instr->src[1], 1))); + src_reg offset_reg; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset_reg = brw_imm_ud(const_offset->u[0]); } else { - const_offset_bytes = instr->const_index[0]; - emit(MOV(dst_reg(offset_reg), brw_imm_ud((const_offset_bytes)))); + offset_reg = get_nir_src(instr->src[1], 1); } /* Read the vector */ @@ -673,20 +659,21 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_load_uniform_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_uniform: { /* Offsets are in bytes but they should always be multiples of 16 */ assert(instr->const_index[0] % 16 == 0); - assert(instr->const_index[1] % 16 == 0); dest = get_nir_dest(instr->dest); src = src_reg(dst_reg(UNIFORM, instr->const_index[0] / 16)); - src.reg_offset = instr->const_index[1] / 16; + src.type = dest.type; - if (has_indirect) { + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + /* Offsets are in bytes but they should always be multiples of 16 */ + assert(const_offset->u[0] % 16 == 0); + src.reg_offset = const_offset->u[0] / 16; + } else { src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1); src.reladdr = new(mem_ctx) src_reg(tmp); } @@ -724,9 +711,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } - case nir_intrinsic_load_ubo_indirect: - has_indirect = true; - /* fallthrough */ case nir_intrinsic_load_ubo: { nir_const_value *const_block_index = nir_src_as_const_value(instr->src[0]); src_reg surf_index; @@ -760,11 +744,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir->info.num_ubos - 1); } - unsigned const_offset = instr->const_index[0]; src_reg offset; - - if (!has_indirect) { - offset = brw_imm_ud(const_offset & ~15); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + if (const_offset) { + offset = brw_imm_ud(const_offset->u[0] & ~15); } else { offset = get_nir_src(instr->src[1], nir_type_int, 1); } @@ -778,10 +761,12 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) NULL, NULL /* before_block/inst */); packed_consts.swizzle = brw_swizzle_for_size(instr->num_components); - packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); + if (const_offset) { + packed_consts.swizzle += BRW_SWIZZLE4(const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4, + const_offset->u[0] % 16 / 4); + } emit(MOV(dest, packed_consts)); break; |