diff options
author | Jason Ekstrand <[email protected]> | 2015-11-25 14:14:05 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-12-10 12:25:16 -0800 |
commit | 78b81be627734ea7fa50ea246c07b0d4a3a1638a (patch) | |
tree | 10b0b098de5b3a111d076e9d8c5fca440fad45ad /src/gallium/drivers/vc4 | |
parent | f3970fad9e5b04e04de366a65fed5a30da618f9d (diff) |
nir: Get rid of *_indirect variants of input/output load/store intrinsics
There is some special-casing needed in a competent back-end. However, they
can do their special-casing easily enough based on whether or not the
offset is a constant. In the mean time, having the *_indirect variants
adds special cases a number of places where they don't need to be and, in
general, only complicates things. To complicate matters, NIR had no way to
convdert an indirect load/store to a direct one in the case that the
indirect was a constant so we would still not really get what the back-ends
wanted. The best solution seems to be to get rid of the *_indirect
variants entirely.
This commit is a bunch of different changes squashed together:
- nir: Get rid of *_indirect variants of input/output load/store intrinsics
- nir/glsl: Stop handling UBO/SSBO load/stores differently depending on indirect
- nir/lower_io: Get rid of load/store_foo_indirect
- i965/fs: Get rid of load/store_foo_indirect
- i965/vec4: Get rid of load/store_foo_indirect
- tgsi_to_nir: Get rid of load/store_foo_indirect
- ir3/nir: Use the new unified io intrinsics
- vc4: Do all uniform loads with byte offsets
- vc4/nir: Use the new unified io intrinsics
- vc4: Fix load_user_clip_plane crash
- vc4: add missing src for store outputs
- vc4: Fix state uniforms
- nir/lower_clip: Update to the new load/store intrinsics
- nir/lower_two_sided_color: Update to the new load intrinsic
NIR and i965 changes are
Reviewed-by: Kenneth Graunke <[email protected]>
NIR indirect declarations and vc4 changes are
Reviewed-by: Eric Anholt <[email protected]>
ir3 changes are
Reviewed-by: Rob Clark <[email protected]>
NIR changes are
Acked-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/vc4')
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_io.c | 54 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 2 |
4 files changed, 71 insertions, 34 deletions
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 38676cff6b7..4b10cb7fe56 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -61,6 +61,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample) nir_intrinsic_load_input); load->num_components = 1; load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL); nir_builder_instr_insert(b, &load->instr); return &load->dest.ssa; @@ -612,6 +613,7 @@ vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b, intr->const_index[0] = sample_mask->data.location; intr->src[0] = nir_src_for_ssa(val); + intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &intr->instr); } diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 72a514756fd..a46af77f370 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -179,6 +179,12 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR inputs are vec4. */ assert(intr->num_components == 4); + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u[0] == 0); + /* Generate dword loads for the VPM values (Since these intrinsics may * be reordered, the actual reads will be generated at the top of the * shader by ntq_setup_inputs(). @@ -190,6 +196,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_load_input); intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); nir_builder_instr_insert(b, &intr_comp->instr); @@ -245,6 +252,12 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR inputs are vec4. */ assert(intr->num_components == 4); + /* We only accept direct inputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[0]) && + nir_src_as_const_value(intr->src[0])->u[0] == 0); + /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; for (unsigned i = 0; i < intr->num_components; i++) { @@ -252,6 +265,8 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input); intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; + intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); nir_builder_instr_insert(b, &intr_comp->instr); @@ -319,6 +334,12 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, /* All TGSI-to-NIR outputs are VEC4. */ assert(intr->num_components == 4); + /* We only accept direct outputs and TGSI only ever gives them to us + * with an offset value of 0. + */ + assert(nir_src_as_const_value(intr->src[1]) && + nir_src_as_const_value(intr->src[1])->u[0] == 0); + b->cursor = nir_before_instr(&intr->instr); for (unsigned i = 0; i < intr->num_components; i++) { @@ -330,6 +351,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, assert(intr->src[0].is_ssa); intr_comp->src[0] = nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i)); + intr_comp->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &intr_comp->instr); } @@ -340,8 +362,8 @@ static void vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - /* All TGSI-to-NIR uniform loads are vec4, but we may create dword - * loads in our lowering passes. + /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets + * in the backend. */ if (intr->num_components == 1) return; @@ -357,24 +379,23 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, intr_comp->num_components = 1; nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); - if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) { - /* Convert the variable TGSI register index to a byte - * offset. + /* Convert the uniform (not user_clip_plane) offset to bytes. + * If it happens to be a constant, constant-folding will clean + * up the shift for us. + */ + if (intr->intrinsic == nir_intrinsic_load_uniform) { + /* Convert the base offset to bytes and add the + * component */ + intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4); + intr_comp->src[0] = - nir_src_for_ssa(nir_ishl(b, - intr->src[0].ssa, + nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, nir_imm_int(b, 4))); - - /* Convert the offset to be a byte index, too. */ - intr_comp->const_index[0] = (intr->const_index[0] * 16 + - i * 4); } else { - /* We want a dword index for non-indirect uniform - * loads. - */ - intr_comp->const_index[0] = (intr->const_index[0] * 4 + - i); + assert(intr->intrinsic == + nir_intrinsic_load_user_clip_plane); + intr_comp->const_index[0] = intr->const_index[0] * 4 + i; } dests[i] = &intr_comp->dest.ssa; @@ -406,7 +427,6 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b, break; case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: case nir_intrinsic_load_user_clip_plane: vc4_nir_lower_uniform(c, b, intr); break; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 31968bb5db9..caad05cb9f7 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -115,8 +115,9 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b, nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform); - intr->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents; + intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4; intr->num_components = 1; + intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL); nir_builder_instr_insert(b, &intr->instr); return &intr->dest.ssa; @@ -1516,6 +1517,8 @@ static void ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) { const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + nir_const_value *const_offset; + unsigned offset; struct qreg *dest = NULL; if (info->has_dest) { @@ -1525,21 +1528,25 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: assert(instr->num_components == 1); - if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) { - *dest = qir_uniform(c, QUNIFORM_UNIFORM, - instr->const_index[0]); + const_offset = nir_src_as_const_value(instr->src[0]); + if (const_offset) { + offset = instr->const_index[0] + const_offset->u[0]; + assert(offset % 4 == 0); + /* We need dwords */ + offset = offset / 4; + if (offset < VC4_NIR_STATE_UNIFORM_OFFSET) { + *dest = qir_uniform(c, QUNIFORM_UNIFORM, + offset); + } else { + *dest = qir_uniform(c, offset - + VC4_NIR_STATE_UNIFORM_OFFSET, + 0); + } } else { - *dest = qir_uniform(c, instr->const_index[0] - - VC4_NIR_STATE_UNIFORM_OFFSET, - 0); + *dest = indirect_uniform_load(c, instr); } break; - case nir_intrinsic_load_uniform_indirect: - *dest = indirect_uniform_load(c, instr); - - break; - case nir_intrinsic_load_user_clip_plane: *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE, instr->const_index[0]); @@ -1551,7 +1558,10 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_load_input: assert(instr->num_components == 1); + const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset && "vc4 doesn't support indirect inputs"); if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) { + assert(const_offset->u[0] == 0); /* Reads of the per-sample color need to be done in * order. */ @@ -1565,17 +1575,22 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } *dest = c->color_reads[sample_index]; } else { - *dest = c->inputs[instr->const_index[0]]; + offset = instr->const_index[0] + const_offset->u[0]; + *dest = c->inputs[offset]; } break; case nir_intrinsic_store_output: + const_offset = nir_src_as_const_value(instr->src[1]); + assert(const_offset && "vc4 doesn't support indirect outputs"); + offset = instr->const_index[0] + const_offset->u[0]; + /* MSAA color outputs are the only case where we have an * output that's not lowered to being a store of a single 32 * bit value. */ if (c->stage == QSTAGE_FRAG && instr->num_components == 4) { - assert(instr->const_index[0] == c->output_color_index); + assert(offset == c->output_color_index); for (int i = 0; i < 4; i++) { c->sample_colors[i] = qir_MOV(c, ntq_get_src(c, instr->src[0], @@ -1583,9 +1598,9 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } } else { assert(instr->num_components == 1); - c->outputs[instr->const_index[0]] = + c->outputs[offset] = qir_MOV(c, ntq_get_src(c, instr->src[0], 0)); - c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1); + c->num_outputs = MAX2(c->num_outputs, offset + 1); } break; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index d53095ed222..b875760a2ca 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -444,7 +444,7 @@ struct vc4_compile { /* Special offset for nir_load_uniform values to get a QUNIFORM_* * state-dependent value. */ -#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000 +#define VC4_NIR_STATE_UNIFORM_OFFSET 1000000000 struct vc4_compile *qir_compile_init(void); void qir_compile_destroy(struct vc4_compile *c); |