diff options
-rw-r--r--  src/broadcom/compiler/nir_to_vir.c       | 102
-rw-r--r--  src/broadcom/compiler/v3d_nir_lower_io.c |  57
2 files changed, 57 insertions, 102 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 91d4ab0020e..defddecc847 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -118,11 +118,44 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr)
         bool has_index = instr->intrinsic == nir_intrinsic_load_ubo;
         int offset_src = 0 + has_index;
 
-        /* Note that QUNIFORM_UBO_ADDR takes a UBO index shifted up by
-         * 1 (0 is gallium's constant buffer 0).
-         */
-        struct qreg offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
-                                         nir_src_as_uint(instr->src[0]) + 1);
+        struct qreg offset;
+        if (instr->intrinsic == nir_intrinsic_load_uniform) {
+                offset = vir_uniform(c, QUNIFORM_UBO_ADDR, 0);
+
+                /* Find what variable in the default uniform block this
+                 * uniform load is coming from.
+                 */
+                uint32_t base = nir_intrinsic_base(instr);
+                int i;
+                struct v3d_ubo_range *range = NULL;
+                for (i = 0; i < c->num_ubo_ranges; i++) {
+                        range = &c->ubo_ranges[i];
+                        if (base >= range->src_offset &&
+                            base < range->src_offset + range->size) {
+                                break;
+                        }
+                }
+                /* The driver-location-based offset always has to be within a
+                 * declared uniform range.
+                 */
+                assert(i != c->num_ubo_ranges);
+                if (!c->ubo_range_used[i]) {
+                        c->ubo_range_used[i] = true;
+                        range->dst_offset = c->next_ubo_dst_offset;
+                        c->next_ubo_dst_offset += range->size;
+                }
+
+                base = base - range->src_offset + range->dst_offset;
+
+                if (base != 0)
+                        offset = vir_ADD(c, offset, vir_uniform_ui(c, base));
+        } else {
+                /* Note that QUNIFORM_UBO_ADDR takes a UBO index shifted up by
+                 * 1 (0 is gallium's constant buffer 0).
+                 */
+                offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
+                                     nir_src_as_uint(instr->src[0]) + 1);
+        }
 
         uint32_t config = (0xffffff00 |
                            tmu_op |
@@ -161,49 +194,6 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_store_dest(c, &instr->dest, i, vir_MOV(c, vir_LDTMU(c)));
 }
 
-static struct qreg
-indirect_uniform_load(struct v3d_compile *c, nir_intrinsic_instr *intr)
-{
-        struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
-        uint32_t offset = nir_intrinsic_base(intr);
-        struct v3d_ubo_range *range = NULL;
-        unsigned i;
-
-        for (i = 0; i < c->num_ubo_ranges; i++) {
-                range = &c->ubo_ranges[i];
-                if (offset >= range->src_offset &&
-                    offset < range->src_offset + range->size) {
-                        break;
-                }
-        }
-        /* The driver-location-based offset always has to be within a declared
-         * uniform range.
-         */
-        assert(i != c->num_ubo_ranges);
-        if (!c->ubo_range_used[i]) {
-                c->ubo_range_used[i] = true;
-                range->dst_offset = c->next_ubo_dst_offset;
-                c->next_ubo_dst_offset += range->size;
-        }
-
-        offset -= range->src_offset;
-
-        if (range->dst_offset + offset != 0) {
-                indirect_offset = vir_ADD(c, indirect_offset,
-                                          vir_uniform_ui(c, range->dst_offset +
-                                                         offset));
-        }
-
-        /* Adjust for where we stored the TGSI register base. */
-        vir_ADD_dest(c,
-                     vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUA),
-                     vir_uniform(c, QUNIFORM_UBO_ADDR, 0),
-                     indirect_offset);
-
-        vir_emit_thrsw(c);
-        return vir_LDTMU(c);
-}
-
 static struct qreg *
 ntq_init_ssa_def(struct v3d_compile *c, nir_ssa_def *def)
 {
@@ -1618,19 +1608,19 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
         switch (instr->intrinsic) {
         case nir_intrinsic_load_uniform:
-                assert(instr->num_components == 1);
                 if (nir_src_is_const(instr->src[0])) {
-                        offset = (nir_intrinsic_base(instr) +
-                                  nir_src_as_uint(instr->src[0]));
+                        int offset = (nir_intrinsic_base(instr) +
+                                      nir_src_as_uint(instr->src[0]));
                         assert(offset % 4 == 0);
                         /* We need dwords */
                         offset = offset / 4;
-                        ntq_store_dest(c, &instr->dest, 0,
-                                       vir_uniform(c, QUNIFORM_UNIFORM,
-                                                   offset));
+                        for (int i = 0; i < instr->num_components; i++) {
+                                ntq_store_dest(c, &instr->dest, i,
+                                               vir_uniform(c, QUNIFORM_UNIFORM,
+                                                           offset + i));
+                        }
                 } else {
-                        ntq_store_dest(c, &instr->dest, 0,
-                                       indirect_uniform_load(c, instr));
+                        ntq_emit_tmu_general(c, instr);
                 }
                 break;
 
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
index db339f87a53..b65a82b7f7a 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -28,61 +28,26 @@
  * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
  * intrinsics into something amenable to the V3D architecture.
  *
- * Currently, it just splits uniforms into scalars, and fixes up the
- * addressing on indirect uniform loads. FS input and VS output scalarization
- * is handled by nir_lower_io_to_scalar().
+ * After moving more and more logic to NIR, all that's left here is fixing up
+ * addressing on uniform loads. FS input and VS output scalarization is
+ * handled by nir_lower_io_to_scalar().
  */
 
-static void
-replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
-                           nir_ssa_def **comps)
-{
-
-        /* Batch things back together into a vector. This will get split by
-         * the later ALU scalarization pass.
-         */
-        nir_ssa_def *vec = nir_vec(b, comps, intr->num_components);
-
-        /* Replace the old intrinsic with a reference to our reconstructed
-         * vector.
-         */
-        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
-        nir_instr_remove(&intr->instr);
-}
-
+/* Convert the uniform offset to bytes. If it happens to be a constant,
+ * constant-folding will clean up the shift for us.
+ */
 static void
 v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                       nir_intrinsic_instr *intr)
 {
         b->cursor = nir_before_instr(&intr->instr);
 
-        /* Generate scalar loads equivalent to the original vector. */
-        nir_ssa_def *dests[4];
-        for (unsigned i = 0; i < intr->num_components; i++) {
-                nir_intrinsic_instr *intr_comp =
-                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
-                intr_comp->num_components = 1;
-                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1,
-                                  intr->dest.ssa.bit_size, NULL);
-
-                /* Convert the uniform offset to bytes. If it happens
-                 * to be a constant, constant-folding will clean up
-                 * the shift for us.
-                 */
-                nir_intrinsic_set_base(intr_comp,
-                                       nir_intrinsic_base(intr) * 16 +
-                                       i * 4);
-
-                intr_comp->src[0] =
-                        nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
-                                                 nir_imm_int(b, 4)));
-
-                dests[i] = &intr_comp->dest.ssa;
-
-                nir_builder_instr_insert(b, &intr_comp->instr);
-        }
+        nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
 
-        replace_intrinsic_with_vec(b, intr, dests);
+        nir_instr_rewrite_src(&intr->instr,
+                              &intr->src[0],
+                              nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
+                                                       nir_imm_int(b, 4))));
 }
 
 static void