summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c51
2 files changed, 52 insertions, 0 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 1a3deb41e01..c0a14a07d48 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -961,6 +961,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
/* cat6 instructions: */
INSTR2(6, LDLV)
+INSTR2(6, LDG)
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index f7ea8799c2f..13ae7c2cac4 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -975,6 +975,53 @@ emit_alu(struct ir3_compile *ctx, nir_alu_instr *alu)
}
}
+/* handles direct/indirect UBO reads: */
+static void
+emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
+ struct ir3_instruction **dst)
+{
+ struct ir3_block *b = ctx->block;
+ struct ir3_instruction *addr, *src0, *src1;
+ /* UBO addresses are the first driver params: */
+ unsigned ubo = regid(ctx->so->first_driver_param, 0);
+ unsigned off = intr->const_index[0];
+
+ /* First src is ubo index, which could either be an immed or not: */
+ src0 = get_src(ctx, &intr->src[0])[0];
+ if (is_same_type_mov(src0) &&
+ (src0->regs[1]->flags & IR3_REG_IMMED)) {
+ addr = create_uniform(ctx, ubo + src0->regs[1]->iim_val);
+ } else {
+ addr = create_uniform_indirect(ctx, ubo, get_addr(ctx, src0));
+ }
+
+ if (intr->intrinsic == nir_intrinsic_load_ubo_indirect) {
+ /* For load_ubo_indirect, second src is indirect offset: */
+ src1 = get_src(ctx, &intr->src[1])[0];
+
+ /* and add offset to addr: */
+ addr = ir3_ADD_S(b, addr, 0, src1, 0);
+ }
+
+ /* if offset is to large to encode in the ldg, split it out: */
+ if ((off + (intr->num_components * 4)) > 1024) {
+ /* split out the minimal amount to improve the odds that
+ * cp can fit the immediate in the add.s instruction:
+ */
+ unsigned off2 = off + (intr->num_components * 4) - 1024;
+ addr = ir3_ADD_S(b, addr, 0, create_immed(b, off2), 0);
+ off -= off2;
+ }
+
+ for (int i = 0; i < intr->num_components; i++) {
+ struct ir3_instruction *load =
+ ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
+ load->cat6.type = TYPE_U32;
+ load->cat6.offset = off + i * 4; /* byte offset */
+ dst[i] = load;
+ }
+}
+
/* handles array reads: */
static void
emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
@@ -1124,6 +1171,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
get_addr(ctx, src[0]));
}
break;
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ubo_indirect:
+ emit_intrinsic_load_ubo(ctx, intr, dst);
+ break;
case nir_intrinsic_load_input:
compile_assert(ctx, intr->const_index[1] == 1);
for (int i = 0; i < intr->num_components; i++) {