summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3.h 28
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 99
2 files changed, 66 insertions, 61 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 1a8beade25b..85daf106535 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -908,6 +908,34 @@ INSTR1(4, SQRT)
INSTR1(5, DSX)
INSTR1(5, DSY)
+static inline struct ir3_instruction *
+ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
+ unsigned wrmask, unsigned flags, unsigned samp, unsigned tex,
+ struct ir3_instruction *src0, struct ir3_instruction *src1)
+{
+ struct ir3_instruction *sam;
+ struct ir3_register *reg;
+
+ sam = ir3_instr_create(block, 5, opc);
+ sam->flags |= flags;
+ ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
+ if (src0) {
+ reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
+ reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
+ reg->instr = src0;
+ }
+ if (src1) {
+ reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
+ reg->instr = src1;
+ reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
+ }
+ sam->cat5.samp = samp;
+ sam->cat5.tex = tex;
+ sam->cat5.type = type;
+
+ return sam;
+}
+
/* cat6 instructions: */
INSTR2(6, LDLV)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 9af5c163bbf..0139c20345f 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -440,6 +440,9 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr,
{
struct ir3_instruction *collect;
+ if (arrsz == 0)
+ return NULL;
+
collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz);
ir3_reg_create(collect, 0, 0);
for (unsigned i = 0; i < arrsz; i++)
@@ -1153,11 +1156,12 @@ static void
emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
{
struct ir3_block *b = ctx->block;
- struct ir3_instruction **dst, *src0, *src1, *sam;
+ struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
- struct ir3_register *reg;
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags = 0;
+ unsigned nsrc0 = 0, nsrc1 = 0;
+ type_t type;
opc_t opc;
/* TODO: might just be one component for gathers? */
@@ -1211,61 +1215,51 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
* bias/lod go into the second arg
*/
- src0 = ir3_instr_create2(b, -1, OPC_META_FI, 12);
- ir3_reg_create(src0, 0, 0);
-
coords = tex->coord_components;
if (tex->is_array) /* array idx goes after shadow ref */
coords--;
/* insert tex coords: */
for (i = 0; i < coords; i++)
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = coord[i];
+ src0[nsrc0++] = coord[i];
if (coords == 1) {
/* hw doesn't do 1d, so we treat it as 2d with
* height of 1, and patch up the y coord.
* TODO: y coord should be (int)0 in some cases..
*/
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr =
- create_immed(b, fui(0.5));
+ src0[nsrc0++] = create_immed(b, fui(0.5));
+ } else if (coords == 3) {
+ flags |= IR3_INSTR_3D;
}
if (tex->is_shadow) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = compare;
+ src0[nsrc0++] = compare;
flags |= IR3_INSTR_S;
}
if (tex->is_array) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = coord[coords];
+ src0[nsrc0++] = coord[coords];
flags |= IR3_INSTR_A;
}
if (has_proj) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = proj;
+ src0[nsrc0++] = proj;
flags |= IR3_INSTR_P;
}
/* pad to 4, then ddx/ddy: */
if (tex->op == nir_texop_txd) {
- while (src0->regs_count < 5) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr =
- create_immed(b, fui(0.0));
- }
- for (i = 0; i < coords; i++) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = ddx[i];
- }
- if (coords < 2) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr =
- create_immed(b, fui(0.0));
- }
- for (i = 0; i < coords; i++) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = ddy[i];
- }
- if (coords < 2) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr =
- create_immed(b, fui(0.0));
- }
+ while (nsrc0 < 4)
+ src0[nsrc0++] = create_immed(b, fui(0.0));
+ for (i = 0; i < coords; i++)
+ src0[nsrc0++] = ddx[i];
+ if (coords < 2)
+ src0[nsrc0++] = create_immed(b, fui(0.0));
+ for (i = 0; i < coords; i++)
+ src0[nsrc0++] = ddy[i];
+ if (coords < 2)
+ src0[nsrc0++] = create_immed(b, fui(0.0));
}
/*
@@ -1275,25 +1269,16 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
* - bias
*/
if (has_off | has_lod | has_bias) {
- src1 = ir3_instr_create2(b, -1, OPC_META_FI, 5);
- ir3_reg_create(src1, 0, 0);
-
if (has_off) {
- for (i = 0; i < coords; i++) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr = off[i];
- }
- if (coords < 2) {
- ir3_reg_create(src0, 0, IR3_REG_SSA)->instr =
- create_immed(b, fui(0.0));
- }
+ for (i = 0; i < coords; i++)
+ src1[nsrc1++] = off[i];
+ if (coords < 2)
+ src1[nsrc1++] = create_immed(b, fui(0.0));
flags |= IR3_INSTR_O;
}
- if (has_lod | has_bias) {
- ir3_reg_create(src1, 0, IR3_REG_SSA)->instr = lod;
- }
- } else {
- src1 = NULL;
+ if (has_lod | has_bias)
+ src1[nsrc1++] = lod;
}
switch (tex->op) {
@@ -1311,33 +1296,25 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
return;
}
- sam = ir3_instr_create(b, 5, opc);
- sam->flags |= flags;
- ir3_reg_create(sam, 0, 0)->wrmask = 0xf; // TODO proper wrmask??
- reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
- reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
- reg->instr = src0;
- if (src1) {
- reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
- reg->instr = src1;
- reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
- }
- sam->cat5.samp = tex->sampler_index;
- sam->cat5.tex = tex->sampler_index;
-
switch (tex->dest_type) {
case nir_type_invalid:
case nir_type_float:
- sam->cat5.type = TYPE_F32;
+ type = TYPE_F32;
break;
case nir_type_int:
- sam->cat5.type = TYPE_S32;
+ type = TYPE_S32;
break;
case nir_type_unsigned:
case nir_type_bool:
- sam->cat5.type = TYPE_U32;
+ type = TYPE_U32;
+ break;
}
+ sam = ir3_SAM(b, opc, type, 0xf, flags,
+ tex->sampler_index, tex->sampler_index,
+ create_collect(b, src0, nsrc0),
+ create_collect(b, src1, nsrc1));
+
// TODO maybe split this out into a helper, for other cases that
// write multiple?
struct ir3_instruction *prev = NULL;