aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/etnaviv
diff options
context:
space:
mode:
authorJonathan Marek <[email protected]>2019-09-11 14:29:10 -0400
committerJonathan Marek <[email protected]>2019-09-28 00:33:42 -0400
commit0036e078e3d097f8a7a94235052f6d0e647e4da1 (patch)
tree7535094008b9d8ff536e1dacb9a7ba81797ca9e9 /src/gallium/drivers/etnaviv
parent7da15bdd2d05cc252b28cf385bf2cf2b31aa576b (diff)
etnaviv: nir: allocate contiguous components for LOAD destination
LOAD starts reading into the first enabled destination component, and doesn't skip disabled components, so we need to allocate a destination with contiguous components. Signed-off-by: Jonathan Marek <[email protected]> Reviewed-by: Christian Gmeiner <[email protected]>
Diffstat (limited to 'src/gallium/drivers/etnaviv')
-rw-r--r--src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h61
1 files changed, 53 insertions, 8 deletions
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
index c89ab9a9453..20e6bb2d26c 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.h
@@ -157,6 +157,9 @@ enum {
REG_CLASS_VEC4,
/* special vec2 class for fast transcendentals, limited to XY or ZW */
REG_CLASS_VIRT_VEC2T,
+ /* special classes for LOAD - contiguous components */
+ REG_CLASS_VIRT_VEC2C,
+ REG_CLASS_VIRT_VEC3C,
NUM_REG_CLASSES,
} reg_class;
@@ -178,6 +181,11 @@ enum {
REG_TYPE_VIRT_SCALAR_W,
REG_TYPE_VIRT_VEC2T_XY,
REG_TYPE_VIRT_VEC2T_ZW,
+ REG_TYPE_VIRT_VEC2C_XY,
+ REG_TYPE_VIRT_VEC2C_YZ,
+ REG_TYPE_VIRT_VEC2C_ZW,
+ REG_TYPE_VIRT_VEC3C_XYZ,
+ REG_TYPE_VIRT_VEC3C_YZW,
NUM_REG_TYPES,
} reg_type;
@@ -189,18 +197,23 @@ reg_writemask[NUM_REG_TYPES] = {
[REG_TYPE_VIRT_SCALAR_Y] = 0x2,
[REG_TYPE_VIRT_VEC2_XY] = 0x3,
[REG_TYPE_VIRT_VEC2T_XY] = 0x3,
+ [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
[REG_TYPE_VIRT_SCALAR_Z] = 0x4,
[REG_TYPE_VIRT_VEC2_XZ] = 0x5,
[REG_TYPE_VIRT_VEC2_YZ] = 0x6,
+ [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
[REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
+ [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
[REG_TYPE_VIRT_SCALAR_W] = 0x8,
[REG_TYPE_VIRT_VEC2_XW] = 0x9,
[REG_TYPE_VIRT_VEC2_YW] = 0xa,
[REG_TYPE_VIRT_VEC3_XYW] = 0xb,
[REG_TYPE_VIRT_VEC2_ZW] = 0xc,
[REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
+ [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
[REG_TYPE_VIRT_VEC3_XZW] = 0xd,
[REG_TYPE_VIRT_VEC3_YZW] = 0xe,
+ [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};
/* how to swizzle when used as a src */
@@ -211,18 +224,23 @@ reg_swiz[NUM_REG_TYPES] = {
[REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
[REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
+ [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
[REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
[REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
+ [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
[REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
+ [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
[REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
[REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
[REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
[REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
[REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
+ [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
[REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
[REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
+ [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
};
/* how to swizzle when used as a dest */
@@ -233,18 +251,23 @@ reg_dst_swiz[NUM_REG_TYPES] = {
[REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
[REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
+ [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
[REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
[REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
+ [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
[REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
+ [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
[REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
[REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
[REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
[REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
[REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
[REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
+ [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
[REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
[REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
+ [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
};
static inline int reg_get_type(int virt_reg)
@@ -285,6 +308,13 @@ static inline int reg_get_class(int virt_reg)
case REG_TYPE_VIRT_VEC2T_XY:
case REG_TYPE_VIRT_VEC2T_ZW:
return REG_CLASS_VIRT_VEC2T;
+ case REG_TYPE_VIRT_VEC2C_XY:
+ case REG_TYPE_VIRT_VEC2C_YZ:
+ case REG_TYPE_VIRT_VEC2C_ZW:
+ return REG_CLASS_VIRT_VEC2C;
+ case REG_TYPE_VIRT_VEC3C_XYZ:
+ case REG_TYPE_VIRT_VEC3C_YZW:
+ return REG_CLASS_VIRT_VEC3C;
}
assert(false);
@@ -775,11 +805,13 @@ live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map)
/* precomputed by register_allocate */
static unsigned int *q_values[] = {
- (unsigned int[]) { 1, 2, 3, 4, 2 },
- (unsigned int[]) { 3, 5, 6, 6, 5 },
- (unsigned int[]) { 3, 4, 4, 4, 4 },
- (unsigned int[]) { 1, 1, 1, 1, 1 },
- (unsigned int[]) { 1, 2, 2, 2, 1 },
+ (unsigned int[]) {1, 2, 3, 4, 2, 2, 3, },
+ (unsigned int[]) {3, 5, 6, 6, 5, 5, 6, },
+ (unsigned int[]) {3, 4, 4, 4, 4, 4, 4, },
+ (unsigned int[]) {1, 1, 1, 1, 1, 1, 1, },
+ (unsigned int[]) {1, 2, 2, 2, 1, 2, 2, },
+ (unsigned int[]) {2, 3, 3, 3, 2, 3, 3, },
+ (unsigned int[]) {2, 2, 2, 2, 2, 2, 2, },
};
static void
@@ -835,8 +867,7 @@ ra_assign(struct state *state, nir_shader *shader)
for (unsigned i = 0; i < num_nodes; i++) {
nir_instr *instr = defs[i].instr;
nir_dest *dest = defs[i].dest;
-
- ra_set_node_class(g, i, nir_dest_num_components(*dest) - 1);
+ unsigned c = nir_dest_num_components(*dest) - 1;
if (instr->type == nir_instr_type_alu && option(etna_new_transcendentals)) {
switch (nir_instr_as_alu(instr)->op) {
@@ -845,11 +876,25 @@ ra_assign(struct state *state, nir_shader *shader)
case nir_op_fsin:
case nir_op_fcos:
assert(dest->is_ssa);
- ra_set_node_class(g, i, REG_CLASS_VIRT_VEC2T);
+ c = REG_CLASS_VIRT_VEC2T;
default:
break;
}
}
+
+ if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic == nir_intrinsic_load_uniform) {
+ /* make sure there isn't any reswizzling */
+ assert(dest == &intr->dest);
+ if (dest->ssa.num_components == 2)
+ c = REG_CLASS_VIRT_VEC2C;
+ if (dest->ssa.num_components == 3)
+ c = REG_CLASS_VIRT_VEC3C;
+ }
+ }
+
+ ra_set_node_class(g, i, c);
}
nir_foreach_block(block, impl) {