summary refs log tree commit diff stats
path: root/src/panfrost
diff options
context:
space:
mode:
author Alyssa Rosenzweig <[email protected]> 2020-03-20 12:25:08 -0400
committer Marge Bot <[email protected]> 2020-03-22 03:32:35 +0000
commit 9458b017a946778ef5d065bfd61c47dafdfe3e94 (patch)
tree 2356f2a3abc3f5e8da13300cbac8ac5b6fe529d6 /src/panfrost
parent 409e4f8a49094a60354656a40cd23e38362e9a67 (diff)
pan/bi: Flesh out st_vary IR
We need to make the semantics of BI_VECTOR a bit more precise - vectorize only the first argument, not all of them. This is enough for current and future users, as far as I know.

Signed-off-by: Alyssa Rosenzweig <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4276>
Diffstat (limited to 'src/panfrost')
-rw-r--r-- src/panfrost/bifrost/bi_ra.c 4
-rw-r--r-- src/panfrost/bifrost/bifrost_compile.c 16
-rw-r--r-- src/panfrost/bifrost/bir.c 6
-rw-r--r-- src/panfrost/bifrost/compiler.h 5
4 files changed, 16 insertions, 15 deletions
diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c
index 8c0fa92a12e..a047932ae71 100644
--- a/src/panfrost/bifrost/bi_ra.c
+++ b/src/panfrost/bifrost/bi_ra.c
@@ -114,7 +114,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
if (ins->src[src] >= l->node_count)
return;
- bool vector = (bi_class_props[ins->type] & BI_VECTOR);
+ bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0;
unsigned offset = 0;
if (vector) {
@@ -125,7 +125,7 @@ bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
unsigned size = nir_alu_type_get_type_size(T);
unsigned bytes = (MAX2(size, 8) / 8);
unsigned comps_per_reg = 4 / bytes;
- unsigned components = bi_get_component_count(ins);
+ unsigned components = bi_get_component_count(ins, src);
for (unsigned i = 0; i < components; ++i) {
unsigned off = ins->swizzle[src][i] / comps_per_reg;
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index ff1d354b49f..25bb4ab9c58 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -178,22 +178,22 @@ bi_emit_st_vary(bi_context *ctx, nir_intrinsic_instr *instr)
address.src_types[2] = nir_type_uint32;
address.src_types[3] = nir_intrinsic_type(instr);
address.dest = bi_make_temp(ctx);
- address.dest_type = nir_type_uint64;
- address.writemask = (1 << 8) - 1;
+ address.dest_type = nir_type_uint32;
+ address.writemask = (1 << 12) - 1;
bi_instruction st = {
.type = BI_STORE_VAR,
.src = {
- address.dest,
- bir_src_index(&instr->src[0])
+ bir_src_index(&instr->src[0]),
+ address.dest, address.dest, address.dest,
},
.src_types = {
- nir_type_uint64,
- nir_type_uint32
+ nir_type_uint32,
+ nir_type_uint32, nir_type_uint32, nir_type_uint32,
},
.swizzle = {
- { 0 },
- { 0, 1, 2, 3 }
+ { 0, 1, 2, 3 },
+ { 0 }, { 1 }, { 2}
}
};
diff --git a/src/panfrost/bifrost/bir.c b/src/panfrost/bifrost/bir.c
index 5de9d30fe30..c385cfe7587 100644
--- a/src/panfrost/bifrost/bir.c
+++ b/src/panfrost/bifrost/bir.c
@@ -93,10 +93,10 @@ bi_from_bytemask(uint16_t bytemask, unsigned bytes)
}
unsigned
-bi_get_component_count(bi_instruction *ins)
+bi_get_component_count(bi_instruction *ins, unsigned src)
{
if (bi_class_props[ins->type] & BI_VECTOR) {
- return 4;
+ return (src == 0) ? 4 : 1;
} else {
/* Stores imply VECTOR */
assert(ins->dest_type);
@@ -119,10 +119,10 @@ uint16_t
bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
{
uint16_t mask = 0x0;
- unsigned component_count = bi_get_component_count(ins);
bi_foreach_src(ins, s) {
if (ins->src[s] != node) continue;
+ unsigned component_count = bi_get_component_count(ins, s);
nir_alu_type T = ins->src_types[s];
unsigned size = nir_alu_type_get_type_size(T);
unsigned bytes = (MAX2(size, 8) / 8);
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 1a777feaaac..ee868f75d92 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -111,7 +111,8 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
* the end of a clause. Implies ADD */
#define BI_SCHED_HI_LATENCY (1 << 7)
-/* Intrinsic is vectorized and should read 4 components regardless of writemask */
+/* Intrinsic is vectorized and should read 4 components in the first source
+ * regardless of writemask */
#define BI_VECTOR (1 << 8)
/* Use a data register for src0/dest respectively, bypassing the usual
@@ -530,7 +531,7 @@ bool bi_has_source_mods(bi_instruction *ins);
bool bi_is_src_swizzled(bi_instruction *ins, unsigned s);
bool bi_has_arg(bi_instruction *ins, unsigned arg);
uint16_t bi_from_bytemask(uint16_t bytemask, unsigned bytes);
-unsigned bi_get_component_count(bi_instruction *ins);
+unsigned bi_get_component_count(bi_instruction *ins, unsigned s);
unsigned bi_load32_components(bi_instruction *ins);
uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node);