aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/lima
diff options
context:
space:
mode:
authorErico Nunes <[email protected]>2020-04-14 02:45:38 +0200
committerErico Nunes <[email protected]>2020-05-09 14:40:34 +0200
commit8c47640731303ed2607d28ce2cf19a7e8f0f4006 (patch)
tree2916196c8686cd26a4c970b9df9c2164382d9d84 /src/gallium/drivers/lima
parent570f1420dbf361cf889c21c119816e063ecc3cea (diff)
lima/ppir: rework store output
In many cases, it is possible to avoid creating a mov for the store output node. Additionally, nodes other than alu, such as load varying, can be valid store output nodes too. This is another small optimization, but helps a vast majority of programs by 1 instruction. Shaders with discard easily become complicated to handle properly. Some example issues: ppir has to rely on instruction ordering; or a node with ssa output could be required both before a discard_if (as a condition) and after it (as the instruction with the 'stop' bit set). So don't try to handle them here. Signed-off-by: Erico Nunes <[email protected]> Reviewed-by: Vasily Khoruzhick <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4632>
Diffstat (limited to 'src/gallium/drivers/lima')
-rw-r--r--src/gallium/drivers/lima/ir/pp/codegen.c2
-rw-r--r--src/gallium/drivers/lima/ir/pp/liveness.c1
-rw-r--r--src/gallium/drivers/lima/ir/pp/nir.c34
-rw-r--r--src/gallium/drivers/lima/ir/pp/node.c13
-rw-r--r--src/gallium/drivers/lima/ir/pp/node_to_instr.c13
-rw-r--r--src/gallium/drivers/lima/ir/pp/ppir.h3
-rw-r--r--src/gallium/drivers/lima/ir/pp/regalloc.c2
7 files changed, 40 insertions, 28 deletions
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c
index ce2c10732ce..4420ee30e05 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -201,7 +201,6 @@ static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code)
f->op = shift_to_op(alu->shift);
break;
case ppir_op_mov:
- case ppir_op_store_color:
f->op = ppir_codegen_vec4_mul_op_mov;
break;
case ppir_op_max:
@@ -344,7 +343,6 @@ static void ppir_codegen_encode_vec_add(ppir_node *node, void *code)
f->op = ppir_codegen_vec4_acc_op_add;
break;
case ppir_op_mov:
- case ppir_op_store_color:
f->op = ppir_codegen_vec4_acc_op_mov;
break;
case ppir_op_sum3:
diff --git a/src/gallium/drivers/lima/ir/pp/liveness.c b/src/gallium/drivers/lima/ir/pp/liveness.c
index b17564b089d..20954bd08be 100644
--- a/src/gallium/drivers/lima/ir/pp/liveness.c
+++ b/src/gallium/drivers/lima/ir/pp/liveness.c
@@ -178,7 +178,6 @@ ppir_liveness_instr_dest(ppir_compiler *comp, ppir_instr *instr)
switch(node->op) {
case ppir_op_const:
case ppir_op_undef:
- case ppir_op_store_color: /* never clear dest if its store output */
continue;
default:
break;
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 3c02c0a811c..620ecbed4d2 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -352,7 +352,26 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
return true;
case nir_intrinsic_store_output: {
- alu_node = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0);
+ /* In simple cases where the store_output is ssa, that register
+ * can be directly marked as the output.
+ * If discard is used or the source is not ssa, things can get a
+ * lot more complicated, so don't try to optimize those and fall
+ * back to inserting a mov at the end.
+ * If the source node will only be able to output to pipeline
+ * registers, fall back to the mov as well. */
+ if (!block->comp->uses_discard && instr->src->is_ssa) {
+ node = block->comp->var_nodes[instr->src->ssa->index];
+ switch (node->op) {
+ case ppir_op_load_uniform:
+ case ppir_op_const:
+ break;
+ default:
+ node->is_end = 1;
+ return true;
+ }
+ }
+
+ alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
if (!alu_node)
return false;
@@ -370,6 +389,8 @@ static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
u_bit_consecutive(0, instr->num_components));
+ alu_node->node.is_end = 1;
+
list_addtail(&alu_node->node.list, &block->node_list);
return true;
}
@@ -724,9 +745,9 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
/* Some intrinsics do not have explicit dependencies and thus depend
- * on instructions order. Consider discard_if and store_ouput as
- * example. If we don't add fake dependency of discard_if to store_output
- * scheduler may put store_output first and since store_output terminates
+ * on instructions order. Consider discard_if and the is_end node as
+ * example. If we don't add fake dependency of discard_if to is_end,
+ * scheduler may put the is_end first and since is_end terminates
* shader on Utgard PP, rest of it will never be executed.
* Add fake dependencies for discard/branch/store to preserve
* instruction order.
@@ -753,8 +774,8 @@ static void ppir_add_ordering_deps(ppir_compiler *comp)
if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
}
- if (node->op == ppir_op_discard ||
- node->op == ppir_op_store_color ||
+ if (node->is_end ||
+ node->op == ppir_op_discard ||
node->op == ppir_op_store_temp ||
node->op == ppir_op_branch) {
prev_node = node;
@@ -818,6 +839,7 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
return false;
comp->ra = ra;
+ comp->uses_discard = nir->info.fs.uses_discard;
/* 1st pass: create ppir blocks */
nir_foreach_function(function, nir) {
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c
index fdf444af0bb..b71cbc3763e 100644
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -313,14 +313,6 @@ const ppir_op_info ppir_op_infos[] = {
.name = "const",
.type = ppir_node_type_const,
},
- [ppir_op_store_color] = {
- .name = "st_col",
- .type = ppir_node_type_alu,
- .slots = (int []) {
- PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,
- PPIR_INSTR_SLOT_END
- },
- },
[ppir_op_store_temp] = {
.name = "st_temp",
.type = ppir_node_type_store,
@@ -632,6 +624,11 @@ ppir_node *ppir_node_insert_mov(ppir_node *node)
ppir_node_add_dep(move, node, ppir_dep_src);
list_addtail(&move->list, &node->list);
+ if (node->is_end) {
+ node->is_end = false;
+ move->is_end = true;
+ }
+
return move;
}
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index 35cf5f10f63..14b8c41f302 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -75,7 +75,7 @@ static bool ppir_do_node_to_instr_try_insert(ppir_block *block, ppir_node *node)
return ppir_instr_insert_node(succ->instr, node);
}
-static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_node **next)
+static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node)
{
switch (node->type) {
case ppir_node_type_alu:
@@ -105,9 +105,6 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
if (!node->instr && !create_new_instr(block, node))
return false;
- if (node->op == ppir_op_store_color)
- node->instr->is_end = true;
-
break;
}
case ppir_node_type_load:
@@ -195,15 +192,13 @@ static bool ppir_do_one_node_to_instr(ppir_block *block, ppir_node *node, ppir_n
static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
{
- ppir_node *next = node;
-
/* first try pipeline sched, if that didn't succeed try normal scheduling */
if (!ppir_do_node_to_instr_try_insert(block, node))
- if (!ppir_do_one_node_to_instr(block, node, &next))
+ if (!ppir_do_one_node_to_instr(block, node))
return false;
- /* next may have been updated in ppir_do_one_node_to_instr */
- node = next;
+ if (node->is_end)
+ node->instr->is_end = true;
/* we have to make sure the dep not be destroyed (due to
* succ change) in ppir_do_node_to_instr, otherwise we can't
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index cec918d50f3..59887a70ec9 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -108,7 +108,6 @@ typedef enum {
ppir_op_load_temp,
ppir_op_store_temp,
- ppir_op_store_color,
ppir_op_const,
@@ -162,6 +161,7 @@ typedef struct ppir_node {
struct ppir_instr *instr;
int instr_pos;
struct ppir_block *block;
+ bool is_end;
/* for scheduler */
struct list_head succ_list;
@@ -385,6 +385,7 @@ typedef struct ppir_compiler {
struct ra_regs *ra;
struct lima_fs_shader_state *prog;
+ bool uses_discard;
/* for scheduler */
int sched_instr_base;
diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c
index 4b5d3a8bb1d..5c5d13e5b9d 100644
--- a/src/gallium/drivers/lima/ir/pp/regalloc.c
+++ b/src/gallium/drivers/lima/ir/pp/regalloc.c
@@ -138,7 +138,7 @@ static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_node, node, &block->node_list, list) {
- if (node->op == ppir_op_store_color)
+ if (node->is_end)
continue;
if (!node->instr || node->op == ppir_op_const)