summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/lima/ir/pp
diff options
context:
space:
mode:
authorVasily Khoruzhick <[email protected]>2019-05-10 19:17:40 -0700
committerVasily Khoruzhick <[email protected]>2019-05-27 07:39:03 -0700
commitaf0de6b91c0b2e26e57bd235cbdc7296992502d9 (patch)
tree1639c188678c75a569a97be425ef8e42b14927f2 /src/gallium/drivers/lima/ir/pp
parent7a7be6139839b57a89f5c6d24d04025fa25ba4a1 (diff)
lima/ppir: implement discard and discard_if
This commit also adds codegen for branch since we need it for discard_if. Reviewed-by: Qiang Yu <[email protected]> Signed-off-by: Vasily Khoruzhick <[email protected]>
Diffstat (limited to 'src/gallium/drivers/lima/ir/pp')
-rw-r--r--src/gallium/drivers/lima/ir/pp/codegen.c40
-rw-r--r--src/gallium/drivers/lima/ir/pp/instr.c32
-rw-r--r--src/gallium/drivers/lima/ir/pp/lower.c35
-rw-r--r--src/gallium/drivers/lima/ir/pp/nir.c104
-rw-r--r--src/gallium/drivers/lima/ir/pp/node.c16
-rw-r--r--src/gallium/drivers/lima/ir/pp/node_to_instr.c12
-rw-r--r--src/gallium/drivers/lima/ir/pp/ppir.h24
7 files changed, 253 insertions, 10 deletions
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c
index 93fd5628669..73763218d4b 100644
--- a/src/gallium/drivers/lima/ir/pp/codegen.c
+++ b/src/gallium/drivers/lima/ir/pp/codegen.c
@@ -507,6 +507,42 @@ static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code)
code[i] = util_float_to_half(constant->value[i].f);
}
+/* Encode a discard into the branch field of an instruction.
+ *
+ * node: the node being encoded; expected to be a ppir_op_discard node.
+ * code: output buffer, interpreted as a ppir_codegen_field_branch.
+ *
+ * The discard encoding is a fixed three-word pattern taken from the
+ * PPIR_CODEGEN_DISCARD_WORD* constants; nothing is read from the node
+ * besides its opcode.
+ */
+static void ppir_codegen_encode_discard(ppir_node *node, void *code)
+{
+   ppir_codegen_field_branch *b = code;
+
+   /* Must be a comparison: an assignment here would overwrite node->op
+    * whenever assertions are enabled and would never actually check it.
+    */
+   assert(node->op == ppir_op_discard);
+
+   b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0;
+   b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1;
+   b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2;
+}
+
+/* Encode the node occupying the branch slot of an instruction.
+ *
+ * node: a ppir_op_discard or ppir_op_branch node.
+ * code: output buffer, interpreted as a ppir_codegen_field_branch.
+ *
+ * Discard nodes are forwarded to ppir_codegen_encode_discard(). For a
+ * real branch the two sources, the gt/eq/lt condition flags and the jump
+ * target are written. The target is stored as the difference between the
+ * offset of the first instruction of the target block and the offset of
+ * the instruction that holds this branch.
+ */
+static void ppir_codegen_encode_branch(ppir_node *node, void *code)
+{
+   ppir_codegen_field_branch *b = code;
+   ppir_branch_node *branch;
+   ppir_instr *target_instr;
+
+   if (node->op == ppir_op_discard) {
+      ppir_codegen_encode_discard(node, code);
+      return;
+   }
+
+   /* Must be a comparison: an assignment here would silently turn any
+    * node into a branch when assertions are enabled instead of catching
+    * an unexpected opcode.
+    */
+   assert(node->op == ppir_op_branch);
+   branch = ppir_node_to_branch(node);
+
+   b->branch.unknown_0 = 0x0;
+   b->branch.arg0_source = ppir_target_get_src_reg_index(&branch->src[0]);
+   b->branch.arg1_source = ppir_target_get_src_reg_index(&branch->src[1]);
+   b->branch.cond_gt = branch->cond_gt;
+   b->branch.cond_eq = branch->cond_eq;
+   b->branch.cond_lt = branch->cond_lt;
+   b->branch.unknown_1 = 0x0;
+   b->branch.unknown_2 = 0x3;
+
+   /* Jump distance is relative to the instruction containing the branch. */
+   target_instr = list_first_entry(&branch->target->instr_list, ppir_instr, list);
+   b->branch.target = target_instr->offset - node->instr->offset;
+}
+
typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *);
static const ppir_codegen_instr_slot_encode_func
@@ -520,6 +556,7 @@ ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add,
[PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine,
[PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp,
+ [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch,
};
static const int ppir_codegen_field_size[] = {
@@ -634,7 +671,7 @@ static void ppir_codegen_print_prog(ppir_compiler *comp)
printf("========ppir codegen========\n");
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
- printf("%03d: ", instr->index);
+ printf("%03d (@%6ld): ", instr->index, instr->offset);
int n = prog[0] & 0x1f;
for (int i = 0; i < n; i++) {
if (i && i % 6 == 0)
@@ -655,6 +692,7 @@ bool ppir_codegen_prog(ppir_compiler *comp)
int size = 0;
list_for_each_entry(ppir_block, block, &comp->block_list, list) {
list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
+ instr->offset = size;
size += get_instr_encode_size(instr);
}
}
diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c
index ae296a4bb82..19cca714fa1 100644
--- a/src/gallium/drivers/lima/ir/pp/instr.c
+++ b/src/gallium/drivers/lima/ir/pp/instr.c
@@ -135,6 +135,20 @@ static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src,
return true;
}
+/* Redirect one source operand to a pipeline register.
+ *
+ * If src and dest name the same target (per ppir_node_target_equal), src
+ * is switched to read from the given pipeline register. When a swizzle
+ * table is supplied, each of the four swizzle entries of src is remapped
+ * through it. A src that does not match dest is left untouched.
+ */
+static void ppir_update_src_pipeline(ppir_pipeline pipeline, ppir_src *src,
+                                     ppir_dest *dest, uint8_t *swizzle)
+{
+   if (!ppir_node_target_equal(src, dest))
+      return;
+
+   src->type = ppir_target_pipeline;
+   src->pipeline = pipeline;
+
+   if (!swizzle)
+      return;
+
+   for (int chan = 0; chan < 4; chan++)
+      src->swizzle[chan] = swizzle[src->swizzle[chan]];
+}
+
/* make alu node src reflect the pipeline reg */
static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline,
ppir_dest *dest, uint8_t *swizzle)
@@ -146,15 +160,16 @@ static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipe
ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]);
for (int j = 0; j < alu->num_src; j++) {
ppir_src *src = alu->src + j;
- if (ppir_node_target_equal(src, dest)) {
- src->type = ppir_target_pipeline;
- src->pipeline = pipeline;
+ ppir_update_src_pipeline(pipeline, src, dest, swizzle);
+ }
+ }
- if (swizzle) {
- for (int k = 0; k < 4; k++)
- src->swizzle[k] = swizzle[src->swizzle[k]];
- }
- }
+ ppir_node *branch_node = instr->slots[PPIR_INSTR_SLOT_BRANCH];
+ if (branch_node && (branch_node->type == ppir_node_type_branch)) {
+ ppir_branch_node *branch = ppir_node_to_branch(branch_node);
+ for (int j = 0; j < 2; j++) {
+ ppir_src *src = branch->src + j;
+ ppir_update_src_pipeline(pipeline, src, dest, swizzle);
}
}
}
@@ -234,6 +249,7 @@ static struct {
[PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" },
[PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" },
[PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" },
+ [PPIR_INSTR_SLOT_BRANCH] = { 4, "brch" },
};
void ppir_instr_print_list(ppir_compiler *comp)
diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c
index ded92b150c5..192392f6224 100644
--- a/src/gallium/drivers/lima/ir/pp/lower.c
+++ b/src/gallium/drivers/lima/ir/pp/lower.c
@@ -400,6 +400,40 @@ static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)
return true;
}
+/* Lower a branch node by giving it a constant zero as second operand.
+ *
+ * A new single-component constant node holding 0.0 is created, linked
+ * into the node list relative to the branch's own list entry, and wired
+ * up as src[1] of the branch. The gt and lt condition flags are then set
+ * (eq is left as is), so the condition in src[0] is compared against
+ * zero - presumably meaning "taken when src[0] != 0".
+ *
+ * Returns false if the constant node cannot be created, true otherwise.
+ */
+static bool ppir_lower_branch(ppir_block *block, ppir_node *node)
+{
+   ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);
+   if (!zero)
+      return false;
+
+   /* Describe the constant: one component, value 0.0. */
+   zero->constant.num = 1;
+   zero->constant.value[0].f = 0;
+
+   /* Its result is a single-component SSA value. */
+   ppir_dest *zero_dest = &zero->dest;
+   zero_dest->type = ppir_target_ssa;
+   zero_dest->write_mask = 0x01;
+   zero_dest->ssa.num_components = 1;
+   zero_dest->ssa.live_in = INT_MAX;
+   zero_dest->ssa.live_out = 0;
+
+   list_addtail(&zero->node.list, &node->list);
+
+   /* For now the branch condition is simply compared with 0. In the
+    * future it may be possible to fold the comparison node into the
+    * branch itself and keep this approach only as a fallback for
+    * complex conditions.
+    */
+   ppir_branch_node *branch = ppir_node_to_branch(node);
+   branch->src[1].type = ppir_target_ssa;
+   branch->src[1].ssa = &zero_dest->ssa;
+   branch->cond_gt = true;
+   branch->cond_lt = true;
+
+   ppir_node_add_dep(&branch->node, &zero->node);
+
+   return true;
+}
+
static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_const] = ppir_lower_const,
[ppir_op_dot2] = ppir_lower_dot,
@@ -417,6 +451,7 @@ static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {
[ppir_op_load_texture] = ppir_lower_texture,
[ppir_op_select] = ppir_lower_select,
[ppir_op_trunc] = ppir_lower_trunc,
+ [ppir_op_branch] = ppir_lower_branch,
};
bool ppir_lower_prog(ppir_compiler *comp)
diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c
index 1d390827b07..580d5c3aff1 100644
--- a/src/gallium/drivers/lima/ir/pp/nir.c
+++ b/src/gallium/drivers/lima/ir/pp/nir.c
@@ -204,6 +204,57 @@ static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
return &node->node;
}
+static ppir_block *ppir_block_create(ppir_compiler *comp);
+
+/* Create the compiler's shared discard block.
+ *
+ * Allocates a new block, records it in comp->discard_block and appends a
+ * single ppir_op_discard node to its node list. Returns false when either
+ * the block or the discard node cannot be created.
+ */
+static bool ppir_emit_discard_block(ppir_compiler *comp)
+{
+   ppir_block *discard_blk = ppir_block_create(comp);
+   if (!discard_blk)
+      return false;
+
+   comp->discard_block = discard_blk;
+   discard_blk->comp = comp;
+
+   ppir_discard_node *discard_node =
+      ppir_node_create(discard_blk, ppir_op_discard, -1, 0);
+   if (!discard_node)
+      return false;
+
+   list_addtail(&discard_node->node.list, &discard_blk->node_list);
+   return true;
+}
+
+/* Translate a NIR discard_if intrinsic into a ppir branch node.
+ *
+ * The branch targets the compiler's discard block, which is created on
+ * first use. Only src[0] (the NIR condition) is set up here; src[1] and
+ * the condition flags are filled in later, during lowering.
+ *
+ * Returns the new node, or NULL if the discard block or the branch node
+ * cannot be created.
+ */
+static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
+{
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ni);
+   ppir_compiler *comp = block->comp;
+
+   /* Lazily create the block every discard_if jumps to. */
+   if (!comp->discard_block) {
+      if (!ppir_emit_discard_block(comp))
+         return NULL;
+   }
+
+   ppir_node *node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return NULL;
+
+   ppir_branch_node *branch = ppir_node_to_branch(node);
+   unsigned mask = u_bit_consecutive(0, intr->num_components);
+
+   ppir_node_add_src(comp, node, &branch->src[0], &intr->src[0], mask);
+   branch->target = comp->discard_block;
+
+   return node;
+}
+
+/* Translate an unconditional NIR discard into a ppir discard node.
+ *
+ * The NIR instruction carries nothing that is needed here, so ni is not
+ * inspected. Returns whatever ppir_node_create() yields for the node.
+ */
+static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
+{
+   return ppir_node_create(block, ppir_op_discard, -1, 0);
+}
+
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
@@ -264,6 +315,12 @@ static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
return &snode->node;
+ case nir_intrinsic_discard:
+ return ppir_emit_discard(block, ni);
+
+ case nir_intrinsic_discard_if:
+ return ppir_emit_discard_if(block, ni);
+
default:
ppir_error("unsupported nir_intrinsic_instr %s\n",
nir_intrinsic_infos[instr->intrinsic].name);
@@ -452,6 +509,46 @@ static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigne
return comp;
}
+/* Chain discard, store and branch nodes of each block in program order.
+ *
+ * Some intrinsics have no explicit data dependencies and rely purely on
+ * instruction order. Take discard_if and store_output: without a fake
+ * dependency between them the scheduler may place store_output first,
+ * and since store_output terminates the shader on Utgard PP, the rest
+ * would never be executed. To preserve the order, every discard/branch/
+ * store node is made dependent on the previous such node in its block.
+ *
+ * TODO: the scheduler should schedule discard_if as early as possible,
+ * otherwise code may be suboptimal for cases like this:
+ *
+ * s3 = s1 < s2
+ * discard_if s3
+ * s4 = s1 + s2
+ * store s4
+ *
+ * Here store depends on discard_if and on s4, but because dependencies
+ * may be scheduled in any order the result can look like this:
+ *
+ * instr1: s3 = s1 < s2
+ * instr2: s4 = s1 + s2
+ * instr3: discard_if s3
+ * instr4: store s4
+ */
+static void ppir_add_ordering_deps(ppir_compiler *comp)
+{
+   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
+      ppir_node *last_ordered = NULL;
+
+      list_for_each_entry(ppir_node, cur, &blk->node_list, list) {
+         bool needs_order = cur->type == ppir_node_type_discard ||
+                            cur->type == ppir_node_type_store ||
+                            cur->type == ppir_node_type_branch;
+         if (!needs_order)
+            continue;
+
+         if (last_ordered)
+            ppir_node_add_dep(cur, last_ordered);
+         last_ordered = cur;
+      }
+   }
+}
+
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
struct ra_regs *ra)
{
@@ -477,6 +574,13 @@ bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
if (!ppir_emit_cf_list(comp, &func->body))
goto err_out0;
+
+ /* If we have discard block add it to the very end */
+ if (comp->discard_block)
+ list_addtail(&comp->discard_block->list, &comp->block_list);
+
+ ppir_add_ordering_deps(comp);
+
ppir_node_print_prog(comp);
if (!ppir_lower_prog(comp))
diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c
index 5abf263768e..38d7e6284e4 100644
--- a/src/gallium/drivers/lima/ir/pp/node.c
+++ b/src/gallium/drivers/lima/ir/pp/node.c
@@ -281,6 +281,20 @@ const ppir_op_info ppir_op_infos[] = {
PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END
},
},
+ [ppir_op_discard] = {
+ .name = "discard",
+ .type = ppir_node_type_discard,
+ .slots = (int []) {
+ PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
+ },
+ },
+ [ppir_op_branch] = {
+ .name = "branch",
+ .type = ppir_node_type_branch,
+ .slots = (int []) {
+ PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END
+ },
+ },
};
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
@@ -292,6 +306,8 @@ void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)
[ppir_node_type_load] = sizeof(ppir_load_node),
[ppir_node_type_store] = sizeof(ppir_store_node),
[ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),
+ [ppir_node_type_discard] = sizeof(ppir_discard_node),
+ [ppir_node_type_branch] = sizeof(ppir_branch_node),
};
ppir_node_type type = ppir_op_infos[op].type;
diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
index b38fa3aa733..22678ae3349 100644
--- a/src/gallium/drivers/lima/ir/pp/node_to_instr.c
+++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c
@@ -93,7 +93,8 @@ static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
ppir_node_foreach_succ_safe(node, dep) {
ppir_node *succ = dep->succ;
- assert(succ->type == ppir_node_type_alu);
+ assert(succ->type == ppir_node_type_alu ||
+ succ->type == ppir_node_type_branch);
if (!ppir_instr_insert_node(succ->instr, node)) {
/* create a move node to insert for failed node */
@@ -323,6 +324,15 @@ static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node)
node = move;
break;
}
+ case ppir_node_type_discard:
+ if (!create_new_instr(block, node))
+ return false;
+ node->instr->is_end = true;
+ break;
+ case ppir_node_type_branch:
+ if (!create_new_instr(block, node))
+ return false;
+ break;
default:
return false;
}
diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h
index 71d80dc5196..233e5cdc3d8 100644
--- a/src/gallium/drivers/lima/ir/pp/ppir.h
+++ b/src/gallium/drivers/lima/ir/pp/ppir.h
@@ -108,6 +108,9 @@ typedef enum {
ppir_op_const,
+ ppir_op_discard,
+ ppir_op_branch,
+
ppir_op_num,
} ppir_op;
@@ -117,6 +120,8 @@ typedef enum {
ppir_node_type_load,
ppir_node_type_store,
ppir_node_type_load_texture,
+ ppir_node_type_discard,
+ ppir_node_type_branch,
} ppir_node_type;
typedef struct {
@@ -254,6 +259,10 @@ typedef struct {
int sampler_dim;
} ppir_load_texture_node;
+typedef struct { /* discard node: carries only the common ppir_node header */
+ ppir_node node;
+} ppir_discard_node;
+
enum ppir_instr_slot {
PPIR_INSTR_SLOT_VARYING,
PPIR_INSTR_SLOT_TEXLD,
@@ -264,6 +273,7 @@ enum ppir_instr_slot {
PPIR_INSTR_SLOT_ALU_SCL_ADD,
PPIR_INSTR_SLOT_ALU_COMBINE,
PPIR_INSTR_SLOT_STORE_TEMP,
+ PPIR_INSTR_SLOT_BRANCH,
PPIR_INSTR_SLOT_NUM,
PPIR_INSTR_SLOT_END,
PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL,
@@ -287,6 +297,7 @@ typedef struct ppir_instr {
int est; /* earliest start time */
int parent_index;
bool scheduled;
+ off_t offset;
} ppir_instr;
typedef struct ppir_block {
@@ -300,6 +311,15 @@ typedef struct ppir_block {
int sched_instr_base;
} ppir_block;
+typedef struct { /* branch node: two compared sources, condition flags, target */
+ ppir_node node; /* common node header */
+ ppir_src src[2]; /* encoded as arg0_source / arg1_source of the branch field */
+ bool cond_gt; /* copied into the cond_gt bit of the encoded branch */
+ bool cond_eq; /* copied into the cond_eq bit of the encoded branch */
+ bool cond_lt; /* copied into the cond_lt bit of the encoded branch */
+ ppir_block *target; /* block jumped to; codegen uses its first instruction */
+} ppir_branch_node;
+
struct ra_regs;
struct lima_fs_shader_state;
@@ -322,6 +342,8 @@ typedef struct ppir_compiler {
/* for regalloc spilling debug */
int force_spilling;
+
+ ppir_block *discard_block;
} ppir_compiler;
void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask);
@@ -377,6 +399,8 @@ static inline ppir_node *ppir_node_first_pred(ppir_node *node)
#define ppir_node_to_load(node) ((ppir_load_node *)(node))
#define ppir_node_to_store(node) ((ppir_store_node *)(node))
#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node))
+#define ppir_node_to_discard(node) ((ppir_discard_node *)(node))
+#define ppir_node_to_branch(node) ((ppir_branch_node *)(node))
static inline ppir_dest *ppir_node_get_dest(ppir_node *node)
{