author | Qiang Yu <[email protected]> | 2019-03-12 13:49:26 -0600
committer | Qiang Yu <[email protected]> | 2019-04-11 09:57:53 +0800
commit | 92d7ca4b1cdfe1ffc80748fa7eedf927f3c664f0 (patch)
tree | d96651839bae0342e65b9cd414e56759a9a25cea /src/gallium/drivers/lima/ir/pp
parent | 64eaf60ca739704f71bd312c0f4039d287258216 (diff)
gallium: add lima driver
v2:
- use renamed util_dynarray_grow_cap
- use DEBUG_GET_ONCE_FLAGS_OPTION for debug flags (see the sketch after these version notes)
- remove DRM_FORMAT_MOD_ARM_AGTB_MODE0 usage
- compute min/max index in driver (see the sketch after these version notes)
v3:
- fix plbu framebuffer state calculation
- fix color_16pc assembly
- use nir_lower_all_source_mods for lowering neg/abs/sat
- use float array for static GPU data
- add disassembly comment for static shader code
- use drm_find_modifier
v4:
- use lima_nir_lower_uniform_to_scalar
v5:
- remove nir_opt_global_to_local when rebasing
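As a side note on the DEBUG_GET_ONCE_FLAGS_OPTION item above: the PP compiler dumps in codegen.c below are gated on `lima_debug & LIMA_DEBUG_PP`, and the sketch here shows how such a flag is typically wired up with the util/u_debug.h macro. The environment-variable name, the flag bit value and the init helper are illustrative assumptions, not copied from this patch.

#include "util/u_debug.h"

/* Illustrative sketch only: the real definitions live elsewhere in the
 * full patch.  The flag bit and env-var name below are assumptions. */
#define LIMA_DEBUG_PP (1 << 1)

uint32_t lima_debug; /* tested as (lima_debug & LIMA_DEBUG_PP) in codegen.c */

static const struct debug_named_value lima_debug_options[] = {
   { "pp", LIMA_DEBUG_PP, "print PP compiler result of each shader" },
   DEBUG_NAMED_VALUE_END
};

/* Expands to debug_get_option_lima_debug_flags(), which parses the
 * LIMA_DEBUG environment variable once and caches the flag mask. */
DEBUG_GET_ONCE_FLAGS_OPTION(lima_debug_flags, "LIMA_DEBUG", lima_debug_options, 0)

static void
lima_init_debug(void)
{
   lima_debug = debug_get_option_lima_debug_flags();
}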
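And a small illustration of the "compute min/max index in driver" item above: for indexed draws the driver scans the index buffer itself to find the smallest and largest referenced vertex index rather than relying on the state tracker. The helper below is hypothetical (its name and 16-bit-only handling are made up here) and only shows the idea; the real code sits in the draw path of the full patch, outside this ir/pp subset.

#include <stdint.h>

/* Hypothetical helper: find the min/max vertex index referenced by a
 * 16-bit index buffer before emitting an indexed draw. */
static void
lima_scan_index_range_u16(const uint16_t *indices, unsigned count,
                          uint32_t *min_out, uint32_t *max_out)
{
   uint32_t min = UINT32_MAX, max = 0;

   for (unsigned i = 0; i < count; i++) {
      if (indices[i] < min)
         min = indices[i];
      if (indices[i] > max)
         max = indices[i];
   }

   *min_out = min;
   *max_out = max;
}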
Cc: Rob Clark <[email protected]>
Cc: Alyssa Rosenzweig <[email protected]>
Acked-by: Eric Anholt <[email protected]>
Signed-off-by: Andreas Baierl <[email protected]>
Signed-off-by: Arno Messiaen <[email protected]>
Signed-off-by: Connor Abbott <[email protected]>
Signed-off-by: Erico Nunes <[email protected]>
Signed-off-by: Heiko Stuebner <[email protected]>
Signed-off-by: Koen Kooi <[email protected]>
Signed-off-by: Marek Vasut <[email protected]>
Signed-off-by: marmeladema <[email protected]>
Signed-off-by: Paweł Chmiel <[email protected]>
Signed-off-by: Rob Herring <[email protected]>
Signed-off-by: Rohan Garg <[email protected]>
Signed-off-by: Vasily Khoruzhick <[email protected]>
Signed-off-by: Qiang Yu <[email protected]>
Diffstat (limited to 'src/gallium/drivers/lima/ir/pp')
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/codegen.c | 669
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/codegen.h | 359
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/disasm.c | 776
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/instr.c | 311
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/lower.c | 421
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/nir.c | 494
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/node.c | 426
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/node_to_instr.c | 401
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/ppir.h | 512
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/regalloc.c | 757
-rw-r--r-- | src/gallium/drivers/lima/ir/pp/scheduler.c | 197
11 files changed, 5323 insertions, 0 deletions
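One encoding detail in codegen.c below is worth a worked example before the diff: encode_swizzle() packs a four-component swizzle two bits per component, so the identity swizzle .xyzw (components 0,1,2,3) packs to 0xE4, which is exactly the value print_swizzle() in disasm.c skips as "no swizzle". The sketch below reproduces just that packing rule with the register-offset shifts left out, so it is an illustration rather than the driver's function.

#include <stdint.h>
#include <stdio.h>

/* Same two-bits-per-component layout as encode_swizzle() in codegen.c,
 * ignoring the shift/dest_shift register offsets. */
static unsigned
pack_swizzle(const uint8_t swizzle[4])
{
   unsigned ret = 0;
   for (int i = 0; i < 4; i++)
      ret |= (swizzle[i] & 0x3) << (i * 2);
   return ret;
}

int main(void)
{
   const uint8_t identity[4] = {0, 1, 2, 3}; /* .xyzw */
   printf("0x%02x\n", pack_swizzle(identity)); /* prints 0xe4 */
   return 0;
}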
diff --git a/src/gallium/drivers/lima/ir/pp/codegen.c b/src/gallium/drivers/lima/ir/pp/codegen.c new file mode 100644 index 00000000000..1cce28595e6 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/codegen.c @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/ralloc.h" +#include "util/u_half.h" +#include "util/bitscan.h" + +#include "ppir.h" +#include "codegen.h" +#include "lima_context.h" + +static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift) +{ + unsigned ret = 0; + for (int i = 0; i < 4; i++) + ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2); + return ret; +} + +static int get_scl_reg_index(ppir_src *src, int component) +{ + int ret = ppir_target_get_src_reg_index(src); + ret += src->swizzle[component]; + return ret; +} + +static void ppir_codegen_encode_varying(ppir_node *node, void *code) +{ + ppir_codegen_field_varying *f = code; + ppir_load_node *load = ppir_node_to_load(node); + ppir_dest *dest = &load->dest; + int index = ppir_target_get_dest_reg_index(dest); + int num_components = load->num_components; + + if (num_components) { + assert(node->op == ppir_op_load_varying || node->op == ppir_op_load_coords); + + f->imm.dest = index >> 2; + f->imm.mask = dest->write_mask << (index & 0x3); + + int alignment = num_components == 3 ? 
3 : num_components - 1; + f->imm.alignment = alignment; + f->imm.offset_vector = 0xf; + + if (alignment == 3) + f->imm.index = load->index >> 2; + else + f->imm.index = load->index >> alignment; + } + else { + assert(node->op == ppir_op_load_coords); + + f->reg.dest = index >> 2; + f->reg.mask = dest->write_mask << (index & 0x3); + + f->reg.source_type = 1; + + ppir_src *src = &load->src; + index = ppir_target_get_src_reg_index(src); + f->reg.source = index >> 2; + f->reg.negate = src->negate; + f->reg.absolute = src->absolute; + f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0); + } +} + +static void ppir_codegen_encode_texld(ppir_node *node, void *code) +{ + ppir_codegen_field_sampler *f = code; + ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node); + + f->index = ldtex->sampler; + f->lod_bias_en = 0; + f->type = ppir_codegen_sampler_type_2d; + f->offset_en = 0; + f->unknown_2 = 0x39001; +} + +static void ppir_codegen_encode_uniform(ppir_node *node, void *code) +{ + ppir_codegen_field_uniform *f = code; + ppir_load_node *load = ppir_node_to_load(node); + + switch (node->op) { + case ppir_op_load_uniform: + f->source = ppir_codegen_uniform_src_uniform; + break; + case ppir_op_load_temp: + f->source = ppir_codegen_uniform_src_temporary; + break; + default: + assert(0); + } + + int num_components = load->num_components; + int alignment = num_components == 4 ? 2 : num_components - 1; + + f->alignment = alignment; + + /* TODO: uniform can be also combined like varying */ + f->index = load->index << (2 - alignment); +} + +static unsigned shift_to_op(int shift) +{ + assert(shift >= -3 && shift <= 3); + return shift < 0 ? shift + 8 : shift; +} + +static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code) +{ + ppir_codegen_field_vec4_mul *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_shift = 0; + if (dest->type != ppir_target_pipeline) { + int index = ppir_target_get_dest_reg_index(dest); + dest_shift = index & 0x3; + f->dest = index >> 2; + f->mask = dest->write_mask << dest_shift; + } + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_mul: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_vec4_mul_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_vec4_mul_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_vec4_mul_op_min; + break; + case ppir_op_and: + f->op = ppir_codegen_vec4_mul_op_and; + break; + case ppir_op_or: + f->op = ppir_codegen_vec4_mul_op_or; + break; + case ppir_op_xor: + f->op = ppir_codegen_vec4_mul_op_xor; + break; + case ppir_op_gt: + f->op = ppir_codegen_vec4_mul_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_vec4_mul_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_vec4_mul_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_vec4_mul_op_ne; + break; + case ppir_op_not: + f->op = ppir_codegen_vec4_mul_op_not; + break; + default: + break; + } + + ppir_src *src = alu->src; + int index = ppir_target_get_src_reg_index(src); + f->arg0_source = index >> 2; + f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (alu->num_src == 2) { + src = alu->src + 1; + index = ppir_target_get_src_reg_index(src); + f->arg1_source = index >> 2; + f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void 
ppir_codegen_encode_scl_mul(ppir_node *node, void *code) +{ + ppir_codegen_field_float_mul *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + + if (dest->type != ppir_target_pipeline) { + f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->output_en = true; + } + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_mul: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_float_mul_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_float_mul_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_float_mul_op_min; + break; + case ppir_op_and: + f->op = ppir_codegen_float_mul_op_and; + break; + case ppir_op_or: + f->op = ppir_codegen_float_mul_op_or; + break; + case ppir_op_xor: + f->op = ppir_codegen_float_mul_op_xor; + break; + case ppir_op_gt: + f->op = ppir_codegen_float_mul_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_float_mul_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_float_mul_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_float_mul_op_ne; + break; + case ppir_op_not: + f->op = ppir_codegen_float_mul_op_not; + break; + default: + break; + } + + ppir_src *src = alu->src; + f->arg0_source = get_scl_reg_index(src, dest_component); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (alu->num_src == 2) { + src = alu->src + 1; + f->arg1_source = get_scl_reg_index(src, dest_component); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_vec_add(ppir_node *node, void *code) +{ + ppir_codegen_field_vec4_acc *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int index = ppir_target_get_dest_reg_index(dest); + int dest_shift = index & 0x3; + f->dest = index >> 2; + f->mask = dest->write_mask << dest_shift; + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_add: + f->op = ppir_codegen_vec4_acc_op_add; + break; + case ppir_op_mov: + f->op = ppir_codegen_vec4_acc_op_mov; + break; + case ppir_op_sum3: + f->op = ppir_codegen_vec4_acc_op_sum3; + dest_shift = 0; + break; + case ppir_op_sum4: + f->op = ppir_codegen_vec4_acc_op_sum4; + dest_shift = 0; + break; + case ppir_op_floor: + f->op = ppir_codegen_vec4_acc_op_floor; + break; + case ppir_op_fract: + f->op = ppir_codegen_vec4_acc_op_fract; + break; + case ppir_op_gt: + f->op = ppir_codegen_vec4_acc_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_vec4_acc_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_vec4_acc_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_vec4_acc_op_ne; + break; + case ppir_op_select: + f->op = ppir_codegen_vec4_acc_op_sel; + break; + default: + break; + } + + ppir_src *src = node->op == ppir_op_select ? 
alu->src + 1 : alu->src; + index = ppir_target_get_src_reg_index(src); + + if (src->type == ppir_target_pipeline && + src->pipeline == ppir_pipeline_reg_vmul) + f->mul_in = true; + else + f->arg0_source = index >> 2; + + f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (++src < alu->src + alu->num_src) { + index = ppir_target_get_src_reg_index(src); + f->arg1_source = index >> 2; + f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_scl_add(ppir_node *node, void *code) +{ + ppir_codegen_field_float_acc *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + + f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->output_en = true; + f->dest_modifier = dest->modifier; + + switch (node->op) { + case ppir_op_add: + f->op = shift_to_op(alu->shift); + break; + case ppir_op_mov: + f->op = ppir_codegen_float_acc_op_mov; + break; + case ppir_op_max: + f->op = ppir_codegen_float_acc_op_max; + break; + case ppir_op_min: + f->op = ppir_codegen_float_acc_op_min; + break; + case ppir_op_floor: + f->op = ppir_codegen_float_acc_op_floor; + break; + case ppir_op_fract: + f->op = ppir_codegen_float_acc_op_fract; + break; + case ppir_op_gt: + f->op = ppir_codegen_float_acc_op_gt; + break; + case ppir_op_ge: + f->op = ppir_codegen_float_acc_op_ge; + break; + case ppir_op_eq: + f->op = ppir_codegen_float_acc_op_eq; + break; + case ppir_op_ne: + f->op = ppir_codegen_float_acc_op_ne; + break; + case ppir_op_select: + f->op = ppir_codegen_float_acc_op_sel; + break; + default: + break; + } + + ppir_src *src = node->op == ppir_op_select ? 
alu->src + 1: alu->src; + if (src->type == ppir_target_pipeline && + src->pipeline == ppir_pipeline_reg_fmul) + f->mul_in = true; + else + f->arg0_source = get_scl_reg_index(src, dest_component); + f->arg0_absolute = src->absolute; + f->arg0_negate = src->negate; + + if (++src < alu->src + alu->num_src) { + f->arg1_source = get_scl_reg_index(src, dest_component); + f->arg1_absolute = src->absolute; + f->arg1_negate = src->negate; + } +} + +static void ppir_codegen_encode_combine(ppir_node *node, void *code) +{ + ppir_codegen_field_combine *f = code; + ppir_alu_node *alu = ppir_node_to_alu(node); + + switch (node->op) { + case ppir_op_rsqrt: + case ppir_op_log2: + case ppir_op_exp2: + case ppir_op_rcp: + case ppir_op_sqrt: + case ppir_op_sin: + case ppir_op_cos: + { + f->scalar.dest_vec = false; + f->scalar.arg1_en = false; + + ppir_dest *dest = &alu->dest; + int dest_component = ffs(dest->write_mask) - 1; + assert(dest_component >= 0); + f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component; + f->scalar.dest_modifier = dest->modifier; + + ppir_src *src = alu->src; + f->scalar.arg0_src = get_scl_reg_index(src, dest_component); + f->scalar.arg0_absolute = src->absolute; + f->scalar.arg0_negate = src->negate; + + switch (node->op) { + case ppir_op_rsqrt: + f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt; + break; + case ppir_op_log2: + f->scalar.op = ppir_codegen_combine_scalar_op_log2; + break; + case ppir_op_exp2: + f->scalar.op = ppir_codegen_combine_scalar_op_exp2; + break; + case ppir_op_rcp: + f->scalar.op = ppir_codegen_combine_scalar_op_rcp; + break; + case ppir_op_sqrt: + f->scalar.op = ppir_codegen_combine_scalar_op_sqrt; + break; + case ppir_op_sin: + f->scalar.op = ppir_codegen_combine_scalar_op_sin; + break; + case ppir_op_cos: + f->scalar.op = ppir_codegen_combine_scalar_op_cos; + break; + default: + break; + } + } + default: + break; + } +} + +static void ppir_codegen_encode_store_temp(ppir_node *node, void *code) +{ + assert(node->op == ppir_op_store_temp); + + ppir_codegen_field_temp_write *f = code; + ppir_store_node *snode = ppir_node_to_store(node); + int num_components = snode->num_components; + + f->temp_write.dest = 0x03; // 11 - temporary + f->temp_write.source = snode->src.reg->index; + + int alignment = num_components == 4 ? 
2 : num_components - 1; + f->temp_write.alignment = alignment; + f->temp_write.index = snode->index << (2 - alignment); + + f->temp_write.offset_reg = snode->index >> 2; +} + +static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code) +{ + for (int i = 0; i < constant->num; i++) + code[i] = util_float_to_half(constant->value[i].f); +} + +typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); + +static const ppir_codegen_instr_slot_encode_func +ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = { + [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying, + [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld, + [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform, + [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul, + [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul, + [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add, + [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, + [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, + [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, +}; + +static const int ppir_codegen_field_size[] = { + 34, 62, 41, 43, 30, 44, 31, 30, 41, 73 +}; + +static inline int align_to_word(int size) +{ + return ((size + 0x1f) >> 5); +} + +static int get_instr_encode_size(ppir_instr *instr) +{ + int size = 0; + + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + if (instr->slots[i]) + size += ppir_codegen_field_size[i]; + } + + for (int i = 0; i < 2; i++) { + if (instr->constant[i].num) + size += 64; + } + + return align_to_word(size) + 1; +} + +static void bitcopy(void *dst, int dst_offset, void *src, int src_size) +{ + int off1 = dst_offset & 0x1f; + uint32_t *cpy_dst = dst, *cpy_src = src; + + cpy_dst += (dst_offset >> 5); + + if (off1) { + int off2 = 32 - off1; + int cpy_size = 0; + while (1) { + *cpy_dst |= *cpy_src << off1; + cpy_dst++; + + cpy_size += off2; + if (cpy_size >= src_size) + break; + + *cpy_dst |= *cpy_src >> off2; + cpy_src++; + + cpy_size += off1; + if (cpy_size >= src_size) + break; + } + } + else + memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4); +} + +static int encode_instr(ppir_instr *instr, void *code, void *last_code) +{ + int size = 0; + ppir_codegen_ctrl *ctrl = code; + + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + if (instr->slots[i]) { + /* max field size (73), align to dword */ + uint8_t output[12] = {0}; + + ppir_codegen_encode_slot[i](instr->slots[i], output); + bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]); + + size += ppir_codegen_field_size[i]; + ctrl->fields |= 1 << i; + } + } + + if (instr->slots[PPIR_INSTR_SLOT_TEXLD]) + ctrl->sync = true; + + for (int i = 0; i < 2; i++) { + if (instr->constant[i].num) { + uint16_t output[4] = {0}; + + ppir_codegen_encode_const(instr->constant + i, output); + bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16); + + size += 64; + ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i); + } + } + + size = align_to_word(size) + 1; + + ctrl->count = size; + if (instr->is_end) + ctrl->stop = true; + + if (last_code) { + ppir_codegen_ctrl *last_ctrl = last_code; + last_ctrl->next_count = size; + last_ctrl->prefetch = true; + } + + return size; +} + +static void ppir_codegen_print_prog(ppir_compiler *comp) +{ + uint32_t *prog = comp->prog->shader; + unsigned offset = 0; + + printf("========ppir codegen========\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%03d: ", 
instr->index); + int n = prog[0] & 0x1f; + for (int i = 0; i < n; i++) { + if (i && i % 6 == 0) + printf("\n "); + printf("%08x ", prog[i]); + } + printf("\n"); + ppir_disassemble_instr(prog, offset); + prog += n; + offset += n; + } + } + printf("-----------------------\n"); +} + +bool ppir_codegen_prog(ppir_compiler *comp) +{ + int size = 0; + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + size += get_instr_encode_size(instr); + } + } + + uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t)); + if (!prog) + return false; + + uint32_t *code = prog, *last_code = NULL; + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + int offset = encode_instr(instr, code, last_code); + last_code = code; + code += offset; + } + } + + comp->prog->shader = prog; + comp->prog->shader_size = size * sizeof(uint32_t); + + if (lima_debug & LIMA_DEBUG_PP) + ppir_codegen_print_prog(comp); + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/codegen.h b/src/gallium/drivers/lima/ir/pp/codegen.h new file mode 100644 index 00000000000..ab80d392dc2 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/codegen.h @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Ben Brewer ([email protected]) + * Copyright (c) 2013 Connor Abbott ([email protected]) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef LIMA_IR_PP_CODEGEN_H +#define LIMA_IR_PP_CODEGEN_H + +#include <stdint.h> +#include <stdbool.h> + +/* Control */ + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned count : 5; + bool stop : 1; + bool sync : 1; + unsigned fields : 12; + unsigned next_count : 6; + bool prefetch : 1; + unsigned unknown : 6; + }; + uint32_t mask; +} ppir_codegen_ctrl; + +typedef enum { + ppir_codegen_field_shift_varying = 0, + ppir_codegen_field_shift_sampler = 1, + ppir_codegen_field_shift_uniform = 2, + ppir_codegen_field_shift_vec4_mul = 3, + ppir_codegen_field_shift_float_mul = 4, + ppir_codegen_field_shift_vec4_acc = 5, + ppir_codegen_field_shift_float_acc = 6, + ppir_codegen_field_shift_combine = 7, + ppir_codegen_field_shift_temp_write = 8, + ppir_codegen_field_shift_branch = 9, + ppir_codegen_field_shift_vec4_const_0 = 10, + ppir_codegen_field_shift_vec4_const_1 = 11, + ppir_codegen_field_shift_count = 12, +} ppir_codegen_field_shift; + +/* Data Inputs */ + +typedef enum { + ppir_codegen_vec4_reg_frag_color = 0, + ppir_codegen_vec4_reg_constant0 = 12, + ppir_codegen_vec4_reg_constant1 = 13, + ppir_codegen_vec4_reg_texture = 14, + ppir_codegen_vec4_reg_uniform = 15, + ppir_codegen_vec4_reg_discard = 15, +} ppir_codegen_vec4_reg; + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned perspective : 2; + unsigned source_type : 2; + unsigned unknown_0 : 1; /* = 0 */ + unsigned alignment : 2; + unsigned unknown_1 : 3; /* = 00 0 */ + unsigned offset_vector : 4; + unsigned unknown_2 : 2; /* = 00 */ + unsigned offset_scalar : 2; + unsigned index : 6; + ppir_codegen_vec4_reg dest : 4; + unsigned mask : 4; + unsigned unknown_3 : 2; /* = 00 */ + } imm; + struct __attribute__((__packed__)) { + unsigned perspective : 2; + unsigned source_type : 2; /* = 01 */ + unsigned unknown_0 : 2; /* = 00 */ + bool normalize : 1; + unsigned unknown_1 : 3; + ppir_codegen_vec4_reg source : 4; + bool negate : 1; + bool absolute : 1; + unsigned swizzle : 8; + ppir_codegen_vec4_reg dest : 4; + unsigned mask : 4; + unsigned unknown_2 : 2; /* = 00 */ + } reg; +} ppir_codegen_field_varying; + +typedef enum { + ppir_codegen_sampler_type_2d = 0x00, + ppir_codegen_sampler_type_cube = 0x1F, +} ppir_codegen_sampler_type; + +typedef struct __attribute__((__packed__)) { + unsigned lod_bias : 6; + unsigned index_offset : 6; + unsigned unknown_0 : 6; /* = 000000 */ + bool lod_bias_en : 1; + unsigned unknown_1 : 5; /* = 00000 */ + ppir_codegen_sampler_type type : 5; + bool offset_en : 1; + unsigned index : 12; + unsigned unknown_2 : 20; /* = 0011 1001 0000 0000 0001 */ +} ppir_codegen_field_sampler; + +typedef enum { + ppir_codegen_uniform_src_uniform = 0, + ppir_codegen_uniform_src_temporary = 3, +} ppir_codegen_uniform_src; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_uniform_src source : 2; + unsigned unknown_0 : 8; /* = 00 0000 00 */ + unsigned alignment : 2; /* 00: float, 01: vec2, 10: vec4 */ + unsigned unknown_1 : 6; /* = 00 0000 */ + unsigned offset_reg : 6; + bool offset_en : 1; + unsigned index : 16; +} ppir_codegen_field_uniform; + +/* Vector Pipe */ + +typedef enum { + ppir_codegen_vec4_mul_op_not = 0x08, /* Logical Not */ + ppir_codegen_vec4_mul_op_and = 0x09, /* Logical AND */ + ppir_codegen_vec4_mul_op_or = 0x0A, /* Logical OR */ + ppir_codegen_vec4_mul_op_xor = 0x0B, /* Logical XOR */ + ppir_codegen_vec4_mul_op_ne = 0x0C, /* Not Equal */ + ppir_codegen_vec4_mul_op_gt = 0x0D, /* Great Than */ + 
ppir_codegen_vec4_mul_op_ge = 0x0E, /* Great than or Equal */ + ppir_codegen_vec4_mul_op_eq = 0x0F, /* Equal */ + ppir_codegen_vec4_mul_op_min = 0x10, /* Minimum */ + ppir_codegen_vec4_mul_op_max = 0x11, /* Maximum */ + ppir_codegen_vec4_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_vec4_mul_op; + +typedef enum { + ppir_codegen_outmod_none = 0, + ppir_codegen_outmod_clamp_fraction = 1, + ppir_codegen_outmod_clamp_positive = 2, + ppir_codegen_outmod_round = 3, +} ppir_codegen_outmod; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_vec4_reg arg0_source : 4; + unsigned arg0_swizzle : 8; + bool arg0_absolute : 1; + bool arg0_negate : 1; + ppir_codegen_vec4_reg arg1_source : 4; + unsigned arg1_swizzle : 8; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 4; + unsigned mask : 4; + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_vec4_mul_op op : 5; +} ppir_codegen_field_vec4_mul; + +typedef enum { + ppir_codegen_vec4_acc_op_add = 0x00, + ppir_codegen_vec4_acc_op_fract = 0x04, /* Fract? */ + ppir_codegen_vec4_acc_op_ne = 0x08, /* Not Equal */ + ppir_codegen_vec4_acc_op_gt = 0x09, /* Great-Than */ + ppir_codegen_vec4_acc_op_ge = 0x0A, /* Great-than or Equal */ + ppir_codegen_vec4_acc_op_eq = 0x0B, /* Equal */ + ppir_codegen_vec4_acc_op_floor = 0x0C, + ppir_codegen_vec4_acc_op_ceil = 0x0D, + ppir_codegen_vec4_acc_op_min = 0x0E, + ppir_codegen_vec4_acc_op_max = 0x0F, + ppir_codegen_vec4_acc_op_sum3 = 0x10, /* dest.xyzw = (arg0.x + arg0.y + arg0.z) */ + ppir_codegen_vec4_acc_op_sum4 = 0x11, /* dest.xyzw = (arg0.x + arg0.y + arg0.z + arg0.w) */ + ppir_codegen_vec4_acc_op_dFdx = 0x14, + ppir_codegen_vec4_acc_op_dFdy = 0x15, + ppir_codegen_vec4_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */ + ppir_codegen_vec4_acc_op_mov = 0x1F, /* Passthrough, result = arg0 */ +} ppir_codegen_vec4_acc_op; + +typedef struct __attribute__((__packed__)) { + ppir_codegen_vec4_reg arg0_source : 4; + unsigned arg0_swizzle : 8; + bool arg0_absolute : 1; + bool arg0_negate : 1; + ppir_codegen_vec4_reg arg1_source : 4; + unsigned arg1_swizzle : 8; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 4; + unsigned mask : 4; + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_vec4_acc_op op : 5; + bool mul_in : 1; /* whether to get arg0 from multiply unit below */ +} ppir_codegen_field_vec4_acc; + +/* Float (Scalar) Pipe */ + +typedef enum { + ppir_codegen_float_mul_op_not = 0x08, /* Logical Not */ + ppir_codegen_float_mul_op_and = 0x09, /* Logical AND */ + ppir_codegen_float_mul_op_or = 0x0A, /* Logical OR */ + ppir_codegen_float_mul_op_xor = 0x0B, /* Logical XOR */ + ppir_codegen_float_mul_op_ne = 0x0C, /* Not Equal */ + ppir_codegen_float_mul_op_gt = 0x0D, /* Great Than */ + ppir_codegen_float_mul_op_ge = 0x0E, /* great than or Equal */ + ppir_codegen_float_mul_op_eq = 0x0F, /* Equal */ + ppir_codegen_float_mul_op_min = 0x10, /* Minimum */ + ppir_codegen_float_mul_op_max = 0x11, /* Maximum */ + ppir_codegen_float_mul_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_float_mul_op; + +typedef struct __attribute__((__packed__)) { + unsigned arg0_source : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg1_source : 6; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 6; + bool output_en : 1; /* Set to 0 when outputting directly to float_acc below. 
*/ + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_float_mul_op op : 5; +} ppir_codegen_field_float_mul; + +typedef enum { + ppir_codegen_float_acc_op_add = 0x00, + ppir_codegen_float_acc_op_fract = 0x04, + ppir_codegen_float_acc_op_ne = 0x08, /* Not Equal */ + ppir_codegen_float_acc_op_gt = 0x09, /* Great-Than */ + ppir_codegen_float_acc_op_ge = 0x0A, /* Great-than or Equal */ + ppir_codegen_float_acc_op_eq = 0x0B, /* Equal */ + ppir_codegen_float_acc_op_floor = 0x0C, + ppir_codegen_float_acc_op_ceil = 0x0D, + ppir_codegen_float_acc_op_min = 0x0E, + ppir_codegen_float_acc_op_max = 0x0F, + ppir_codegen_float_acc_op_dFdx = 0x14, + ppir_codegen_float_acc_op_dFdy = 0x15, + ppir_codegen_float_acc_op_sel = 0x17, /* result = (^fmul ? arg0 : arg1) */ + ppir_codegen_float_acc_op_mov = 0x1F, /* Passthrough, result = arg1 */ +} ppir_codegen_float_acc_op; + +typedef struct __attribute__((__packed__)) { + unsigned arg0_source : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg1_source : 6; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned dest : 6; + bool output_en : 1; /* Always true */ + ppir_codegen_outmod dest_modifier : 2; + ppir_codegen_float_acc_op op : 5; + bool mul_in : 1; /* Get arg1 from float_mul above. */ +} ppir_codegen_field_float_acc; + +/* Temporary Write / Framebuffer Read */ + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned dest : 2; /* = 11 */ + unsigned unknown_0 : 2; /* = 00 */ + unsigned source : 6; + unsigned alignment : 2; /* 0: float, 1:vec2, 2: vec4 */ + unsigned unknown_1 : 6; /* = 00 0000 */ + unsigned offset_reg : 6; + bool offset_en : 1; + unsigned index : 16; + } temp_write; + struct __attribute__((__packed__)) { + bool source : 1; /* 0 = fb_depth, 1 = fb_color */ + unsigned unknown_0 : 5; /* = 00 111 */ + unsigned dest : 4; + unsigned unknown_1 : 31; /* = 0 0000 ... 
10 */ + } fb_read; +} ppir_codegen_field_temp_write; + +/* Result combiner */ + +typedef enum { + ppir_codegen_combine_scalar_op_rcp = 0, /* Reciprocal */ + ppir_codegen_combine_scalar_op_mov = 1, /* No Operation */ + ppir_codegen_combine_scalar_op_sqrt = 2, /* Square-Root */ + ppir_codegen_combine_scalar_op_rsqrt = 3, /* Inverse Square-Root */ + ppir_codegen_combine_scalar_op_exp2 = 4, /* Binary Exponent */ + ppir_codegen_combine_scalar_op_log2 = 5, /* Binary Logarithm */ + ppir_codegen_combine_scalar_op_sin = 6, /* Sine (Scaled LUT) */ + ppir_codegen_combine_scalar_op_cos = 7, /* Cosine (Scaled LUT) */ + ppir_codegen_combine_scalar_op_atan = 8, /* Arc Tangent Part 1 */ + ppir_codegen_combine_scalar_op_atan2 = 9, /* Arc Tangent 2 Part 1 */ +} ppir_codegen_combine_scalar_op; + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + bool dest_vec : 1; + bool arg1_en : 1; + ppir_codegen_combine_scalar_op op : 4; + bool arg1_absolute : 1; + bool arg1_negate : 1; + unsigned arg1_src : 6; + bool arg0_absolute : 1; + bool arg0_negate : 1; + unsigned arg0_src : 6; + ppir_codegen_outmod dest_modifier : 2; + unsigned dest : 6; + } scalar; + struct __attribute__((__packed__)) { + bool dest_vec : 1; + bool arg1_en : 1; + unsigned arg1_swizzle : 8; + unsigned arg1_source : 4; + unsigned padding_0 : 8; + unsigned mask : 4; + unsigned dest : 4; + } vector; +} ppir_codegen_field_combine; + +/* Branch/Control Flow */ + +#define PPIR_CODEGEN_DISCARD_WORD0 0x007F0003 +#define PPIR_CODEGEN_DISCARD_WORD1 0x00000000 +#define PPIR_CODEGEN_DISCARD_WORD2 0x000 + +typedef union __attribute__((__packed__)) { + struct __attribute__((__packed__)) { + unsigned unknown_0 : 4; /* = 0000 */ + unsigned arg1_source : 6; + unsigned arg0_source : 6; + bool cond_gt : 1; + bool cond_eq : 1; + bool cond_lt : 1; + unsigned unknown_1 : 22; /* = 0 0000 0000 0000 0000 0000 0 */ + signed target : 27; + unsigned unknown_2 : 5; /* = 0 0011 */ + } branch; + struct __attribute__((__packed__)) { + unsigned word0 : 32; + unsigned word1 : 32; + unsigned word2 : 9; + } discard; +} ppir_codegen_field_branch; + +void ppir_disassemble_instr(uint32_t *instr, unsigned offset); + +#endif diff --git a/src/gallium/drivers/lima/ir/pp/disasm.c b/src/gallium/drivers/lima/ir/pp/disasm.c new file mode 100644 index 00000000000..d2a3fb17d03 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/disasm.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2018 Lima Project + * + * Copyright (c) 2013 Codethink (http://www.codethink.co.uk) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_half.h" + +#include "ppir.h" +#include "codegen.h" + +typedef struct { + char *name; + unsigned srcs; +} asm_op; + +static void +print_swizzle(uint8_t swizzle) +{ + if (swizzle == 0xE4) + return; + + printf("."); + for (unsigned i = 0; i < 4; i++, swizzle >>= 2) + printf("%c", "xyzw"[swizzle & 3]); +} + +static void +print_mask(uint8_t mask) +{ + if (mask == 0xF) + return; + + printf("."); + if (mask & 1) printf("x"); + if (mask & 2) printf("y"); + if (mask & 4) printf("z"); + if (mask & 8) printf("w"); +} + +static void +print_reg(ppir_codegen_vec4_reg reg, const char *special) +{ + if (special) { + printf("%s", special); + } else { + switch (reg) + { + case ppir_codegen_vec4_reg_constant0: + printf("^const0"); + break; + case ppir_codegen_vec4_reg_constant1: + printf("^const1"); + break; + case ppir_codegen_vec4_reg_texture: + printf("^texture"); + break; + case ppir_codegen_vec4_reg_uniform: + printf("^uniform"); + break; + default: + printf("$%u", reg); + break; + } + } +} + +static void +print_vector_source(ppir_codegen_vec4_reg reg, const char *special, + uint8_t swizzle, bool abs, bool neg) +{ + if (neg) + printf("-"); + if (abs) + printf("abs("); + + print_reg(reg, special); + print_swizzle(swizzle); + + if (abs) + printf(")"); +} + +static void +print_source_scalar(unsigned reg, const char *special, bool abs, bool neg) +{ + if (neg) + printf("-"); + if (abs) + printf("abs("); + + print_reg(reg >> 2, special); + if (!special) + printf(".%c", "xyzw"[reg & 3]); + + if (abs) + printf(")"); +} + +static void +print_outmod(ppir_codegen_outmod modifier) +{ + switch (modifier) + { + case ppir_codegen_outmod_clamp_fraction: + printf(".sat"); + break; + case ppir_codegen_outmod_clamp_positive: + printf(".pos"); + break; + case ppir_codegen_outmod_round: + printf(".int"); + break; + default: + break; + } +} + +static void +print_dest_scalar(unsigned reg) +{ + printf("$%u", reg >> 2); + printf(".%c ", "xyzw"[reg & 3]); +} + +static void +print_const(unsigned const_num, uint16_t *val) +{ + printf("const%u", const_num); + for (unsigned i = 0; i < 4; i++) + printf(" %f", util_half_to_float(val[i])); +} + +static void +print_const0(void *code, unsigned offset) +{ + (void) offset; + + print_const(0, code); +} + +static void +print_const1(void *code, unsigned offset) +{ + (void) offset; + + print_const(1, code); +} + +static void +print_varying(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_varying *varying = code; + + printf("load"); + + bool perspective = varying->imm.source_type < 2 && varying->imm.perspective; + if (perspective) + { + printf(".perspective"); + switch (varying->imm.perspective) + { + case 2: + printf(".z"); + break; + case 3: + printf(".w"); + break; + default: + printf(".unknown"); + break; + } + } + + printf(".v "); + + switch (varying->imm.dest) + { + case ppir_codegen_vec4_reg_discard: + printf("^discard"); + break; + default: + printf("$%u", varying->imm.dest); + break; + } + print_mask(varying->imm.mask); + printf(" "); + + switch (varying->imm.source_type) { + case 1: + print_vector_source(varying->reg.source, NULL, varying->reg.swizzle, + varying->reg.absolute, varying->reg.negate); + break; + case 2: + printf("gl_FragCoord"); + break; + case 3: + if 
(varying->imm.perspective) + printf("gl_FrontFacing"); + else + printf("gl_PointCoord"); + break; + default: + switch (varying->imm.alignment) { + case 0: + printf("%u.%c", varying->imm.index >> 2, + "xyzw"[varying->imm.index & 3]); + break; + case 1: { + const char *c[2] = {"xy", "zw"}; + printf("%u.%s", varying->imm.index >> 1, c[varying->imm.index & 1]); + break; + } + default: + printf("%u", varying->imm.index); + break; + } + + if (varying->imm.offset_vector != 15) { + unsigned reg = (varying->imm.offset_vector << 2) + + varying->imm.offset_scalar; + printf("+"); + print_source_scalar(reg, NULL, false, false); + } + break; + } +} + +static void +print_sampler(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_sampler *sampler = code; + + printf("texld"); + if (sampler->lod_bias_en) + printf(".b"); + + switch (sampler->type) { + case ppir_codegen_sampler_type_2d: + printf(".2d"); + break; + case ppir_codegen_sampler_type_cube: + printf(".cube"); + break; + default: + printf("_t%u", sampler->type); + break; + } + + printf(" %u", sampler->index); + + if (sampler->offset_en) + { + printf("+"); + print_source_scalar(sampler->index_offset, NULL, false, false); + } + + if (sampler->lod_bias_en) + { + printf(" "); + print_source_scalar(sampler->lod_bias, NULL, false, false); + } +} + +static void +print_uniform(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_uniform *uniform = code; + + printf("load."); + + switch (uniform->source) { + case ppir_codegen_uniform_src_uniform: + printf("u"); + break; + case ppir_codegen_uniform_src_temporary: + printf("t"); + break; + default: + printf(".u%u", uniform->source); + break; + } + + if (uniform->alignment) + printf(" %u", uniform->index); + else + printf(" %u.%c", uniform->index >> 2, "xyzw"[uniform->index & 3]); + + if (uniform->offset_en) { + printf(" "); + print_source_scalar(uniform->offset_reg, NULL, false, false); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_vec4_mul_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op vec4_mul_ops[] = { + [0 ... 
7] = { + .name = "mul", + .srcs = 2 + }, + CASE(not, 1), + CASE(and, 2), + CASE(or, 2), + CASE(xor, 2), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(min, 2), + CASE(max, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_vec4_mul(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_vec4_mul *vec4_mul = code; + + asm_op op = vec4_mul_ops[vec4_mul->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", vec4_mul->op); + print_outmod(vec4_mul->dest_modifier); + printf(".v0 "); + + if (vec4_mul->mask) { + printf("$%u", vec4_mul->dest); + print_mask(vec4_mul->mask); + printf(" "); + } + + print_vector_source(vec4_mul->arg0_source, NULL, + vec4_mul->arg0_swizzle, + vec4_mul->arg0_absolute, + vec4_mul->arg0_negate); + + if (vec4_mul->op < 8 && vec4_mul->op != 0) { + printf("<<%u", vec4_mul->op); + } + + printf(" "); + + if (op.srcs > 1) { + print_vector_source(vec4_mul->arg1_source, NULL, + vec4_mul->arg1_swizzle, + vec4_mul->arg1_absolute, + vec4_mul->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_vec4_acc_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op vec4_acc_ops[] = { + CASE(add, 2), + CASE(fract, 1), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(floor, 1), + CASE(ceil, 1), + CASE(min, 2), + CASE(max, 2), + CASE(sum3, 1), + CASE(sum4, 1), + CASE(dFdx, 2), + CASE(dFdy, 2), + CASE(sel, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_vec4_acc(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_vec4_acc *vec4_acc = code; + + asm_op op = vec4_acc_ops[vec4_acc->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", vec4_acc->op); + print_outmod(vec4_acc->dest_modifier); + printf(".v1 "); + + if (vec4_acc->mask) { + printf("$%u", vec4_acc->dest); + print_mask(vec4_acc->mask); + printf(" "); + } + + print_vector_source(vec4_acc->arg0_source, vec4_acc->mul_in ? "^v0" : NULL, + vec4_acc->arg0_swizzle, + vec4_acc->arg0_absolute, + vec4_acc->arg0_negate); + + if (op.srcs > 1) { + printf(" "); + print_vector_source(vec4_acc->arg1_source, NULL, + vec4_acc->arg1_swizzle, + vec4_acc->arg1_absolute, + vec4_acc->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_float_mul_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op float_mul_ops[] = { + [0 ... 
7] = { + .name = "mul", + .srcs = 2 + }, + CASE(not, 1), + CASE(and, 2), + CASE(or, 2), + CASE(xor, 2), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(min, 2), + CASE(max, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_float_mul(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_float_mul *float_mul = code; + + asm_op op = float_mul_ops[float_mul->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", float_mul->op); + print_outmod(float_mul->dest_modifier); + printf(".s0 "); + + if (float_mul->output_en) + print_dest_scalar(float_mul->dest); + + print_source_scalar(float_mul->arg0_source, NULL, + float_mul->arg0_absolute, + float_mul->arg0_negate); + + if (float_mul->op < 8 && float_mul->op != 0) { + printf("<<%u", float_mul->op); + } + + if (op.srcs > 1) { + printf(" "); + + print_source_scalar(float_mul->arg1_source, NULL, + float_mul->arg1_absolute, + float_mul->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_float_acc_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op float_acc_ops[] = { + CASE(add, 2), + CASE(fract, 1), + CASE(ne, 2), + CASE(gt, 2), + CASE(ge, 2), + CASE(eq, 2), + CASE(floor, 1), + CASE(ceil, 1), + CASE(min, 2), + CASE(max, 2), + CASE(dFdx, 2), + CASE(dFdy, 2), + CASE(sel, 2), + CASE(mov, 1), +}; + +#undef CASE + +static void +print_float_acc(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_float_acc *float_acc = code; + + asm_op op = float_acc_ops[float_acc->op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", float_acc->op); + print_outmod(float_acc->dest_modifier); + printf(".s1 "); + + if (float_acc->output_en) + print_dest_scalar(float_acc->dest); + + print_source_scalar(float_acc->arg0_source, float_acc->mul_in ? "^s0" : NULL, + float_acc->arg0_absolute, + float_acc->arg0_negate); + + if (op.srcs > 1) { + printf(" "); + print_source_scalar(float_acc->arg1_source, NULL, + float_acc->arg1_absolute, + float_acc->arg1_negate); + } +} + +#define CASE(_name, _srcs) \ +[ppir_codegen_combine_scalar_op_##_name] = { \ + .name = #_name, \ + .srcs = _srcs \ +} + +static const asm_op combine_ops[] = { + CASE(rcp, 1), + CASE(mov, 1), + CASE(sqrt, 1), + CASE(rsqrt, 1), + CASE(exp2, 1), + CASE(log2, 1), + CASE(sin, 1), + CASE(cos, 1), + CASE(atan, 1), + CASE(atan2, 1), +}; + +#undef CASE + +static void +print_combine(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_combine *combine = code; + + if (combine->scalar.dest_vec && + combine->scalar.arg1_en) { + /* This particular combination can only be valid for scalar * vector + * multiplies, and the opcode field is reused for something else. 
+ */ + printf("mul"); + } else { + asm_op op = combine_ops[combine->scalar.op]; + + if (op.name) + printf("%s", op.name); + else + printf("op%u", combine->scalar.op); + } + + if (!combine->scalar.dest_vec) + print_outmod(combine->scalar.dest_modifier); + printf(".s2 "); + + if (combine->scalar.dest_vec) { + printf("$%u", combine->vector.dest); + print_mask(combine->vector.mask); + } else { + print_dest_scalar(combine->scalar.dest); + } + printf(" "); + + print_source_scalar(combine->scalar.arg0_src, NULL, + combine->scalar.arg0_absolute, + combine->scalar.arg0_negate); + printf(" "); + + if (combine->scalar.arg1_en) { + if (combine->scalar.dest_vec) { + print_vector_source(combine->vector.arg1_source, NULL, + combine->vector.arg1_swizzle, + false, false); + } else { + print_source_scalar(combine->scalar.arg1_src, NULL, + combine->scalar.arg1_absolute, + combine->scalar.arg1_negate); + } + } +} + +static void +print_temp_write(void *code, unsigned offset) +{ + (void) offset; + ppir_codegen_field_temp_write *temp_write = code; + + if (temp_write->fb_read.unknown_0 == 0x7) { + if (temp_write->fb_read.source) + printf("fb_color"); + else + printf("fb_depth"); + printf(" $%u", temp_write->fb_read.dest); + + return; + } + + printf("store.t"); + + if (temp_write->temp_write.alignment) { + printf(" %u", temp_write->temp_write.index); + } else { + printf(" %u.%c", temp_write->temp_write.index >> 2, + "xyzw"[temp_write->temp_write.index & 3]); + } + + if (temp_write->temp_write.offset_en) { + printf("+"); + print_source_scalar(temp_write->temp_write.offset_reg, + NULL, false, false); + } + + printf(" "); + + if (temp_write->temp_write.alignment) { + print_reg(temp_write->temp_write.source >> 2, NULL); + } else { + print_source_scalar(temp_write->temp_write.source, NULL, false, false); + } +} + +static void +print_branch(void *code, unsigned offset) +{ + ppir_codegen_field_branch *branch = code; + + if (branch->discard.word0 == PPIR_CODEGEN_DISCARD_WORD0 && + branch->discard.word1 == PPIR_CODEGEN_DISCARD_WORD1 && + branch->discard.word2 == PPIR_CODEGEN_DISCARD_WORD2) { + printf("discard"); + return; + } + + + const char* cond[] = { + "nv", "lt", "eq", "le", + "gt", "ne", "ge", "" , + }; + + unsigned cond_mask = 0; + cond_mask |= (branch->branch.cond_lt ? 1 : 0); + cond_mask |= (branch->branch.cond_eq ? 2 : 0); + cond_mask |= (branch->branch.cond_gt ? 
4 : 0); + printf("branch"); + if (cond_mask != 0x7) { + printf(".%s ", cond[cond_mask]); + print_source_scalar(branch->branch.arg0_source, NULL, false, false); + printf(" "); + print_source_scalar(branch->branch.arg1_source, NULL, false, false); + } + + printf(" %d", branch->branch.target + offset); +} + +typedef void (*print_field_func)(void *, unsigned); + +static const print_field_func print_field[ppir_codegen_field_shift_count] = { + [ppir_codegen_field_shift_varying] = print_varying, + [ppir_codegen_field_shift_sampler] = print_sampler, + [ppir_codegen_field_shift_uniform] = print_uniform, + [ppir_codegen_field_shift_vec4_mul] = print_vec4_mul, + [ppir_codegen_field_shift_float_mul] = print_float_mul, + [ppir_codegen_field_shift_vec4_acc] = print_vec4_acc, + [ppir_codegen_field_shift_float_acc] = print_float_acc, + [ppir_codegen_field_shift_combine] = print_combine, + [ppir_codegen_field_shift_temp_write] = print_temp_write, + [ppir_codegen_field_shift_branch] = print_branch, + [ppir_codegen_field_shift_vec4_const_0] = print_const0, + [ppir_codegen_field_shift_vec4_const_1] = print_const1, +}; + +static const int ppir_codegen_field_size[] = { + 34, 62, 41, 43, 30, 44, 31, 30, 41, 73, 64, 64 +}; + +static void +bitcopy(char *src, char *dst, unsigned bits, unsigned src_offset) +{ + src += src_offset / 8; + src_offset %= 8; + + for (int b = bits; b > 0; b -= 8, src++, dst++) { + unsigned char out = ((unsigned char) *src) >> src_offset; + if (src_offset > 0 && src_offset + b > 8) + out |= ((unsigned char) *(src + 1)) << (8 - src_offset); + *dst = (char) out; + } +} + +void +ppir_disassemble_instr(uint32_t *instr, unsigned offset) +{ + ppir_codegen_ctrl *ctrl = (ppir_codegen_ctrl *) instr; + + char *instr_code = (char *) (instr + 1); + unsigned bit_offset = 0; + bool first = true; + for (unsigned i = 0; i < ppir_codegen_field_shift_count; i++) { + char code[12]; + + if (!((ctrl->fields >> i) & 1)) + continue; + + unsigned bits = ppir_codegen_field_size[i]; + bitcopy(instr_code, code, bits, bit_offset); + + if (first) + first = false; + else + printf(", "); + + print_field[i](code, offset); + + bit_offset += bits; + } + + if (ctrl->sync) + printf(", sync"); + if (ctrl->stop) + printf(", stop"); + + printf("\n"); +} + diff --git a/src/gallium/drivers/lima/ir/pp/instr.c b/src/gallium/drivers/lima/ir/pp/instr.c new file mode 100644 index 00000000000..ae296a4bb82 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/instr.c @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/ralloc.h" + +#include "ppir.h" + +ppir_instr *ppir_instr_create(ppir_block *block) +{ + ppir_instr *instr = rzalloc(block, ppir_instr); + if (!instr) + return NULL; + + list_inithead(&instr->succ_list); + list_inithead(&instr->pred_list); + + instr->index = block->comp->cur_instr_index++; + instr->reg_pressure = -1; + + list_addtail(&instr->list, &block->instr_list); + return instr; +} + +void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred) +{ + /* don't add duplicated instr */ + ppir_instr_foreach_pred(succ, dep) { + if (pred == dep->pred) + return; + } + + ppir_dep *dep = ralloc(succ, ppir_dep); + dep->pred = pred; + dep->succ = succ; + list_addtail(&dep->pred_link, &succ->pred_list); + list_addtail(&dep->succ_link, &pred->succ_list); +} + +void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul) +{ + ppir_instr *instr = add->instr; + int pos = mul->instr_pos; + int *slots = ppir_op_infos[mul->op].slots; + + for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) { + /* possible to insert at required place */ + if (slots[i] == pos) { + if (!instr->slots[pos]) { + ppir_alu_node *add_alu = ppir_node_to_alu(add); + ppir_alu_node *mul_alu = ppir_node_to_alu(mul); + ppir_dest *dest = &mul_alu->dest; + int pipeline = pos == PPIR_INSTR_SLOT_ALU_VEC_MUL ? + ppir_pipeline_reg_vmul : ppir_pipeline_reg_fmul; + + /* ^vmul/^fmul can't be used as last arg */ + if (add_alu->num_src > 1) { + ppir_src *last_src = add_alu->src + add_alu->num_src - 1; + if (ppir_node_target_equal(last_src, dest)) + return; + } + + /* update add node src to use pipeline reg */ + ppir_src *src = add_alu->src; + if (add_alu->num_src == 3) { + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + + if (ppir_node_target_equal(++src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + } + else { + assert(ppir_node_target_equal(src, dest)); + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + } + + /* update mul node dest to output to pipeline reg */ + dest->type = ppir_target_pipeline; + dest->pipeline = pipeline; + + instr->slots[pos] = mul; + mul->instr = instr; + } + return; + } + } +} + +/* check whether a const slot fix into another const slot */ +static bool ppir_instr_insert_const(ppir_const *dst, const ppir_const *src, + uint8_t *swizzle) +{ + int i, j; + + for (i = 0; i < src->num; i++) { + for (j = 0; j < dst->num; j++) { + if (src->value[i].ui == dst->value[j].ui) + break; + } + + if (j == dst->num) { + if (dst->num == 4) + return false; + dst->value[dst->num++] = src->value[i]; + } + + swizzle[i] = j; + } + + return true; +} + +/* make alu node src reflact the pipeline reg */ +static void ppir_instr_update_src_pipeline(ppir_instr *instr, ppir_pipeline pipeline, + ppir_dest *dest, uint8_t *swizzle) +{ + for (int i = PPIR_INSTR_SLOT_ALU_START; i <= PPIR_INSTR_SLOT_ALU_END; i++) { + if (!instr->slots[i]) + continue; + + ppir_alu_node *alu = ppir_node_to_alu(instr->slots[i]); + for (int j = 0; j < alu->num_src; j++) { + ppir_src *src = alu->src + j; + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_pipeline; + src->pipeline = pipeline; + + if (swizzle) { + for (int k = 0; k < 4; k++) + 
src->swizzle[k] = swizzle[src->swizzle[k]]; + } + } + } + } +} + +bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node) +{ + if (node->op == ppir_op_const) { + int i; + ppir_const_node *c = ppir_node_to_const(node); + const ppir_const *nc = &c->constant; + + for (i = 0; i < 2; i++) { + ppir_const ic = instr->constant[i]; + uint8_t swizzle[4] = {0}; + + if (ppir_instr_insert_const(&ic, nc, swizzle)) { + instr->constant[i] = ic; + ppir_instr_update_src_pipeline( + instr, ppir_pipeline_reg_const0 + i, &c->dest, swizzle); + break; + } + } + + /* no const slot can insert */ + if (i == 2) + return false; + + return true; + } + else { + int *slots = ppir_op_infos[node->op].slots; + for (int i = 0; slots[i] != PPIR_INSTR_SLOT_END; i++) { + int pos = slots[i]; + + if (instr->slots[pos]) { + /* node already in this instr, i.e. load_uniform */ + if (instr->slots[pos] == node) + return true; + else + continue; + } + + if (pos == PPIR_INSTR_SLOT_ALU_SCL_MUL || + pos == PPIR_INSTR_SLOT_ALU_SCL_ADD) { + ppir_dest *dest = ppir_node_get_dest(node); + if (!ppir_target_is_scaler(dest)) + continue; + } + + instr->slots[pos] = node; + node->instr = instr; + node->instr_pos = pos; + + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + ppir_load_node *l = ppir_node_to_load(node); + ppir_instr_update_src_pipeline( + instr, ppir_pipeline_reg_uniform, &l->dest, NULL); + } + + return true; + } + + return false; + } +} + +static struct { + int len; + char *name; +} ppir_instr_fields[] = { + [PPIR_INSTR_SLOT_VARYING] = { 4, "vary" }, + [PPIR_INSTR_SLOT_TEXLD] = { 4, "texl"}, + [PPIR_INSTR_SLOT_UNIFORM] = { 4, "unif" }, + [PPIR_INSTR_SLOT_ALU_VEC_MUL] = { 4, "vmul" }, + [PPIR_INSTR_SLOT_ALU_SCL_MUL] = { 4, "smul" }, + [PPIR_INSTR_SLOT_ALU_VEC_ADD] = { 4, "vadd" }, + [PPIR_INSTR_SLOT_ALU_SCL_ADD] = { 4, "sadd" }, + [PPIR_INSTR_SLOT_ALU_COMBINE] = { 4, "comb" }, + [PPIR_INSTR_SLOT_STORE_TEMP] = { 4, "stor" }, +}; + +void ppir_instr_print_list(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + printf("======ppir instr list======\n"); + printf(" "); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) + printf("%-*s ", ppir_instr_fields[i].len, ppir_instr_fields[i].name); + printf("const0|1\n"); + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%c%03d: ", instr->is_end ? '*' : ' ', instr->index); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (node) + printf("%-*d ", ppir_instr_fields[i].len, node->index); + else + printf("%-*s ", ppir_instr_fields[i].len, "null"); + } + for (int i = 0; i < 2; i++) { + if (i) + printf("| "); + + for (int j = 0; j < instr->constant[i].num; j++) + printf("%f ", instr->constant[i].value[j].f); + } + printf("\n"); + } + printf("------------------------\n"); + } +} + +static void ppir_instr_print_sub(ppir_instr *instr) +{ + printf("[%s%d", + instr->printed && !ppir_instr_is_leaf(instr) ? 
"+" : "", + instr->index); + + if (!instr->printed) { + ppir_instr_foreach_pred(instr, dep) { + ppir_instr_print_sub(dep->pred); + } + + instr->printed = true; + } + + printf("]"); +} + +void ppir_instr_print_dep(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + instr->printed = false; + } + } + + printf("======ppir instr depend======\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + if (ppir_instr_is_root(instr)) { + ppir_instr_print_sub(instr); + printf("\n"); + } + } + printf("------------------------\n"); + } +} diff --git a/src/gallium/drivers/lima/ir/pp/lower.c b/src/gallium/drivers/lima/ir/pp/lower.c new file mode 100644 index 00000000000..e294f6740d1 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/lower.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "util/bitscan.h" +#include "util/ralloc.h" + +#include "ppir.h" + +static bool ppir_lower_const(ppir_block *block, ppir_node *node) +{ + if (ppir_node_is_root(node)) { + ppir_node_delete(node); + return true; + } + + ppir_node *move = NULL; + ppir_dest *dest = ppir_node_get_dest(node); + + /* const (register) can only be used in alu node, create a move + * node for other types of node */ + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + + if (succ->type != ppir_node_type_alu) { + if (!move) { + move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (unlikely(!move)) + return false; + + ppir_debug("lower const create move %d for %d\n", + move->index, node->index); + + ppir_alu_node *alu = ppir_node_to_alu(move); + alu->dest = *dest; + alu->num_src = 1; + ppir_node_target_assign(alu->src, dest); + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + } + + ppir_node_replace_pred(dep, move); + ppir_node_replace_child(succ, node, move); + } + } + + if (move) { + ppir_node_add_dep(move, node); + list_addtail(&move->list, &node->list); + } + + return true; +} + +/* lower dot to mul+sum */ +static bool ppir_lower_dot(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *mul = ppir_node_create(block, ppir_op_mul, -1, 0); + if (!mul) + return false; + list_addtail(&mul->node.list, &node->list); + + ppir_alu_node *dot = ppir_node_to_alu(node); + mul->src[0] = dot->src[0]; + mul->src[1] = dot->src[1]; + mul->num_src = 2; + + int num_components = node->op - ppir_op_dot2 + 2; + ppir_dest *dest = &mul->dest; + dest->type = ppir_target_ssa; + dest->ssa.num_components = num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, num_components); + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node_remove_dep(dep); + ppir_node_add_dep(&mul->node, dep->pred); + } + ppir_node_add_dep(node, &mul->node); + + if (node->op == ppir_op_dot2) { + node->op = ppir_op_add; + + ppir_node_target_assign(dot->src, dest); + dot->src[0].swizzle[0] = 0; + dot->src[0].absolute = false; + dot->src[0].negate = false; + + ppir_node_target_assign(dot->src + 1, dest); + dot->src[1].swizzle[0] = 1; + dot->src[1].absolute = false; + dot->src[1].negate = false; + } + else { + node->op = node->op == ppir_op_dot3 ? 
ppir_op_sum3 : ppir_op_sum4; + + ppir_node_target_assign(dot->src, dest); + for (int i = 0; i < 4; i++) + dot->src[0].swizzle[i] = i; + dot->src[0].absolute = false; + dot->src[0].negate = false; + + dot->num_src = 1; + } + + return true; +} + +static ppir_reg *create_reg(ppir_compiler *comp, int num_components) +{ + ppir_reg *r = rzalloc(comp, ppir_reg); + if (!r) + return NULL; + + r->num_components = num_components; + r->live_in = INT_MAX; + r->live_out = 0; + r->is_head = false; + list_addtail(&r->list, &comp->reg_list); + + return r; +} + +/* lower vector alu node to multi scalar nodes */ +static bool ppir_lower_vec_to_scalar(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + ppir_dest *dest = &alu->dest; + + int n = 0; + int index[4]; + + unsigned mask = dest->write_mask; + while (mask) + index[n++] = u_bit_scan(&mask); + + if (n == 1) + return true; + + ppir_reg *r; + /* we need a reg for scalar nodes to store output */ + if (dest->type == ppir_target_register) + r = dest->reg; + else { + r = create_reg(block->comp, n); + if (!r) + return false; + + /* change all successors to use reg r */ + ppir_node_foreach_succ(node, dep) { + ppir_node *succ = dep->succ; + if (succ->type == ppir_node_type_alu) { + ppir_alu_node *sa = ppir_node_to_alu(succ); + for (int i = 0; i < sa->num_src; i++) { + ppir_src *src = sa->src + i; + if (ppir_node_target_equal(src, dest)) { + src->type = ppir_target_register; + src->reg = r; + } + } + } + else { + assert(succ->type == ppir_node_type_store); + ppir_store_node *ss = ppir_node_to_store(succ); + ppir_src *src = &ss->src; + src->type = ppir_target_register; + src->reg = r; + } + } + } + + /* create each component's scalar node */ + for (int i = 0; i < n; i++) { + ppir_node *s = ppir_node_create(block, node->op, -1, 0); + if (!s) + return false; + list_addtail(&s->list, &node->list); + + ppir_alu_node *sa = ppir_node_to_alu(s); + ppir_dest *sd = &sa->dest; + sd->type = ppir_target_register; + sd->reg = r; + sd->modifier = dest->modifier; + sd->write_mask = 1 << index[i]; + + for (int j = 0; j < alu->num_src; j++) + sa->src[j] = alu->src[j]; + sa->num_src = alu->num_src; + + /* TODO: need per reg component dependancy */ + ppir_node_foreach_succ(node, dep) { + ppir_node_add_dep(dep->succ, s); + } + + ppir_node_foreach_pred(node, dep) { + ppir_node_add_dep(s, dep->pred); + } + } + + ppir_node_delete(node); + return true; +} + +static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node) +{ + /* swapped op must be the next op */ + node->op++; + + assert(node->type == ppir_node_type_alu); + ppir_alu_node *alu = ppir_node_to_alu(node); + assert(alu->num_src == 2); + + ppir_src tmp = alu->src[0]; + alu->src[0] = alu->src[1]; + alu->src[1] = tmp; + return true; +} + +static bool ppir_lower_texture(ppir_block *block, ppir_node *node) +{ + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + + if (ppir_node_has_single_pred(node)) { + ppir_node *pred = ppir_node_first_pred(node); + if (pred->op == ppir_op_load_varying) { + /* If ldtex is the only successor of load_varying node + * we're good. Just change load_varying op type to load_coords. 
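+       * (the coordinates then reach the texture unit through the varying pipeline register; see insert_to_load_tex in node_to_instr.c)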
+ */ + if (ppir_node_has_single_succ(pred)) { + pred->op = ppir_op_load_coords; + return true; + } + } + } + + /* Otherwise we need to create load_coords node */ + ppir_load_node *load = ppir_node_create(block, ppir_op_load_coords, -1, 0); + if (!load) + return false; + list_addtail(&load->node.list, &node->list); + + ppir_debug("%s create load_coords node %d for %d\n", + __FUNCTION__, load->node.index, node->index); + + ppir_dest *dest = &load->dest; + dest->type = ppir_target_ssa; + dest->ssa.num_components = load_tex->src_coords.ssa->num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, dest->ssa.num_components); + + load->src = load_tex->src_coords; + + ppir_src *src = &load_tex->src_coords; + src->type = ppir_target_ssa; + src->ssa = &dest->ssa; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(&load->node, pred); + } + + ppir_node_add_dep(node, &load->node); + return true; +} + +/* Prepare for sin and cos and then lower vector alu node to multi + * scalar nodes */ +static bool ppir_lower_sin_cos_vec_to_scalar(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_node *inv_2pi_node = ppir_node_create(block, ppir_op_const, -1, 0); + if (!inv_2pi_node) + return false; + list_addtail(&inv_2pi_node->list, &node->list); + + /* For sin and cos, the input has to multiplied by the constant + * 1/(2*pi), presumably to simplify the hardware. */ + ppir_const_node *inv_2pi_const = ppir_node_to_const(inv_2pi_node); + inv_2pi_const->constant.num = 1; + inv_2pi_const->constant.value[0].f = (1.0f/(2.0f * M_PI)); + + inv_2pi_const->dest.type = ppir_target_ssa; + inv_2pi_const->dest.ssa.num_components = 1; + inv_2pi_const->dest.ssa.live_in = INT_MAX; + inv_2pi_const->dest.ssa.live_out = 0; + inv_2pi_const->dest.write_mask = 0x01; + + ppir_node *mul_node = ppir_node_create(block, ppir_op_mul, -1, 0); + if (!mul_node) + return false; + list_addtail(&mul_node->list, &node->list); + + ppir_alu_node *mul_alu = ppir_node_to_alu(mul_node); + mul_alu->num_src = 2; + mul_alu->src[0] = alu->src[0]; + mul_alu->src[1].type = ppir_target_ssa; + mul_alu->src[1].ssa = &inv_2pi_const->dest.ssa; + + int num_components = alu->src[0].ssa->num_components; + mul_alu->dest.type = ppir_target_ssa; + mul_alu->dest.ssa.num_components = num_components; + mul_alu->dest.ssa.live_in = INT_MAX; + mul_alu->dest.ssa.live_out = 0; + mul_alu->dest.write_mask = u_bit_consecutive(0, num_components); + + alu->src[0].type = ppir_target_ssa; + alu->src[0].ssa = &mul_alu->dest.ssa; + for (int i = 0; i < 4; i++) + alu->src->swizzle[i] = i; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(mul_node, pred); + } + ppir_node_add_dep(node, mul_node); + ppir_node_add_dep(mul_node, inv_2pi_node); + + return ppir_lower_vec_to_scalar(block, node); +} + +/* insert a move as the select condition to make sure it can + * be inserted to select instr float mul slot + */ +static bool ppir_lower_select(ppir_block *block, ppir_node *node) +{ + ppir_alu_node *alu = ppir_node_to_alu(node); + + ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (!move) + return false; + list_addtail(&move->list, &node->list); + + ppir_alu_node *move_alu = ppir_node_to_alu(move); + ppir_src *move_src = move_alu->src, *src = alu->src; + move_src->type = src->type; + move_src->ssa = src->ssa; + move_src->swizzle[0] = 
src->swizzle[0]; + move_alu->num_src = 1; + + ppir_dest *move_dest = &move_alu->dest; + move_dest->type = ppir_target_ssa; + move_dest->ssa.num_components = 1; + move_dest->ssa.live_in = INT_MAX; + move_dest->ssa.live_out = 0; + move_dest->write_mask = 1; + + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + ppir_dest *dest = ppir_node_get_dest(pred); + if (ppir_node_target_equal(alu->src, dest)) { + ppir_node_replace_pred(dep, move); + ppir_node_add_dep(move, pred); + } + } + + /* move must be the first pred of select node which make sure + * the float mul slot is free when node to instr + */ + assert(ppir_node_first_pred(node) == move); + + src->swizzle[0] = 0; + ppir_node_target_assign(alu->src, move_dest); + return true; +} + +static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = { + [ppir_op_const] = ppir_lower_const, + [ppir_op_dot2] = ppir_lower_dot, + [ppir_op_dot3] = ppir_lower_dot, + [ppir_op_dot4] = ppir_lower_dot, + [ppir_op_rcp] = ppir_lower_vec_to_scalar, + [ppir_op_rsqrt] = ppir_lower_vec_to_scalar, + [ppir_op_log2] = ppir_lower_vec_to_scalar, + [ppir_op_exp2] = ppir_lower_vec_to_scalar, + [ppir_op_sqrt] = ppir_lower_vec_to_scalar, + [ppir_op_sin] = ppir_lower_sin_cos_vec_to_scalar, + [ppir_op_cos] = ppir_lower_sin_cos_vec_to_scalar, + [ppir_op_lt] = ppir_lower_swap_args, + [ppir_op_le] = ppir_lower_swap_args, + [ppir_op_load_texture] = ppir_lower_texture, + [ppir_op_select] = ppir_lower_select, +}; + +bool ppir_lower_prog(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry_safe(ppir_node, node, &block->node_list, list) { + if (ppir_lower_funcs[node->op] && + !ppir_lower_funcs[node->op](block, node)) + return false; + } + } + + ppir_node_print_prog(comp); + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/nir.c b/src/gallium/drivers/lima/ir/pp/nir.c new file mode 100644 index 00000000000..0a5fe13e312 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/nir.c @@ -0,0 +1,494 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <string.h> + +#include "util/ralloc.h" +#include "util/bitscan.h" +#include "compiler/nir/nir.h" + +#include "ppir.h" + +static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa) +{ + ppir_node *node = ppir_node_create(block, op, ssa->index, 0); + if (!node) + return NULL; + + ppir_dest *dest = ppir_node_get_dest(node); + dest->type = ppir_target_ssa; + dest->ssa.num_components = ssa->num_components; + dest->ssa.live_in = INT_MAX; + dest->ssa.live_out = 0; + dest->write_mask = u_bit_consecutive(0, ssa->num_components); + + if (node->type == ppir_node_type_load || + node->type == ppir_node_type_store) + dest->ssa.is_head = true; + + return node; +} + +static void *ppir_node_create_reg(ppir_block *block, ppir_op op, + nir_reg_dest *reg, unsigned mask) +{ + ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask); + if (!node) + return NULL; + + ppir_dest *dest = ppir_node_get_dest(node); + + list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) { + if (r->index == reg->reg->index) { + dest->reg = r; + break; + } + } + + dest->type = ppir_target_register; + dest->write_mask = mask; + + if (node->type == ppir_node_type_load || + node->type == ppir_node_type_store) + dest->reg->is_head = true; + + return node; +} + +static void *ppir_node_create_dest(ppir_block *block, ppir_op op, + nir_dest *dest, unsigned mask) +{ + unsigned index = -1; + + if (dest) { + if (dest->is_ssa) + return ppir_node_create_ssa(block, op, &dest->ssa); + else + return ppir_node_create_reg(block, op, &dest->reg, mask); + } + + return ppir_node_create(block, op, index, 0); +} + +static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node, + ppir_src *ps, nir_src *ns, unsigned mask) +{ + ppir_node *child = NULL; + + if (ns->is_ssa) { + child = comp->var_nodes[ns->ssa->index]; + ppir_node_add_dep(node, child); + } + else { + nir_register *reg = ns->reg.reg; + while (mask) { + int swizzle = ps->swizzle[u_bit_scan(&mask)]; + child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle]; + ppir_node_add_dep(node, child); + } + } + + ppir_dest *dest = ppir_node_get_dest(child); + ppir_node_target_assign(ps, dest); +} + +static int nir_to_ppir_opcodes[nir_num_opcodes] = { + /* not supported */ + [0 ... 
nir_last_opcode] = -1, + + [nir_op_fmov] = ppir_op_mov, + [nir_op_imov] = ppir_op_mov, + [nir_op_fmul] = ppir_op_mul, + [nir_op_fadd] = ppir_op_add, + [nir_op_fdot2] = ppir_op_dot2, + [nir_op_fdot3] = ppir_op_dot3, + [nir_op_fdot4] = ppir_op_dot4, + [nir_op_frsq] = ppir_op_rsqrt, + [nir_op_flog2] = ppir_op_log2, + [nir_op_fexp2] = ppir_op_exp2, + [nir_op_fsqrt] = ppir_op_sqrt, + [nir_op_fsin] = ppir_op_sin, + [nir_op_fcos] = ppir_op_cos, + [nir_op_fmax] = ppir_op_max, + [nir_op_fmin] = ppir_op_min, + [nir_op_frcp] = ppir_op_rcp, + [nir_op_ffloor] = ppir_op_floor, + [nir_op_ffract] = ppir_op_fract, + [nir_op_fand] = ppir_op_and, + [nir_op_for] = ppir_op_or, + [nir_op_fxor] = ppir_op_xor, + [nir_op_sge] = ppir_op_ge, + [nir_op_fge] = ppir_op_ge, + [nir_op_slt] = ppir_op_lt, + [nir_op_flt] = ppir_op_lt, + [nir_op_seq] = ppir_op_eq, + [nir_op_feq] = ppir_op_eq, + [nir_op_sne] = ppir_op_ne, + [nir_op_fne] = ppir_op_ne, + [nir_op_fnot] = ppir_op_not, + [nir_op_bcsel] = ppir_op_select, + [nir_op_inot] = ppir_op_not, + [nir_op_b2f32] = ppir_op_mov, +}; + +static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni) +{ + nir_alu_instr *instr = nir_instr_as_alu(ni); + int op = nir_to_ppir_opcodes[instr->op]; + + if (op < 0) { + ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name); + return NULL; + } + + ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest, + instr->dest.write_mask); + if (!node) + return NULL; + + ppir_dest *pd = &node->dest; + nir_alu_dest *nd = &instr->dest; + if (nd->saturate) + pd->modifier = ppir_outmod_clamp_fraction; + + unsigned src_mask; + switch (op) { + case ppir_op_dot2: + src_mask = 0b0011; + break; + case ppir_op_dot3: + src_mask = 0b0111; + break; + case ppir_op_dot4: + src_mask = 0b1111; + break; + default: + src_mask = pd->write_mask; + break; + } + + unsigned num_child = nir_op_infos[instr->op].num_inputs; + node->num_src = num_child; + + for (int i = 0; i < num_child; i++) { + nir_alu_src *ns = instr->src + i; + ppir_src *ps = node->src + i; + memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle)); + ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask); + + ps->absolute = ns->abs; + ps->negate = ns->negate; + } + + return &node->node; +} + +static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni) +{ + nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni); + unsigned mask = 0; + ppir_load_node *lnode; + ppir_store_node *snode; + nir_const_value *const_offset; + + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + if (!instr->dest.is_ssa) + mask = u_bit_consecutive(0, instr->num_components); + + lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask); + if (!lnode) + return NULL; + + lnode->num_components = instr->num_components; + lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr); + return &lnode->node; + + case nir_intrinsic_load_uniform: + if (!instr->dest.is_ssa) + mask = u_bit_consecutive(0, instr->num_components); + + lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask); + if (!lnode) + return NULL; + + lnode->num_components = instr->num_components; + lnode->index = nir_intrinsic_base(instr); + + const_offset = nir_src_as_const_value(instr->src[0]); + assert(const_offset); + lnode->index += (uint32_t)const_offset->f32[0]; + + return &lnode->node; + + case nir_intrinsic_store_output: + snode = ppir_node_create_dest(block, ppir_op_store_color, NULL, 0); + if (!snode) + return NULL; + + snode->index = 
nir_intrinsic_base(instr); + + for (int i = 0; i < instr->num_components; i++) + snode->src.swizzle[i] = i; + + ppir_node_add_src(block->comp, &snode->node, &snode->src, instr->src, + u_bit_consecutive(0, instr->num_components)); + + return &snode->node; + + default: + ppir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic); + return NULL; + } +} + +static ppir_node *ppir_emit_load_const(ppir_block *block, nir_instr *ni) +{ + nir_load_const_instr *instr = nir_instr_as_load_const(ni); + ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def); + if (!node) + return NULL; + + assert(instr->def.bit_size == 32); + + for (int i = 0; i < instr->def.num_components; i++) + node->constant.value[i].i = instr->value.i32[i]; + node->constant.num = instr->def.num_components; + + return &node->node; +} + +static ppir_node *ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni) +{ + ppir_error("nir_ssa_undef_instr not support\n"); + return NULL; +} + +static ppir_node *ppir_emit_tex(ppir_block *block, nir_instr *ni) +{ + nir_tex_instr *instr = nir_instr_as_tex(ni); + ppir_load_texture_node *node; + + if (instr->op != nir_texop_tex) { + ppir_error("unsupported texop %d\n", instr->op); + return NULL; + } + + node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0); + if (!node) + return NULL; + + node->sampler = instr->texture_index; + + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + break; + default: + ppir_debug("unsupported sampler dim: %d\n", instr->sampler_dim); + return NULL; + } + + node->sampler_dim = instr->sampler_dim; + + for (int i = 0; i < instr->coord_components; i++) + node->src_coords.swizzle[i] = i; + + assert(instr->num_srcs == 1); + for (int i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + ppir_node_add_src(block->comp, &node->node, &node->src_coords, &instr->src[i].src, + u_bit_consecutive(0, instr->coord_components)); + break; + default: + ppir_debug("unknown texture source"); + return NULL; + } + } + + return &node->node; +} + +static ppir_node *ppir_emit_jump(ppir_block *block, nir_instr *ni) +{ + ppir_error("nir_jump_instr not support\n"); + return NULL; +} + +static ppir_node *(*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = { + [nir_instr_type_alu] = ppir_emit_alu, + [nir_instr_type_intrinsic] = ppir_emit_intrinsic, + [nir_instr_type_load_const] = ppir_emit_load_const, + [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef, + [nir_instr_type_tex] = ppir_emit_tex, + [nir_instr_type_jump] = ppir_emit_jump, +}; + +static ppir_block *ppir_block_create(ppir_compiler *comp) +{ + ppir_block *block = rzalloc(comp, ppir_block); + if (!block) + return NULL; + + list_inithead(&block->node_list); + list_inithead(&block->instr_list); + + return block; +} + +static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock) +{ + ppir_block *block = ppir_block_create(comp); + if (!block) + return false; + + list_addtail(&block->list, &comp->block_list); + block->comp = comp; + + nir_foreach_instr(instr, nblock) { + assert(instr->type < nir_instr_type_phi); + ppir_node *node = ppir_emit_instr[instr->type](block, instr); + if (node) + list_addtail(&node->list, &block->node_list); + } + + return true; +} + +static bool ppir_emit_if(ppir_compiler *comp, nir_if *nif) +{ + ppir_error("if nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop) +{ + 
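+   /* control flow is not handled by ppir yet, so the whole compile fails when a loop is present */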
ppir_error("loop nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc) +{ + ppir_error("function nir_cf_node not support\n"); + return false; +} + +static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + bool ret; + + switch (node->type) { + case nir_cf_node_block: + ret = ppir_emit_block(comp, nir_cf_node_as_block(node)); + break; + case nir_cf_node_if: + ret = ppir_emit_if(comp, nir_cf_node_as_if(node)); + break; + case nir_cf_node_loop: + ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node)); + break; + case nir_cf_node_function: + ret = ppir_emit_function(comp, nir_cf_node_as_function(node)); + break; + default: + ppir_error("unknown NIR node type %d\n", node->type); + return false; + } + + if (!ret) + return false; + } + + return true; +} + +static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa) +{ + ppir_compiler *comp = rzalloc_size( + prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *)); + if (!comp) + return NULL; + + list_inithead(&comp->block_list); + list_inithead(&comp->reg_list); + + comp->var_nodes = (ppir_node **)(comp + 1); + comp->reg_base = num_ssa; + comp->prog = prog; + return comp; +} + +bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir, + struct ra_regs *ra) +{ + nir_function_impl *func = nir_shader_get_entrypoint(nir); + ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc); + if (!comp) + return false; + + comp->ra = ra; + + foreach_list_typed(nir_register, reg, node, &func->registers) { + ppir_reg *r = rzalloc(comp, ppir_reg); + if (!r) + return false; + + r->index = reg->index; + r->num_components = reg->num_components; + r->live_in = INT_MAX; + r->live_out = 0; + r->is_head = false; + list_addtail(&r->list, &comp->reg_list); + } + + if (!ppir_emit_cf_list(comp, &func->body)) + goto err_out0; + ppir_node_print_prog(comp); + + if (!ppir_lower_prog(comp)) + goto err_out0; + + if (!ppir_node_to_instr(comp)) + goto err_out0; + + if (!ppir_schedule_prog(comp)) + goto err_out0; + + if (!ppir_regalloc_prog(comp)) + goto err_out0; + + if (!ppir_codegen_prog(comp)) + goto err_out0; + + ralloc_free(comp); + return true; + +err_out0: + ralloc_free(comp); + return false; +} + diff --git a/src/gallium/drivers/lima/ir/pp/node.c b/src/gallium/drivers/lima/ir/pp/node.c new file mode 100644 index 00000000000..9c871abb4c9 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/node.c @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "util/u_math.h" +#include "util/ralloc.h" +#include "util/bitscan.h" + +#include "ppir.h" + +const ppir_op_info ppir_op_infos[] = { + [ppir_op_mov] = { + .name = "mov", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_mul] = { + .name = "mul", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_add] = { + .name = "add", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_dot2] = { + .name = "dot2", + }, + [ppir_op_dot3] = { + .name = "dot3", + }, + [ppir_op_dot4] = { + .name = "dot4", + }, + [ppir_op_sum3] = { + .name = "sum3", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sum4] = { + .name = "sum4", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_rsqrt] = { + .name = "rsqrt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_log2] = { + .name = "log2", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_exp2] = { + .name = "exp2", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sqrt] = { + .name = "sqrt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_sin] = { + .name = "sin", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_cos] = { + .name = "cos", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_max] = { + .name = "max", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_min] = { + .name = "min", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_floor] = { + .name = "floor", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_fract] = { + .name = "fract", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_and] = { + .name = "and", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_or] = { + .name = "or", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_xor] = { + .name = "xor", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_not] = { + .name = "not", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_lt] = { + .name = "lt", + }, + [ppir_op_le] = { + .name = "le", + }, + [ppir_op_gt] = { + .name = "gt", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, 
PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_ge] = { + .name = "ge", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_eq] = { + .name = "eq", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_ne] = { + .name = "ne", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_select] = { + .name = "select", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_rcp] = { + .name = "rcp", + .slots = (int []) { + PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_varying] = { + .name = "ld_var", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_coords] = { + .name = "ld_coords", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_uniform] = { + .name = "ld_uni", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_texture] = { + .name = "ld_tex", + .type = ppir_node_type_load_texture, + .slots = (int []) { + PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_load_temp] = { + .name = "ld_temp", + .type = ppir_node_type_load, + .slots = (int []) { + PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END + }, + }, + [ppir_op_const] = { + .name = "const", + .type = ppir_node_type_const, + }, + [ppir_op_store_color] = { + .name = "st_col", + .type = ppir_node_type_store, + }, + [ppir_op_store_temp] = { + .name = "st_temp", + .type = ppir_node_type_store, + .slots = (int []) { + PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END + }, + }, +}; + +void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask) +{ + ppir_compiler *comp = block->comp; + static const int node_size[] = { + [ppir_node_type_alu] = sizeof(ppir_alu_node), + [ppir_node_type_const] = sizeof(ppir_const_node), + [ppir_node_type_load] = sizeof(ppir_load_node), + [ppir_node_type_store] = sizeof(ppir_store_node), + [ppir_node_type_load_texture] = sizeof(ppir_load_texture_node), + }; + + ppir_node_type type = ppir_op_infos[op].type; + int size = node_size[type]; + ppir_node *node = rzalloc_size(block, size); + if (!node) + return NULL; + + list_inithead(&node->succ_list); + list_inithead(&node->pred_list); + + if (index >= 0) { + if (mask) { + /* reg has 4 slots for each componemt write node */ + while (mask) + comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node; + snprintf(node->name, sizeof(node->name), "reg%d", index); + } else { + comp->var_nodes[index] = node; + snprintf(node->name, sizeof(node->name), "ssa%d", index); + } + } + else + snprintf(node->name, sizeof(node->name), "new"); + + node->op = op; + node->type = type; + node->index = comp->cur_index++; + node->block = block; + + return node; +} + +void ppir_node_add_dep(ppir_node *succ, ppir_node *pred) +{ + /* don't add dep for two nodes from different block */ + if (succ->block != pred->block) + return; + + /* don't add duplicated dep */ + 
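+   /* (a node feeding several sources of the same successor would otherwise add the dep more than once) */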
ppir_node_foreach_pred(succ, dep) { + if (dep->pred == pred) + return; + } + + ppir_dep *dep = ralloc(succ, ppir_dep); + dep->pred = pred; + dep->succ = succ; + list_addtail(&dep->pred_link, &succ->pred_list); + list_addtail(&dep->succ_link, &pred->succ_list); +} + +void ppir_node_remove_dep(ppir_dep *dep) +{ + list_del(&dep->succ_link); + list_del(&dep->pred_link); + ralloc_free(dep); +} + +static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child) +{ + ppir_dest *od = ppir_node_get_dest(old_child); + if (ppir_node_target_equal(src, od)) { + ppir_dest *nd = ppir_node_get_dest(new_child); + ppir_node_target_assign(src, nd); + } +} + +void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child) +{ + if (parent->type == ppir_node_type_alu) { + ppir_alu_node *alu = ppir_node_to_alu(parent); + for (int i = 0; i < alu->num_src; i++) + _ppir_node_replace_child(alu->src + i, old_child, new_child); + } + else if (parent->type == ppir_node_type_store) { + ppir_store_node *store = ppir_node_to_store(parent); + _ppir_node_replace_child(&store->src, old_child, new_child); + } +} + +void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred) +{ + list_del(&dep->succ_link); + dep->pred = new_pred; + list_addtail(&dep->succ_link, &new_pred->succ_list); +} + +void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src) +{ + ppir_node_foreach_succ_safe(src, dep) { + ppir_node_replace_pred(dep, dst); + ppir_node_replace_child(dep->succ, src, dst); + } +} + +void ppir_node_delete(ppir_node *node) +{ + ppir_node_foreach_succ_safe(node, dep) + ppir_node_remove_dep(dep); + + ppir_node_foreach_pred_safe(node, dep) + ppir_node_remove_dep(dep); + + list_del(&node->list); + ralloc_free(node); +} + +static void ppir_node_print_node(ppir_node *node, int space) +{ + for (int i = 0; i < space; i++) + printf(" "); + printf("%s%s %d %s\n", node->printed && !ppir_node_is_leaf(node) ? 
"+" : "", + ppir_op_infos[node->op].name, node->index, node->name); + + if (!node->printed) { + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_print_node(pred, space + 2); + } + + node->printed = true; + } +} + +void ppir_node_print_prog(ppir_compiler *comp) +{ + if (!(lima_debug & LIMA_DEBUG_PP)) + return; + + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + node->printed = false; + } + } + + printf("========prog========\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + printf("-------block------\n"); + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (ppir_node_is_root(node)) + ppir_node_print_node(node, 0); + } + } + printf("====================\n"); +} diff --git a/src/gallium/drivers/lima/ir/pp/node_to_instr.c b/src/gallium/drivers/lima/ir/pp/node_to_instr.c new file mode 100644 index 00000000000..26d2c9868f6 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/node_to_instr.c @@ -0,0 +1,401 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include "ppir.h"
+
+
+static bool create_new_instr(ppir_block *block, ppir_node *node)
+{
+   ppir_instr *instr = ppir_instr_create(block);
+   if (unlikely(!instr))
+      return false;
+
+   if (!ppir_instr_insert_node(instr, node))
+      return false;
+
+   return true;
+}
+
+static bool insert_to_load_tex(ppir_block *block, ppir_node *load_coords, ppir_node *ldtex)
+{
+   ppir_dest *dest = ppir_node_get_dest(ldtex);
+   ppir_node *move = NULL;
+
+   ppir_load_node *load = ppir_node_to_load(load_coords);
+   load->dest.type = ppir_target_pipeline;
+   load->dest.pipeline = ppir_pipeline_reg_discard;
+
+   ppir_load_texture_node *load_texture = ppir_node_to_load_texture(ldtex);
+   load_texture->src_coords.type = ppir_target_pipeline;
+   load_texture->src_coords.pipeline = ppir_pipeline_reg_discard;
+
+   /* Insert load_coords into the ldtex instruction */
+   if (!ppir_instr_insert_node(ldtex->instr, load_coords))
+      return false;
+
+   /* Create move node */
+   move = ppir_node_create(block, ppir_op_mov, -1 , 0);
+   if (unlikely(!move))
+      return false;
+
+   ppir_debug("insert_load_tex: create move %d for %d\n",
+              move->index, ldtex->index);
+
+   ppir_alu_node *alu = ppir_node_to_alu(move);
+   alu->dest = *dest;
+
+   ppir_node_replace_all_succ(move, ldtex);
+
+   dest->type = ppir_target_pipeline;
+   dest->pipeline = ppir_pipeline_reg_sampler;
+
+   alu->num_src = 1;
+   ppir_node_target_assign(&alu->src[0], dest);
+   for (int i = 0; i < 4; i++)
+      alu->src->swizzle[i] = i;
+
+   ppir_node_add_dep(move, ldtex);
+   list_addtail(&move->list, &ldtex->list);
+
+   if (!ppir_instr_insert_node(ldtex->instr, move))
+      return false;
+
+   return true;
+}
+
+static bool insert_to_each_succ_instr(ppir_block *block, ppir_node *node)
+{
+   ppir_dest *dest = ppir_node_get_dest(node);
+   assert(dest->type == ppir_target_ssa);
+
+   ppir_node *move = NULL;
+
+   ppir_node_foreach_succ_safe(node, dep) {
+      ppir_node *succ = dep->succ;
+      assert(succ->type == ppir_node_type_alu);
+
+      if (!ppir_instr_insert_node(succ->instr, node)) {
+         /* create a move node to insert for the failed node */
+         if (!move) {
+            move = ppir_node_create(block, ppir_op_mov, -1, 0);
+            if (unlikely(!move))
+               return false;
+
+            ppir_debug("node_to_instr create move %d for %d\n",
+                       move->index, node->index);
+
+            ppir_alu_node *alu = ppir_node_to_alu(move);
+            alu->dest = *dest;
+            alu->num_src = 1;
+            ppir_node_target_assign(alu->src, dest);
+            for (int i = 0; i < 4; i++)
+               alu->src->swizzle[i] = i;
+         }
+
+         ppir_node_replace_pred(dep, move);
+         ppir_node_replace_child(succ, node, move);
+      }
+   }
+
+   if (move) {
+      if (!create_new_instr(block, move))
+         return false;
+
+      MAYBE_UNUSED bool insert_result =
+         ppir_instr_insert_node(move->instr, node);
+      assert(insert_result);
+
+      ppir_node_add_dep(move, node);
+      list_addtail(&move->list, &node->list);
+   }
+
+   /* duplicate the node for each successor */
+
+   bool first = true;
+   struct list_head dup_list;
+   list_inithead(&dup_list);
+
+   ppir_node_foreach_succ_safe(node, dep) {
+      ppir_node *succ = dep->succ;
+
+      if (first) {
+         first = false;
+         node->instr = succ->instr;
+         continue;
+      }
+
+      if (succ->instr == node->instr)
+         continue;
+
+      list_for_each_entry(ppir_node, dup, &dup_list, list) {
+         if (succ->instr == dup->instr) {
+            ppir_node_replace_pred(dep, dup);
+            continue;
+         }
+      }
+
+      ppir_node *dup = ppir_node_create(block, node->op, -1, 0);
+      if (unlikely(!dup))
+         return false;
+      list_addtail(&dup->list, &dup_list);
+
+      ppir_debug("node_to_instr duplicate %s %d from %d\n",
+                 ppir_op_infos[dup->op].name, dup->index, node->index);
+
+      ppir_instr *instr = succ->instr;
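+      /* the duplicate joins the successor's instruction at the same slot position as the original node */ +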
dup->instr = instr; + dup->instr_pos = node->instr_pos; + ppir_node_replace_pred(dep, dup); + + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + ppir_load_node *load = ppir_node_to_load(node); + ppir_load_node *dup_load = ppir_node_to_load(dup); + dup_load->dest = load->dest; + dup_load->index = load->index; + dup_load->num_components = load->num_components; + instr->slots[node->instr_pos] = dup; + } + } + + list_splicetail(&dup_list, &node->list); + + return true; +} + +static bool ppir_do_node_to_instr(ppir_block *block, ppir_node *node) +{ + switch (node->type) { + case ppir_node_type_alu: + { + /* merge pred mul and succ add in the same instr can save a reg + * by using pipeline reg ^vmul/^fmul */ + ppir_alu_node *alu = ppir_node_to_alu(node); + if (alu->dest.type == ppir_target_ssa && + ppir_node_has_single_succ(node)) { + ppir_node *succ = ppir_node_first_succ(node); + if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_VEC_ADD) { + node->instr_pos = PPIR_INSTR_SLOT_ALU_VEC_MUL; + /* select instr's condition must be inserted to fmul slot */ + if (succ->op == ppir_op_select && + ppir_node_first_pred(succ) == node) { + assert(alu->dest.ssa.num_components == 1); + node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL; + } + ppir_instr_insert_mul_node(succ, node); + } + else if (succ->instr_pos == PPIR_INSTR_SLOT_ALU_SCL_ADD && + alu->dest.ssa.num_components == 1) { + node->instr_pos = PPIR_INSTR_SLOT_ALU_SCL_MUL; + ppir_instr_insert_mul_node(succ, node); + } + } + + /* can't inserted to any existing instr, create one */ + if (!node->instr && !create_new_instr(block, node)) + return false; + + break; + } + case ppir_node_type_load: + if ((node->op == ppir_op_load_uniform) || (node->op == ppir_op_load_temp)) { + /* merge pred load_uniform into succ instr can save a reg + * by using pipeline reg */ + if (!insert_to_each_succ_instr(block, node)) + return false; + + ppir_load_node *load = ppir_node_to_load(node); + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_uniform; + } + else if (node->op == ppir_op_load_temp) { + /* merge pred load_temp into succ instr can save a reg + * by using pipeline reg */ + if (!insert_to_each_succ_instr(block, node)) + return false; + + ppir_load_node *load = ppir_node_to_load(node); + load->dest.type = ppir_target_pipeline; + load->dest.pipeline = ppir_pipeline_reg_uniform; + } + else if (node->op == ppir_op_load_varying) { + /* delay the load varying dup to scheduler */ + if (!create_new_instr(block, node)) + return false; + } + else if (node->op == ppir_op_load_coords) { + ppir_node *ldtex = ppir_node_first_succ(node); + if (!insert_to_load_tex(block, node, ldtex)) + return false; + } + else { + /* not supported yet */ + assert(0); + return false; + } + break; + case ppir_node_type_load_texture: + if (!create_new_instr(block, node)) + return false; + break; + case ppir_node_type_const: + if (!insert_to_each_succ_instr(block, node)) + return false; + break; + case ppir_node_type_store: + { + if (node->op == ppir_op_store_temp) { + if (!create_new_instr(block, node)) + return false; + break; + } + + /* Only the store color node should appear here. + * Currently we always insert a move node as the end instr. + * But it should only be done when: + * 1. store a const node + * 2. store a load node + * 3. 
store a reg assigned in another block like loop/if + */ + + assert(node->op == ppir_op_store_color); + + ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0); + if (unlikely(!move)) + return false; + + ppir_debug("node_to_instr create move %d from store %d\n", + move->index, node->index); + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + /* we can't do this in this function except here as this + * store is the root of this recursion */ + ppir_node_remove_dep(dep); + ppir_node_add_dep(move, pred); + } + + ppir_node_add_dep(node, move); + list_addtail(&move->list, &node->list); + + ppir_alu_node *alu = ppir_node_to_alu(move); + ppir_store_node *store = ppir_node_to_store(node); + alu->src[0] = store->src; + alu->num_src = 1; + + alu->dest.type = ppir_target_ssa; + alu->dest.ssa.num_components = 4; + alu->dest.ssa.live_in = INT_MAX; + alu->dest.ssa.live_out = 0; + alu->dest.write_mask = 0xf; + + store->src.type = ppir_target_ssa; + store->src.ssa = &alu->dest.ssa; + + if (!create_new_instr(block, move)) + return false; + + move->instr->is_end = true; + node->instr = move->instr; + + /* use move for the following recursion */ + node = move; + break; + } + default: + return false; + } + + /* we have to make sure the dep not be destroyed (due to + * succ change) in ppir_do_node_to_instr, otherwise we can't + * do recursion like this */ + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + bool ready = true; + + /* pred may already be processed by the previous pred + * (this pred may be both node and previous pred's child) */ + if (pred->instr) + continue; + + /* insert pred only when all its successors have been inserted */ + ppir_node_foreach_succ(pred, dep) { + ppir_node *succ = dep->succ; + if (!succ->instr) { + ready = false; + break; + } + } + + if (ready) { + if (!ppir_do_node_to_instr(block, pred)) + return false; + } + } + + return true; +} + +static bool ppir_create_instr_from_node(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + if (ppir_node_is_root(node)) { + if (!ppir_do_node_to_instr(block, node)) + return false; + } + } + } + + return true; +} + +static void ppir_build_instr_dependency(ppir_compiler *comp) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (node) { + ppir_node_foreach_pred(node, dep) { + ppir_node *pred = dep->pred; + if (pred->instr && pred->instr != instr) + ppir_instr_add_dep(instr, pred->instr); + } + } + } + } + } +} + +bool ppir_node_to_instr(ppir_compiler *comp) +{ + if (!ppir_create_instr_from_node(comp)) + return false; + ppir_instr_print_list(comp); + + ppir_build_instr_dependency(comp); + ppir_instr_print_dep(comp); + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/ppir.h b/src/gallium/drivers/lima/ir/pp/ppir.h new file mode 100644 index 00000000000..feb34895114 --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/ppir.h @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2017 Lima Project + * Copyright (c) 2013 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +#ifndef LIMA_IR_PP_PPIR_H +#define LIMA_IR_PP_PPIR_H + +#include "util/u_math.h" +#include "util/list.h" + +#include "ir/lima_ir.h" + +typedef enum { + ppir_op_mov, + ppir_op_add, + + ppir_op_ddx, + ppir_op_ddy, + + ppir_op_mul, + ppir_op_rcp, + + ppir_op_sin_lut, + ppir_op_cos_lut, + + ppir_op_sum3, + ppir_op_sum4, + + ppir_op_normalize2, + ppir_op_normalize3, + ppir_op_normalize4, + + ppir_op_select, + + ppir_op_sin, + ppir_op_cos, + ppir_op_tan, + ppir_op_asin, + ppir_op_acos, + + ppir_op_atan, + ppir_op_atan2, + ppir_op_atan_pt1, + ppir_op_atan2_pt1, + ppir_op_atan_pt2, + + ppir_op_exp, + ppir_op_log, + ppir_op_exp2, + ppir_op_log2, + ppir_op_sqrt, + ppir_op_rsqrt, + + ppir_op_sign, + ppir_op_floor, + ppir_op_ceil, + ppir_op_fract, + ppir_op_mod, + ppir_op_min, + ppir_op_max, + + ppir_op_dot2, + ppir_op_dot3, + ppir_op_dot4, + + ppir_op_and, + ppir_op_or, + ppir_op_xor, + + ppir_op_lt, + ppir_op_gt, + ppir_op_le, + ppir_op_ge, + ppir_op_eq, + ppir_op_ne, + ppir_op_not, + + ppir_op_load_uniform, + ppir_op_load_varying, + ppir_op_load_coords, + ppir_op_load_texture, + ppir_op_load_temp, + + ppir_op_store_temp, + ppir_op_store_color, + + ppir_op_const, + + ppir_op_num, +} ppir_op; + +typedef enum { + ppir_node_type_alu, + ppir_node_type_const, + ppir_node_type_load, + ppir_node_type_store, + ppir_node_type_load_texture, +} ppir_node_type; + +typedef struct { + char *name; + ppir_node_type type; + int *slots; +} ppir_op_info; + +extern const ppir_op_info ppir_op_infos[]; + +typedef struct { + void *pred, *succ; + struct list_head pred_link; + struct list_head succ_link; +} ppir_dep; + +typedef struct ppir_node { + struct list_head list; + ppir_op op; + ppir_node_type type; + int index; + char name[16]; + bool printed; + struct ppir_instr *instr; + int instr_pos; + struct ppir_block *block; + + /* for scheduler */ + struct list_head succ_list; + struct list_head pred_list; +} ppir_node; + +typedef enum { + ppir_pipeline_reg_const0, + ppir_pipeline_reg_const1, + ppir_pipeline_reg_sampler, + ppir_pipeline_reg_uniform, + ppir_pipeline_reg_vmul, + ppir_pipeline_reg_fmul, + ppir_pipeline_reg_discard, /* varying load */ +} ppir_pipeline; + +typedef struct ppir_reg { + struct list_head list; + int index; + int num_components; + /* whether this reg has to start from the x component + * of a full physical reg, this is true for reg used + * in load/store instr which has no swizzle field + */ + bool is_head; + /* instr live range */ + int live_in, live_out; + bool spilled; +} ppir_reg; + +typedef enum { + ppir_target_ssa, + ppir_target_pipeline, + ppir_target_register, +} ppir_target; + +typedef struct ppir_src { + ppir_target type; + + union { + ppir_reg *ssa; + ppir_reg *reg; + ppir_pipeline 
pipeline; + }; + + uint8_t swizzle[4]; + bool absolute, negate; +} ppir_src; + +typedef enum { + ppir_outmod_none, + ppir_outmod_clamp_fraction, + ppir_outmod_clamp_positive, + ppir_outmod_round, +} ppir_outmod; + +typedef struct ppir_dest { + ppir_target type; + + union { + ppir_reg ssa; + ppir_reg *reg; + ppir_pipeline pipeline; + }; + + ppir_outmod modifier; + unsigned write_mask : 4; +} ppir_dest; + +typedef struct { + ppir_node node; + ppir_dest dest; + ppir_src src[3]; + int num_src; + int shift : 3; /* Only used for ppir_op_mul */ +} ppir_alu_node; + +typedef struct ppir_const { + union fi value[4]; + int num; +} ppir_const; + +typedef struct { + ppir_node node; + ppir_const constant; + ppir_dest dest; +} ppir_const_node; + +typedef struct { + ppir_node node; + int index; + int num_components; + ppir_dest dest; + ppir_src src; +} ppir_load_node; + +typedef struct { + ppir_node node; + int index; + int num_components; + ppir_src src; +} ppir_store_node; + +typedef struct { + ppir_node node; + ppir_dest dest; + ppir_src src_coords; + int sampler; + int sampler_dim; +} ppir_load_texture_node; + +enum ppir_instr_slot { + PPIR_INSTR_SLOT_VARYING, + PPIR_INSTR_SLOT_TEXLD, + PPIR_INSTR_SLOT_UNIFORM, + PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_ALU_SCL_MUL, + PPIR_INSTR_SLOT_ALU_VEC_ADD, + PPIR_INSTR_SLOT_ALU_SCL_ADD, + PPIR_INSTR_SLOT_ALU_COMBINE, + PPIR_INSTR_SLOT_STORE_TEMP, + PPIR_INSTR_SLOT_NUM, + PPIR_INSTR_SLOT_END, + PPIR_INSTR_SLOT_ALU_START = PPIR_INSTR_SLOT_ALU_VEC_MUL, + PPIR_INSTR_SLOT_ALU_END = PPIR_INSTR_SLOT_ALU_COMBINE, +}; + +typedef struct ppir_instr { + struct list_head list; + int index; + bool printed; + int seq; /* command sequence after schedule */ + + ppir_node *slots[PPIR_INSTR_SLOT_NUM]; + ppir_const constant[2]; + bool is_end; + + /* for scheduler */ + struct list_head succ_list; + struct list_head pred_list; + float reg_pressure; + int est; /* earliest start time */ + int parent_index; + bool scheduled; +} ppir_instr; + +typedef struct ppir_block { + struct list_head list; + struct list_head node_list; + struct list_head instr_list; + struct ppir_compiler *comp; + + /* for scheduler */ + int sched_instr_index; + int sched_instr_base; +} ppir_block; + +struct ra_regs; +struct lima_fs_shader_state; + +typedef struct ppir_compiler { + struct list_head block_list; + int cur_index; + int cur_instr_index; + + struct list_head reg_list; + + /* array for searching ssa/reg node */ + ppir_node **var_nodes; + unsigned reg_base; + + struct ra_regs *ra; + struct lima_fs_shader_state *prog; + + /* for scheduler */ + int sched_instr_base; + + /* for regalloc spilling debug */ + int force_spilling; +} ppir_compiler; + +void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask); +void ppir_node_add_dep(ppir_node *succ, ppir_node *pred); +void ppir_node_remove_dep(ppir_dep *dep); +void ppir_node_delete(ppir_node *node); +void ppir_node_print_prog(ppir_compiler *comp); +void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child); +void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src); +void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred); + +static inline bool ppir_node_is_root(ppir_node *node) +{ + return list_empty(&node->succ_list); +} + +static inline bool ppir_node_is_leaf(ppir_node *node) +{ + return list_empty(&node->pred_list); +} + +static inline bool ppir_node_has_single_succ(ppir_node *node) +{ + return list_is_singular(&node->succ_list); +} + +static inline ppir_node 
*ppir_node_first_succ(ppir_node *node) +{ + return list_first_entry(&node->succ_list, ppir_dep, succ_link)->succ; +} + +static inline bool ppir_node_has_single_pred(ppir_node *node) +{ + return list_is_singular(&node->pred_list); +} + +static inline ppir_node *ppir_node_first_pred(ppir_node *node) +{ + return list_first_entry(&node->pred_list, ppir_dep, pred_link)->pred; +} + +#define ppir_node_foreach_succ(node, dep) \ + list_for_each_entry(ppir_dep, dep, &node->succ_list, succ_link) +#define ppir_node_foreach_succ_safe(node, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &node->succ_list, succ_link) +#define ppir_node_foreach_pred(node, dep) \ + list_for_each_entry(ppir_dep, dep, &node->pred_list, pred_link) +#define ppir_node_foreach_pred_safe(node, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &node->pred_list, pred_link) + +#define ppir_node_to_alu(node) ((ppir_alu_node *)(node)) +#define ppir_node_to_const(node) ((ppir_const_node *)(node)) +#define ppir_node_to_load(node) ((ppir_load_node *)(node)) +#define ppir_node_to_store(node) ((ppir_store_node *)(node)) +#define ppir_node_to_load_texture(node) ((ppir_load_texture_node *)(node)) + +static inline ppir_dest *ppir_node_get_dest(ppir_node *node) +{ + switch (node->type) { + case ppir_node_type_alu: + return &ppir_node_to_alu(node)->dest; + case ppir_node_type_load: + return &ppir_node_to_load(node)->dest; + case ppir_node_type_const: + return &ppir_node_to_const(node)->dest; + case ppir_node_type_load_texture: + return &ppir_node_to_load_texture(node)->dest; + default: + return NULL; + } +} + +static inline void ppir_node_target_assign(ppir_src *src, ppir_dest *dest) +{ + src->type = dest->type; + switch (src->type) { + case ppir_target_ssa: + src->ssa = &dest->ssa; + break; + case ppir_target_register: + src->reg = dest->reg; + break; + case ppir_target_pipeline: + src->pipeline = dest->pipeline; + break; + } +} + +static inline bool ppir_node_target_equal(ppir_src *src, ppir_dest *dest) +{ + if (src->type != dest->type || + (src->type == ppir_target_ssa && src->ssa != &dest->ssa) || + (src->type == ppir_target_register && src->reg != dest->reg) || + (src->type == ppir_target_pipeline && src->pipeline != dest->pipeline)) + return false; + + return true; +} + +static inline int ppir_target_get_src_reg_index(ppir_src *src) +{ + switch (src->type) { + case ppir_target_ssa: + return src->ssa->index; + case ppir_target_register: + return src->reg->index; + case ppir_target_pipeline: + if (src->pipeline == ppir_pipeline_reg_discard) + return 15 * 4; + return (src->pipeline + 12) * 4; + } + + return -1; +} + +static inline int ppir_target_get_dest_reg_index(ppir_dest *dest) +{ + switch (dest->type) { + case ppir_target_ssa: + return dest->ssa.index; + case ppir_target_register: + return dest->reg->index; + case ppir_target_pipeline: + if (dest->pipeline == ppir_pipeline_reg_discard) + return 15 * 4; + return (dest->pipeline + 12) * 4; + } + + return -1; +} + +static inline bool ppir_target_is_scaler(ppir_dest *dest) +{ + switch (dest->type) { + case ppir_target_ssa: + return dest->ssa.num_components == 1; + case ppir_target_register: + /* only one bit in mask is set */ + if ((dest->write_mask & 0x3) == 0x3 || + (dest->write_mask & 0x5) == 0x5 || + (dest->write_mask & 0x9) == 0x9 || + (dest->write_mask & 0x6) == 0x6 || + (dest->write_mask & 0xa) == 0xa || + (dest->write_mask & 0xc) == 0xc) + return false; + else + return true; + case ppir_target_pipeline: + if (dest->pipeline == ppir_pipeline_reg_fmul) + return true; + else + return 
false; + default: + return false; + } +} + +ppir_instr *ppir_instr_create(ppir_block *block); +bool ppir_instr_insert_node(ppir_instr *instr, ppir_node *node); +void ppir_instr_add_dep(ppir_instr *succ, ppir_instr *pred); +void ppir_instr_print_list(ppir_compiler *comp); +void ppir_instr_print_dep(ppir_compiler *comp); +void ppir_instr_insert_mul_node(ppir_node *add, ppir_node *mul); + +#define ppir_instr_foreach_succ(instr, dep) \ + list_for_each_entry(ppir_dep, dep, &instr->succ_list, succ_link) +#define ppir_instr_foreach_succ_safe(instr, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &instr->succ_list, succ_link) +#define ppir_instr_foreach_pred(instr, dep) \ + list_for_each_entry(ppir_dep, dep, &instr->pred_list, pred_link) +#define ppir_instr_foreach_pred_safe(instr, dep) \ + list_for_each_entry_safe(ppir_dep, dep, &instr->pred_list, pred_link) + +static inline bool ppir_instr_is_root(ppir_instr *instr) +{ + return list_empty(&instr->succ_list); +} + +static inline bool ppir_instr_is_leaf(ppir_instr *instr) +{ + return list_empty(&instr->pred_list); +} + +bool ppir_lower_prog(ppir_compiler *comp); +bool ppir_node_to_instr(ppir_compiler *comp); +bool ppir_schedule_prog(ppir_compiler *comp); +bool ppir_regalloc_prog(ppir_compiler *comp); +bool ppir_codegen_prog(ppir_compiler *comp); + +#endif diff --git a/src/gallium/drivers/lima/ir/pp/regalloc.c b/src/gallium/drivers/lima/ir/pp/regalloc.c new file mode 100644 index 00000000000..6aa71e91cfe --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/regalloc.c @@ -0,0 +1,757 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include "util/ralloc.h"
+#include "util/register_allocate.h"
+#include "util/u_debug.h"
+
+#include "ppir.h"
+#include "lima_context.h"
+
+#define PPIR_FULL_REG_NUM 6
+
+#define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
+#define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
+#define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
+#define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
+#define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */
+#define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */
+#define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */
+#define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
+
+#define PPIR_VEC1_REG_BASE 0
+#define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
+#define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
+#define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
+#define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
+#define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
+#define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
+#define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
+#define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
+
+enum ppir_ra_reg_class {
+   ppir_ra_reg_class_vec1,
+   ppir_ra_reg_class_vec2,
+   ppir_ra_reg_class_vec3,
+   ppir_ra_reg_class_vec4,
+
+   /* 4 reg classes for load/store instr regs:
+    * load/store instr has no swizzle field, so the (virtual) register
+    * must be allocated at the beginning of a (physical) register.
+    */
+   ppir_ra_reg_class_head_vec1,
+   ppir_ra_reg_class_head_vec2,
+   ppir_ra_reg_class_head_vec3,
+   ppir_ra_reg_class_head_vec4,
+
+   ppir_ra_reg_class_num,
+};
+
+static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
+   [ppir_ra_reg_class_vec1] = PPIR_VEC1_REG_BASE,
+   [ppir_ra_reg_class_vec2] = PPIR_VEC2_REG_BASE,
+   [ppir_ra_reg_class_vec3] = PPIR_VEC3_REG_BASE,
+   [ppir_ra_reg_class_vec4] = PPIR_VEC4_REG_BASE,
+   [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
+   [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
+   [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
+   [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
+   [ppir_ra_reg_class_num] = PPIR_REG_COUNT,
+};
+
+static unsigned int *
+ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
+   (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
+   (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
+   (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
+   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
+   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
+   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
+   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
+   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+struct ra_regs *ppir_regalloc_init(void *mem_ctx)
+{
+   struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
+   if (!ret)
+      return NULL;
+
+   /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
+   static const int class_reg_num[ppir_ra_reg_class_num] = {
+      4, 3, 2, 1, 1, 1, 1, 1,
+   };
+   /* base reg (x, y, z, w) conflicts with other regs */
+   for (int h = 0; h < 4; h++) {
+      int base_reg_mask = 1 << h;
+      for (int i = 1; i < ppir_ra_reg_class_num; i++) {
+         int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
+         for (int j = 0; j < class_reg_num[i]; j++) {
+            if (base_reg_mask & (class_reg_base_mask << j)) {
+               for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
+                  ra_add_reg_conflict(ret, k * 4 + h,
+                                      ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
+               }
+            }
+         }
+      }
+   }
+   /* build all other conflicts from the base reg conflicts */
+   for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
+      ra_make_reg_conflicts_transitive(ret, i);
+
+   for (int i = 0; i < ppir_ra_reg_class_num; i++)
+      ra_alloc_reg_class(ret);
+
+   int reg_index = 0;
+   for (int i = 0; i < ppir_ra_reg_class_num; i++) {
+      while (reg_index < ppir_ra_reg_base[i + 1])
+         ra_class_add_reg(ret, i, reg_index++);
+   }
+
+   ra_set_finalize(ret, ppir_ra_reg_q_values);
+   return ret;
+}
+
+static ppir_reg *get_src_reg(ppir_src *src)
+{
+   switch (src->type) {
+   case ppir_target_ssa:
+      return src->ssa;
+   case ppir_target_register:
+      return src->reg;
+   default:
+      return NULL;
+   }
+}
+
+static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
+{
+   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
+      list_for_each_entry(ppir_node, node, &block->node_list, list) {
+         if (node->op == ppir_op_store_color)
+            continue;
+
+         if (!node->instr || node->op == ppir_op_const)
+            continue;
+
+         ppir_dest *dest = ppir_node_get_dest(node);
+         if (dest) {
+            ppir_reg *reg = NULL;
+
+            if (dest->type == ppir_target_ssa) {
+               reg = &dest->ssa;
+               list_addtail(&reg->list, &comp->reg_list);
+            }
+         }
+      }
+   }
+}
+
+static ppir_reg *ppir_regalloc_build_liveness_info(ppir_compiler *comp)
+{
+   ppir_reg *ret = NULL;
+
+   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
+      list_for_each_entry(ppir_node, node, &block->node_list, list) {
+         if (node->op == ppir_op_store_color) {
+            ppir_store_node *store = ppir_node_to_store(node);
+            if (store->src.type == ppir_target_ssa)
+               ret = store->src.ssa;
+            else
+               ret = store->src.reg;
+            ret->live_out = INT_MAX;
+            continue;
+         }
+
+         if (!node->instr || node->op == ppir_op_const)
+            continue;
+
+         /* update reg live_in from node dest (write) */
+         ppir_dest *dest = ppir_node_get_dest(node);
+         if (dest) {
+            ppir_reg *reg = NULL;
+
+            if (dest->type == ppir_target_ssa) {
+               reg = &dest->ssa;
+            }
+            else if (dest->type == ppir_target_register)
+               reg = dest->reg;
+
+            if (reg && node->instr->seq < reg->live_in)
+               reg->live_in = node->instr->seq;
+         }
+
+         /* update reg live_out from node src (read) */
+         switch (node->type) {
+         case ppir_node_type_alu:
+         {
+            ppir_alu_node *alu = ppir_node_to_alu(node);
+            for (int i = 0; i < alu->num_src; i++) {
+               ppir_reg *reg = get_src_reg(alu->src + i);
+               if (reg && node->instr->seq > reg->live_out)
+                  reg->live_out = node->instr->seq;
+            }
+            break;
+         }
+         case ppir_node_type_store:
+         {
+            ppir_store_node *store = ppir_node_to_store(node);
+            ppir_reg *reg = get_src_reg(&store->src);
+            if (reg && node->instr->seq > reg->live_out)
+               reg->live_out = node->instr->seq;
+            break;
+         }
+         case ppir_node_type_load:
+         {
+            ppir_load_node *load = ppir_node_to_load(node);
+            ppir_reg *reg = get_src_reg(&load->src);
+            if (reg && node->instr->seq > reg->live_out)
+               reg->live_out = node->instr->seq;
+            break;
+         }
+         case ppir_node_type_load_texture:
+         {
+            ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
+            ppir_reg *reg = get_src_reg(&load_tex->src_coords);
+            if (reg && node->instr->seq > reg->live_out)
+               reg->live_out = node->instr->seq;
+            break;
+         }
+         default:
+            break;
+         }
+      }
+   }
+
+   return ret;
+}
+
+static int get_phy_reg_index(int reg)
+{
+   int i;
+
+   for (i = 0; i < ppir_ra_reg_class_num; i++) {
+      if (reg < ppir_ra_reg_base[i + 1]) {
+         reg -= ppir_ra_reg_base[i];
+         break;
+      }
+   }
+
+   if (i < ppir_ra_reg_class_head_vec1)
+      return reg / (4 - i) * 4 + reg % (4 - i);
+   else
+      return reg * 4;
+}
+
+static void
ppir_regalloc_print_result(ppir_compiler *comp) +{ + printf("======ppir regalloc result======\n"); + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { + printf("%03d:", instr->index); + for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { + ppir_node *node = instr->slots[i]; + if (!node) + continue; + + printf(" (%d|", node->index); + + ppir_dest *dest = ppir_node_get_dest(node); + if (dest) + printf("%d", ppir_target_get_dest_reg_index(dest)); + + printf("|"); + + switch (node->type) { + case ppir_node_type_alu: + { + ppir_alu_node *alu = ppir_node_to_alu(node); + for (int j = 0; j < alu->num_src; j++) { + if (j) + printf(" "); + + printf("%d", ppir_target_get_src_reg_index(alu->src + j)); + } + break; + } + case ppir_node_type_store: + { + ppir_store_node *store = ppir_node_to_store(node); + printf("%d", ppir_target_get_src_reg_index(&store->src)); + break; + } + case ppir_node_type_load: + { + ppir_load_node *load = ppir_node_to_load(node); + if (!load->num_components) + printf("%d", ppir_target_get_src_reg_index(&load->src)); + break; + } + case ppir_node_type_load_texture: + { + ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node); + printf("%d", ppir_target_get_src_reg_index(&load_tex->src_coords)); + break; + } + default: + break; + } + + printf(")"); + } + printf("\n"); + } + } + printf("--------------------------\n"); +} + +static bool create_new_instr_after(ppir_block *block, ppir_instr *ref, + ppir_node *node) +{ + ppir_instr *newinstr = ppir_instr_create(block); + if (unlikely(!newinstr)) + return false; + + list_del(&newinstr->list); + list_add(&newinstr->list, &ref->list); + + if (!ppir_instr_insert_node(newinstr, node)) + return false; + + list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { + instr->seq++; + } + newinstr->seq = ref->seq+1; + newinstr->scheduled = true; + return true; +} + +static bool create_new_instr_before(ppir_block *block, ppir_instr *ref, + ppir_node *node) +{ + ppir_instr *newinstr = ppir_instr_create(block); + if (unlikely(!newinstr)) + return false; + + list_del(&newinstr->list); + list_addtail(&newinstr->list, &ref->list); + + if (!ppir_instr_insert_node(newinstr, node)) + return false; + + list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) { + instr->seq++; + } + newinstr->seq = ref->seq-1; + newinstr->scheduled = true; + return true; +} + +static ppir_alu_node* ppir_update_spilled_src(ppir_compiler *comp, + ppir_block *block, + ppir_node *node, ppir_src *src, + ppir_alu_node *move_alu) +{ + /* alu nodes may have multiple references to the same value. 
+    * try to avoid unnecessary loads for the same alu node by
+    * saving the node resulting from the temporary load */
+   if (move_alu)
+      goto update_src;
+
+   /* alloc new node to load value */
+   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
+   if (!load_node)
+      return NULL;
+   list_addtail(&load_node->list, &node->list);
+
+   ppir_load_node *load = ppir_node_to_load(load_node);
+
+   load->index = -comp->prog->stack_size; /* index sizes are negative */
+   load->num_components = src->reg->num_components;
+
+   ppir_dest *ld_dest = &load->dest;
+   ld_dest->type = ppir_target_pipeline;
+   ld_dest->pipeline = ppir_pipeline_reg_uniform;
+   ld_dest->write_mask = 0xf;
+
+   create_new_instr_before(block, node->instr, load_node);
+
+   /* Create move node */
+   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
+   if (unlikely(!move_node))
+      return NULL;
+   list_addtail(&move_node->list, &node->list);
+
+   move_alu = ppir_node_to_alu(move_node);
+
+   move_alu->num_src = 1;
+   move_alu->src->type = ppir_target_pipeline;
+   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
+   for (int i = 0; i < 4; i++)
+      move_alu->src->swizzle[i] = i;
+
+   ppir_dest *alu_dest = &move_alu->dest;
+   alu_dest->type = ppir_target_ssa;
+   alu_dest->ssa.num_components = 4;
+   alu_dest->ssa.live_in = INT_MAX;
+   alu_dest->ssa.live_out = 0;
+   alu_dest->write_mask = 0xf;
+
+   list_addtail(&alu_dest->ssa.list, &comp->reg_list);
+
+   if (!ppir_instr_insert_node(load_node->instr, move_node))
+      return NULL;
+
+   /* insert the new node as predecessor */
+   ppir_node_foreach_pred_safe(node, dep) {
+      ppir_node *pred = dep->pred;
+      ppir_node_remove_dep(dep);
+      ppir_node_add_dep(load_node, pred);
+   }
+   ppir_node_add_dep(node, move_node);
+   ppir_node_add_dep(move_node, load_node);
+
+update_src:
+   /* switch node src to use the new ssa instead */
+   src->type = ppir_target_ssa;
+   src->ssa = &move_alu->dest.ssa;
+
+   return move_alu;
+}
+
+static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
+{
+   ppir_reg *r = rzalloc(comp, ppir_reg);
+   if (!r)
+      return NULL;
+
+   r->num_components = num_components;
+   r->live_in = INT_MAX;
+   r->live_out = 0;
+   r->is_head = false;
+   list_addtail(&r->list, &comp->reg_list);
+
+   return r;
+}
+
+static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
+                                     ppir_node *node, ppir_dest *dest)
+{
+   assert(dest != NULL);
+   ppir_reg *reg = NULL;
+   if (dest->type == ppir_target_register) {
+      reg = dest->reg;
+      reg->num_components = 4;
+      reg->spilled = true;
+   }
+   else {
+      reg = create_reg(comp, 4);
+      reg->spilled = true;
+      list_del(&dest->ssa.list);
+   }
+
+   /* alloc new node to load value */
+   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
+   if (!load_node)
+      return false;
+   list_addtail(&load_node->list, &node->list);
+
+   ppir_load_node *load = ppir_node_to_load(load_node);
+
+   load->index = -comp->prog->stack_size; /* index sizes are negative */
+   load->num_components = 4;
+
+   load->dest.type = ppir_target_pipeline;
+   load->dest.pipeline = ppir_pipeline_reg_uniform;
+   load->dest.write_mask = 0xf;
+
+   create_new_instr_before(block, node->instr, load_node);
+
+   /* Create move node */
+   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
+   if (unlikely(!move_node))
+      return false;
+   list_addtail(&move_node->list, &node->list);
+
+   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
+
+   move_alu->num_src = 1;
+   move_alu->src->type = ppir_target_pipeline;
+   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
+   for (int i =
0; i < 4; i++) + move_alu->src->swizzle[i] = i; + + move_alu->dest.type = ppir_target_register; + move_alu->dest.reg = reg; + move_alu->dest.write_mask = 0x0f; + + if (!ppir_instr_insert_node(load_node->instr, move_node)) + return false; + + ppir_node_foreach_pred_safe(node, dep) { + ppir_node *pred = dep->pred; + ppir_node_remove_dep(dep); + ppir_node_add_dep(load_node, pred); + } + ppir_node_add_dep(node, move_node); + ppir_node_add_dep(move_node, load_node); + + dest->type = ppir_target_register; + dest->reg = reg; + + /* alloc new node to store value */ + ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0); + if (!store_node) + return false; + list_addtail(&store_node->list, &node->list); + + ppir_store_node *store = ppir_node_to_store(store_node); + + store->index = -comp->prog->stack_size; /* index sizes are negative */ + store->num_components = 4; + + store->src.type = ppir_target_register; + store->src.reg = dest->reg; + + /* insert the new node as successor */ + ppir_node_foreach_succ_safe(node, dep) { + ppir_node *succ = dep->succ; + ppir_node_remove_dep(dep); + ppir_node_add_dep(succ, store_node); + } + ppir_node_add_dep(store_node, node); + + create_new_instr_after(block, node->instr, store_node); + + return true; +} + +static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen) +{ + list_for_each_entry(ppir_block, block, &comp->block_list, list) { + list_for_each_entry(ppir_node, node, &block->node_list, list) { + + ppir_dest *dest = ppir_node_get_dest(node); + ppir_reg *reg = NULL; + if (dest) { + if (dest->type == ppir_target_ssa) + reg = &dest->ssa; + else if (dest->type == ppir_target_register) + reg = dest->reg; + + if (reg == chosen) + ppir_update_spilled_dest(comp, block, node, dest); + } + + switch (node->type) { + case ppir_node_type_alu: + { + /* alu nodes may have multiple references to the same value. 
+             * try to avoid unnecessary loads for the same alu node by
+             * saving the node resulting from the temporary load */
+            ppir_alu_node *move_alu = NULL;
+            ppir_alu_node *alu = ppir_node_to_alu(node);
+            for (int i = 0; i < alu->num_src; i++) {
+               reg = get_src_reg(alu->src + i);
+               if (reg == chosen) {
+                  move_alu = ppir_update_spilled_src(comp, block, node,
+                                                     alu->src + i, move_alu);
+               }
+            }
+            break;
+         }
+         case ppir_node_type_store:
+         {
+            ppir_store_node *store = ppir_node_to_store(node);
+            reg = get_src_reg(&store->src);
+            if (reg == chosen) {
+               ppir_update_spilled_src(comp, block, node, &store->src, NULL);
+            }
+            break;
+         }
+         case ppir_node_type_load:
+         {
+            ppir_load_node *load = ppir_node_to_load(node);
+            reg = get_src_reg(&load->src);
+            if (reg == chosen) {
+               ppir_update_spilled_src(comp, block, node, &load->src, NULL);
+            }
+            break;
+         }
+         case ppir_node_type_load_texture:
+         {
+            ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
+            reg = get_src_reg(&load_tex->src_coords);
+            if (reg == chosen) {
+               ppir_update_spilled_src(comp, block, node, &load_tex->src_coords,
+                                       NULL);
+            }
+            break;
+         }
+         default:
+            break;
+         }
+      }
+   }
+
+   return true;
+}
+
+static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
+                                                 struct ra_graph *g)
+{
+   int max_range = -1;
+   ppir_reg *chosen = NULL;
+
+   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
+      int range = reg->live_out - reg->live_in;
+
+      if (!reg->spilled && reg->live_out != INT_MAX && range > max_range) {
+         chosen = reg;
+         max_range = range;
+      }
+   }
+
+   if (chosen)
+      chosen->spilled = true;
+
+   return chosen;
+}
+
+static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
+{
+   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
+      reg->live_in = INT_MAX;
+      reg->live_out = 0;
+   }
+}
+
+int lima_ppir_force_spilling = 0;
+
+static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
+{
+   ppir_reg *end_reg;
+
+   ppir_regalloc_reset_liveness_info(comp);
+   end_reg = ppir_regalloc_build_liveness_info(comp);
+
+   struct ra_graph *g = ra_alloc_interference_graph(
+      comp->ra, list_length(&comp->reg_list));
+
+   int n = 0, end_reg_index = 0;
+   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
+      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
+      if (reg->is_head)
+         c += 4;
+      if (reg == end_reg)
+         end_reg_index = n;
+      ra_set_node_class(g, n++, c);
+   }
+
+   int n1 = 0;
+   list_for_each_entry(ppir_reg, reg1, &comp->reg_list, list) {
+      int n2 = n1 + 1;
+      list_for_each_entry_from(ppir_reg, reg2, reg1->list.next,
+                               &comp->reg_list, list) {
+         bool interference = false;
+         if (reg1->live_in < reg2->live_in) {
+            if (reg1->live_out > reg2->live_in)
+               interference = true;
+         }
+         else if (reg1->live_in > reg2->live_in) {
+            if (reg2->live_out > reg1->live_in)
+               interference = true;
+         }
+         else
+            interference = true;
+
+         if (interference)
+            ra_add_node_interference(g, n1, n2);
+
+         n2++;
+      }
+      n1++;
+   }
+
+   ra_set_node_reg(g, end_reg_index, ppir_ra_reg_base[ppir_ra_reg_class_vec4]);
+
+   *spilled = false;
+   bool ok = ra_allocate(g);
+   if (!ok || (comp->force_spilling-- > 0)) {
+      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
+      if (chosen) {
+         /* stack_size will be used to assemble the frame reg in lima_draw.
+          * It is also used in the spilling code, as negative indices
+          * starting from -1, to create stack addresses. */
+         comp->prog->stack_size++;
+         ppir_regalloc_spill_reg(comp, chosen);
+         /* Ask the outer loop to call back in.
*/ + *spilled = true; + + ppir_debug("ppir: spilled register\n"); + goto err_out; + } + + ppir_error("ppir: regalloc fail\n"); + goto err_out; + } + + n = 0; + list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) { + int reg_index = ra_get_node_reg(g, n++); + reg->index = get_phy_reg_index(reg_index); + } + + ralloc_free(g); + + if (lima_debug & LIMA_DEBUG_PP) + ppir_regalloc_print_result(comp); + + return true; + +err_out: + ralloc_free(g); + return false; +} + +bool ppir_regalloc_prog(ppir_compiler *comp) +{ + bool spilled = false; + comp->prog->stack_size = 0; + + /* Set from an environment variable to force spilling + * for debugging purposes, see lima_screen.c */ + comp->force_spilling = lima_ppir_force_spilling; + + ppir_regalloc_update_reglist_ssa(comp); + + /* this will most likely succeed in the first + * try, except for very complicated shaders */ + while (!ppir_regalloc_prog_try(comp, &spilled)) + if (!spilled) + return false; + + return true; +} diff --git a/src/gallium/drivers/lima/ir/pp/scheduler.c b/src/gallium/drivers/lima/ir/pp/scheduler.c new file mode 100644 index 00000000000..721504a810e --- /dev/null +++ b/src/gallium/drivers/lima/ir/pp/scheduler.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2017 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include <limits.h>
+
+#include "ppir.h"
+
+
+static void ppir_schedule_calc_sched_info(ppir_instr *instr)
+{
+   int n = 0;
+   float extra_reg = 1.0;
+
+   /* update all children's sched info */
+   ppir_instr_foreach_pred(instr, dep) {
+      ppir_instr *pred = dep->pred;
+
+      if (pred->reg_pressure < 0)
+         ppir_schedule_calc_sched_info(pred);
+
+      if (instr->est < pred->est + 1)
+         instr->est = pred->est + 1;
+
+      float reg_weight = 1.0 - 1.0 / list_length(&pred->succ_list);
+      if (extra_reg > reg_weight)
+         extra_reg = reg_weight;
+
+      n++;
+   }
+
+   /* leaf instr */
+   if (!n) {
+      instr->reg_pressure = 0;
+      return;
+   }
+
+   int i = 0, reg[n];
+   ppir_instr_foreach_pred(instr, dep) {
+      ppir_instr *pred = dep->pred;
+      reg[i++] = pred->reg_pressure;
+   }
+
+   /* sort */
+   for (i = 0; i < n - 1; i++) {
+      for (int j = 0; j < n - i - 1; j++) {
+         if (reg[j] > reg[j + 1]) {
+            int tmp = reg[j + 1];
+            reg[j + 1] = reg[j];
+            reg[j] = tmp;
+         }
+      }
+   }
+
+   for (i = 0; i < n; i++) {
+      int pressure = reg[i] + n - (i + 1);
+      if (pressure > instr->reg_pressure)
+         instr->reg_pressure = pressure;
+   }
+
+   /* If all children of this instr have multiple parents, then this
+    * instr needs an extra reg to store its result. For example, it
+    * is not fair for a parent to have the same reg pressure as its
+    * child when n==1 and the child has more than one successor,
+    * because we need 2 regs in that case.
+    *
+    * But we can't add a full reg to the reg_pressure, because the
+    * last parent of a multi-successor child doesn't need an extra
+    * reg. For example, an instr with a single child (with multiple
+    * successors) should have less reg pressure than an instr with
+    * two children (each with a single successor).
+    *
+    * extra reg = min(over all children)(1.0 - 1.0 / num successors)
+    */
+   instr->reg_pressure += extra_reg;
+}
+
+static void ppir_insert_ready_list(struct list_head *ready_list,
+                                   ppir_instr *insert_instr)
+{
+   struct list_head *insert_pos = ready_list;
+
+   list_for_each_entry(ppir_instr, instr, ready_list, list) {
+      if (insert_instr->parent_index < instr->parent_index ||
+          (insert_instr->parent_index == instr->parent_index &&
+           (insert_instr->reg_pressure < instr->reg_pressure ||
+            (insert_instr->reg_pressure == instr->reg_pressure &&
+             (insert_instr->est >= instr->est))))) {
+         insert_pos = &instr->list;
+         break;
+      }
+   }
+
+   list_del(&insert_instr->list);
+   list_addtail(&insert_instr->list, insert_pos);
+}
+
+static void ppir_schedule_ready_list(ppir_block *block,
+                                     struct list_head *ready_list)
+{
+   if (list_empty(ready_list))
+      return;
+
+   ppir_instr *instr = list_first_entry(ready_list, ppir_instr, list);
+   list_del(&instr->list);
+
+   /* schedule the instr to the block instr list */
+   list_add(&instr->list, &block->instr_list);
+   instr->scheduled = true;
+   block->sched_instr_index--;
+   instr->seq = block->sched_instr_base + block->sched_instr_index;
+
+   ppir_instr_foreach_pred(instr, dep) {
+      ppir_instr *pred = dep->pred;
+      pred->parent_index = block->sched_instr_index;
+
+      bool ready = true;
+      ppir_instr_foreach_succ(pred, dep) {
+         ppir_instr *succ = dep->succ;
+         if (!succ->scheduled) {
+            ready = false;
+            break;
+         }
+      }
+      /* all successors have been scheduled */
+      if (ready)
+         ppir_insert_ready_list(ready_list, pred);
+   }
+
+   ppir_schedule_ready_list(block, ready_list);
+}
+
+/* Register-sensitive scheduling algorithm from the paper:
+ * "Register-Sensitive Selection, Duplication, and Sequencing of Instructions"
+ * Authors: Vivek Sarkar, Mauricio J. Serrano, Barbara B. Simons
+ */
+static void ppir_schedule_block(ppir_block *block)
+{
+   /* move all instrs to instr_list; block->instr_list will
+    * contain the schedule result */
+   struct list_head instr_list;
+   list_replace(&block->instr_list, &instr_list);
+   list_inithead(&block->instr_list);
+
+   /* step 2 & 3 */
+   list_for_each_entry(ppir_instr, instr, &instr_list, list) {
+      if (ppir_instr_is_root(instr))
+         ppir_schedule_calc_sched_info(instr);
+      block->sched_instr_index++;
+   }
+   block->sched_instr_base = block->comp->sched_instr_base;
+   block->comp->sched_instr_base += block->sched_instr_index;
+
+   /* step 4 */
+   struct list_head ready_list;
+   list_inithead(&ready_list);
+
+   /* step 5 */
+   list_for_each_entry_safe(ppir_instr, instr, &instr_list, list) {
+      if (ppir_instr_is_root(instr)) {
+         instr->parent_index = INT_MAX;
+         ppir_insert_ready_list(&ready_list, instr);
+      }
+   }
+
+   /* step 6 */
+   ppir_schedule_ready_list(block, &ready_list);
+}
+
+bool ppir_schedule_prog(ppir_compiler *comp)
+{
+   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
+      ppir_schedule_block(block);
+   }
+
+   return true;
+}
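As a reading aid only, and not part of the commit above, here is a minimal, self-contained sketch of the register-pressure estimate that ppir_schedule_calc_sched_info() derives from its children: the children's pressures are sorted in ascending order, the largest value of reg[i] + n - (i + 1) is taken, and the fractional extra_reg term described in the comment is added. The array-based interface and all names below are illustrative, not part of the driver.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int cmp_int(const void *a, const void *b)
{
   return *(const int *)a - *(const int *)b;
}

/* child_pressure[i]: reg_pressure already computed for child i
 * child_num_succ[i]: number of parents that consume child i's result */
static float estimate_reg_pressure(int n, const int *child_pressure,
                                   const int *child_num_succ)
{
   if (n == 0)
      return 0.0f;   /* leaf instruction */

   /* extra reg = min(over all children)(1.0 - 1.0 / num successors) */
   float extra_reg = 1.0f;
   for (int i = 0; i < n; i++) {
      float reg_weight = 1.0f - 1.0f / child_num_succ[i];
      if (extra_reg > reg_weight)
         extra_reg = reg_weight;
   }

   /* sort child pressures ascending, like the bubble sort above */
   int *reg = malloc(n * sizeof(*reg));
   if (!reg)
      return 0.0f;
   memcpy(reg, child_pressure, n * sizeof(*reg));
   qsort(reg, n, sizeof(*reg), cmp_int);

   /* each child's own pressure plus the sibling results still live */
   int pressure = 0;
   for (int i = 0; i < n; i++) {
      int p = reg[i] + n - (i + 1);
      if (p > pressure)
         pressure = p;
   }

   free(reg);
   return pressure + extra_reg;
}

int main(void)
{
   int pressure[] = { 1, 2 };
   int num_succ[] = { 1, 2 };
   /* sorted pressures {1, 2}: max(1 + 1, 2 + 0) = 2; extra term
    * min(1 - 1/1, 1 - 1/2) = 0, so the estimate printed is 2.0 */
   printf("%.1f\n", estimate_reg_pressure(2, pressure, num_succ));
   return 0;
}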