diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/gen8_generator.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/gen8_generator.cpp | 643 |
1 files changed, 643 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/gen8_generator.cpp b/src/mesa/drivers/dri/i965/gen8_generator.cpp new file mode 100644 index 00000000000..ee5f792187f --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen8_generator.cpp @@ -0,0 +1,643 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** @file gen8_generator.cpp + * + * Code generation for Gen8+ hardware, replacing the brw_eu_emit.c layer. + */ + +extern "C" { +#include "main/compiler.h" +#include "main/macros.h" +#include "brw_context.h" +} /* extern "C" */ + +#include "glsl/ralloc.h" +#include "brw_eu.h" +#include "brw_reg.h" +#include "gen8_generator.h" + +gen8_generator::gen8_generator(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct gl_program *prog, + void *mem_ctx) + : shader_prog(shader_prog), prog(prog), brw(brw), mem_ctx(mem_ctx) +{ + ctx = &brw->ctx; + + memset(&default_state, 0, sizeof(default_state)); + default_state.mask_control = BRW_MASK_ENABLE; + + store_size = 1024; + store = rzalloc_array(mem_ctx, gen8_instruction, store_size); + nr_inst = 0; + next_inst_offset = 0; + + /* Set up the control flow stacks. */ + if_stack_depth = 0; + if_stack_array_size = 16; + if_stack = rzalloc_array(mem_ctx, int, if_stack_array_size); + + loop_stack_depth = 0; + loop_stack_array_size = 16; + loop_stack = rzalloc_array(mem_ctx, int, loop_stack_array_size); +} + +gen8_generator::~gen8_generator() +{ +} + +gen8_instruction * +gen8_generator::next_inst(unsigned opcode) +{ + gen8_instruction *inst; + + if (nr_inst + 1 > unsigned(store_size)) { + store_size <<= 1; + store = reralloc(mem_ctx, store, gen8_instruction, store_size); + assert(store); + } + + next_inst_offset += 16; + inst = &store[nr_inst++]; + + memset(inst, 0, sizeof(gen8_instruction)); + + gen8_set_opcode(inst, opcode); + gen8_set_exec_size(inst, default_state.exec_size); + gen8_set_access_mode(inst, default_state.access_mode); + gen8_set_mask_control(inst, default_state.mask_control); + gen8_set_cond_modifier(inst, default_state.conditional_mod); + gen8_set_pred_control(inst, default_state.predicate); + gen8_set_pred_inv(inst, default_state.predicate_inverse); + gen8_set_saturate(inst, default_state.saturate); + gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr); + return inst; +} + +#define ALU1(OP) \ +gen8_instruction * \ +gen8_generator::OP(struct brw_reg dst, struct brw_reg src) \ +{ \ + gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ + gen8_set_dst(brw, inst, dst); \ + gen8_set_src0(brw, inst, src); \ + return inst; \ +} + +#define ALU2(OP) \ +gen8_instruction * \ +gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \ +{ \ + gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ + gen8_set_dst(brw, inst, dst); \ + gen8_set_src0(brw, inst, s0); \ + gen8_set_src1(brw, inst, s1); \ + return inst; \ +} + +#define ALU2_ACCUMULATE(OP) \ +gen8_instruction * \ +gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, struct brw_reg s1) \ +{ \ + gen8_instruction *inst = next_inst(BRW_OPCODE_##OP); \ + gen8_set_dst(brw, inst, dst); \ + gen8_set_src0(brw, inst, s0); \ + gen8_set_src1(brw, inst, s1); \ + gen8_set_acc_wr_control(inst, true); \ + return inst; \ +} + +#define ALU3(OP) \ +gen8_instruction * \ +gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \ + struct brw_reg s1, struct brw_reg s2) \ +{ \ + return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \ +} + +#define ALU3F(OP) \ +gen8_instruction * \ +gen8_generator::OP(struct brw_reg dst, struct brw_reg s0, \ + struct brw_reg s1, struct brw_reg s2) \ +{ \ + assert(dst.type == BRW_REGISTER_TYPE_F); \ + assert(s0.type == BRW_REGISTER_TYPE_F); \ + assert(s1.type == BRW_REGISTER_TYPE_F); \ + assert(s2.type == BRW_REGISTER_TYPE_F); \ + return alu3(BRW_OPCODE_##OP, dst, s0, s1, s2); \ +} + +ALU2(ADD) +ALU2(AND) +ALU2(ASR) +ALU3(BFE) +ALU2(BFI1) +ALU3(BFI2) +ALU1(F32TO16) +ALU1(F16TO32) +ALU1(BFREV) +ALU1(CBIT) +ALU2_ACCUMULATE(ADDC) +ALU2_ACCUMULATE(SUBB) +ALU2(DP2) +ALU2(DP3) +ALU2(DP4) +ALU2(DPH) +ALU1(FBH) +ALU1(FBL) +ALU1(FRC) +ALU2(LINE) +ALU3F(LRP) +ALU3F(MAD) +ALU2(MUL) +ALU1(MOV) +ALU1(NOT) +ALU2(OR) +ALU2(PLN) +ALU1(RNDD) +ALU1(RNDE) +ALU1(RNDZ) +ALU2_ACCUMULATE(MAC) +ALU2_ACCUMULATE(MACH) +ALU2(SEL) +ALU2(SHL) +ALU2(SHR) +ALU2(XOR) + +gen8_instruction * +gen8_generator::CMP(struct brw_reg dst, unsigned conditional, + struct brw_reg src0, struct brw_reg src1) +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_CMP); + gen8_set_cond_modifier(inst, conditional); + /* The CMP instruction appears to behave erratically for floating point + * sources unless the destination type is also float. Overriding it to + * match src0 makes it work in all cases. + */ + dst.type = src0.type; + gen8_set_dst(brw, inst, dst); + gen8_set_src0(brw, inst, src0); + gen8_set_src1(brw, inst, src1); + return inst; +} + +static int +get_3src_subreg_nr(struct brw_reg reg) +{ + if (reg.vstride == BRW_VERTICAL_STRIDE_0) { + assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); + return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); + } else { + return reg.subnr / 4; + } +} + +gen8_instruction * +gen8_generator::alu3(unsigned opcode, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1, + struct brw_reg src2) +{ + /* MRFs haven't existed since Gen7, so we better not be using them. */ + if (dst.file == BRW_MESSAGE_REGISTER_FILE) { + dst.file = BRW_GENERAL_REGISTER_FILE; + dst.nr += GEN7_MRF_HACK_START; + } + + gen8_instruction *inst = next_inst(opcode); + assert(gen8_access_mode(inst) == BRW_ALIGN_16); + + assert(dst.file == BRW_GENERAL_REGISTER_FILE); + assert(dst.nr < 128); + assert(dst.address_mode == BRW_ADDRESS_DIRECT); + assert(dst.type == BRW_REGISTER_TYPE_F || + dst.type == BRW_REGISTER_TYPE_D || + dst.type == BRW_REGISTER_TYPE_UD); + gen8_set_dst_3src_reg_nr(inst, dst.nr); + gen8_set_dst_3src_subreg_nr(inst, dst.subnr / 16); + gen8_set_dst_3src_writemask(inst, dst.dw1.bits.writemask); + + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.address_mode == BRW_ADDRESS_DIRECT); + assert(src0.nr < 128); + gen8_set_src0_3src_swizzle(inst, src0.dw1.bits.swizzle); + gen8_set_src0_3src_subreg_nr(inst, get_3src_subreg_nr(src0)); + gen8_set_src0_3src_rep_ctrl(inst, src0.vstride == BRW_VERTICAL_STRIDE_0); + gen8_set_src0_3src_reg_nr(inst, src0.nr); + gen8_set_src0_3src_abs(inst, src0.abs); + gen8_set_src0_3src_negate(inst, src0.negate); + + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.address_mode == BRW_ADDRESS_DIRECT); + assert(src1.nr < 128); + gen8_set_src1_3src_swizzle(inst, src1.dw1.bits.swizzle); + gen8_set_src1_3src_subreg_nr(inst, get_3src_subreg_nr(src1)); + gen8_set_src1_3src_rep_ctrl(inst, src1.vstride == BRW_VERTICAL_STRIDE_0); + gen8_set_src1_3src_reg_nr(inst, src1.nr); + gen8_set_src1_3src_abs(inst, src1.abs); + gen8_set_src1_3src_negate(inst, src1.negate); + + assert(src2.file == BRW_GENERAL_REGISTER_FILE); + assert(src2.address_mode == BRW_ADDRESS_DIRECT); + assert(src2.nr < 128); + gen8_set_src2_3src_swizzle(inst, src2.dw1.bits.swizzle); + gen8_set_src2_3src_subreg_nr(inst, get_3src_subreg_nr(src2)); + gen8_set_src2_3src_rep_ctrl(inst, src2.vstride == BRW_VERTICAL_STRIDE_0); + gen8_set_src2_3src_reg_nr(inst, src2.nr); + gen8_set_src2_3src_abs(inst, src2.abs); + gen8_set_src2_3src_negate(inst, src2.negate); + + /* Set both the source and destination types based on dst.type, ignoring + * the source register types. The MAD and LRP emitters both ensure that + * all register types are float. The BFE and BFI2 emitters, however, may + * send us mixed D and UD source types and want us to ignore that. + */ + switch (dst.type) { + case BRW_REGISTER_TYPE_F: + gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_F); + gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_F); + break; + case BRW_REGISTER_TYPE_D: + gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_D); + gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_D); + break; + case BRW_REGISTER_TYPE_UD: + gen8_set_src_3src_type(inst, BRW_3SRC_TYPE_UD); + gen8_set_dst_3src_type(inst, BRW_3SRC_TYPE_UD); + break; + } + + return inst; +} + +gen8_instruction * +gen8_generator::math(unsigned math_function, + struct brw_reg dst, + struct brw_reg src0) +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_MATH); + + assert(dst.hstride == src0.hstride); + + gen8_set_math_function(inst, math_function); + gen8_set_dst(brw, inst, dst); + gen8_set_src0(brw, inst, src0); + return inst; +} + +gen8_instruction * +gen8_generator::MATH(unsigned math_function, + struct brw_reg dst, + struct brw_reg src0) +{ + assert(src0.type == BRW_REGISTER_TYPE_F); + gen8_instruction *inst = math(math_function, dst, src0); + return inst; +} + +gen8_instruction * +gen8_generator::MATH(unsigned math_function, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + bool int_math = + math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || + math_function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER || + math_function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER; + + if (int_math) { + assert(src0.type != BRW_REGISTER_TYPE_F); + assert(src1.type != BRW_REGISTER_TYPE_F); + } else { + assert(src0.type == BRW_REGISTER_TYPE_F); + } + + gen8_instruction *inst = math(math_function, dst, src0); + gen8_set_src1(brw, inst, src1); + return inst; +} + +gen8_instruction * +gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0) +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_MOV); + gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD)); + gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD)); + gen8_set_mask_control(inst, BRW_MASK_DISABLE); + + return inst; +} + + +gen8_instruction * +gen8_generator::NOP() +{ + return next_inst(BRW_OPCODE_NOP); +} + +void +gen8_generator::push_if_stack(gen8_instruction *inst) +{ + if_stack[if_stack_depth] = inst - store; + + ++if_stack_depth; + if (if_stack_array_size <= if_stack_depth) { + if_stack_array_size *= 2; + if_stack = reralloc(mem_ctx, if_stack, int, if_stack_array_size); + } +} + +gen8_instruction * +gen8_generator::pop_if_stack() +{ + --if_stack_depth; + return &store[if_stack[if_stack_depth]]; +} + +/** + * Patch the IF and ELSE instructions to set the jump offsets (JIP and UIP.) + */ +void +gen8_generator::patch_IF_ELSE(gen8_instruction *if_inst, + gen8_instruction *else_inst, + gen8_instruction *endif_inst) +{ + assert(if_inst != NULL && gen8_opcode(if_inst) == BRW_OPCODE_IF); + assert(else_inst == NULL || gen8_opcode(else_inst) == BRW_OPCODE_ELSE); + assert(endif_inst != NULL && gen8_opcode(endif_inst) == BRW_OPCODE_ENDIF); + + gen8_set_exec_size(endif_inst, gen8_exec_size(if_inst)); + + if (else_inst == NULL) { + /* Patch IF -> ENDIF */ + gen8_set_jip(if_inst, 16 * (endif_inst - if_inst)); + gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); + } else { + gen8_set_exec_size(else_inst, gen8_exec_size(if_inst)); + + /* Patch IF -> ELSE and ELSE -> ENDIF: + * + * The IF's JIP should point at the instruction after the ELSE. + * The IF's UIP should point to the ENDIF. + * + * Both are expressed in bytes, hence the multiply by 16...128-bits. + */ + gen8_set_jip(if_inst, 16 * (else_inst - if_inst + 1)); + gen8_set_uip(if_inst, 16 * (endif_inst - if_inst)); + + /* Patch ELSE -> ENDIF: + * + * Since we don't set branch_ctrl, both JIP and UIP point to ENDIF. + */ + gen8_set_jip(else_inst, 16 * (endif_inst - else_inst)); + gen8_set_uip(else_inst, 16 * (endif_inst - else_inst)); + } + gen8_set_jip(endif_inst, 16); +} + +gen8_instruction * +gen8_generator::IF(unsigned predicate) +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_IF); + gen8_set_dst(brw, inst, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D))); + gen8_set_exec_size(inst, default_state.exec_size); + gen8_set_pred_control(inst, predicate); + gen8_set_mask_control(inst, BRW_MASK_ENABLE); + push_if_stack(inst); + + return inst; +} + +gen8_instruction * +gen8_generator::ELSE() +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_ELSE); + gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_mask_control(inst, BRW_MASK_ENABLE); + push_if_stack(inst); + return inst; +} + +gen8_instruction * +gen8_generator::ENDIF() +{ + gen8_instruction *if_inst = NULL; + gen8_instruction *else_inst = NULL; + + gen8_instruction *tmp = pop_if_stack(); + if (gen8_opcode(tmp) == BRW_OPCODE_ELSE) { + else_inst = tmp; + tmp = pop_if_stack(); + } + assert(gen8_opcode(tmp) == BRW_OPCODE_IF); + if_inst = tmp; + + gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF); + gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE); + patch_IF_ELSE(if_inst, else_inst, endif_inst); + + return endif_inst; +} + +unsigned +gen8_generator::next_ip(unsigned ip) const +{ + return ip + 16; +} + +unsigned +gen8_generator::find_next_block_end(unsigned start) const +{ + for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { + gen8_instruction *inst = &store[ip / 16]; + + switch (gen8_opcode(inst)) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + case BRW_OPCODE_HALT: + return ip; + } + } + + return 0; +} + +/* There is no DO instruction on Gen6+, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +unsigned +gen8_generator::find_loop_end(unsigned start) const +{ + /* Always start after the instruction (such as a WHILE) we're trying to fix + * up. + */ + for (unsigned ip = next_ip(start); ip < next_inst_offset; ip = next_ip(ip)) { + gen8_instruction *inst = &store[ip / 16]; + + if (gen8_opcode(inst) == BRW_OPCODE_WHILE) { + if (ip + gen8_jip(inst) <= start) + return ip; + } + } + assert(!"not reached"); + return start; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK, CONT, and HALT instructions to their correct locations. + */ +void +gen8_generator::patch_jump_targets() +{ + for (unsigned ip = 0; ip < next_inst_offset; ip = next_ip(ip)) { + gen8_instruction *inst = &store[ip / 16]; + + int block_end_ip = find_next_block_end(ip); + switch (gen8_opcode(inst)) { + case BRW_OPCODE_BREAK: + assert(block_end_ip != 0); + gen8_set_jip(inst, block_end_ip - ip); + gen8_set_uip(inst, find_loop_end(ip) - ip); + assert(gen8_uip(inst) != 0); + assert(gen8_jip(inst) != 0); + break; + case BRW_OPCODE_CONTINUE: + assert(block_end_ip != 0); + gen8_set_jip(inst, block_end_ip - ip); + gen8_set_uip(inst, find_loop_end(ip) - ip); + assert(gen8_uip(inst) != 0); + assert(gen8_jip(inst) != 0); + break; + case BRW_OPCODE_ENDIF: + if (block_end_ip == 0) + gen8_set_jip(inst, 16); + else + gen8_set_jip(inst, block_end_ip - ip); + break; + case BRW_OPCODE_HALT: + /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19): + * + * "In case of the halt instruction not inside any conditional + * code block, the value of <JIP> and <UIP> should be the + * same. In case of the halt instruction inside conditional code + * block, the <UIP> should be the end of the program, and the + * <JIP> should be end of the most inner conditional code block." + * + * The uip will have already been set by whoever set up the + * instruction. + */ + if (block_end_ip == 0) { + gen8_set_jip(inst, gen8_uip(inst)); + } else { + gen8_set_jip(inst, block_end_ip - ip); + } + assert(gen8_uip(inst) != 0); + assert(gen8_jip(inst) != 0); + break; + } + } +} + +void +gen8_generator::DO() +{ + if (loop_stack_array_size < loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(mem_ctx, loop_stack, int, loop_stack_array_size); + } + loop_stack[loop_stack_depth++] = nr_inst; +} + +gen8_instruction * +gen8_generator::BREAK() +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_BREAK); + gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_src0(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_src1(brw, inst, brw_imm_d(0)); + gen8_set_exec_size(inst, default_state.exec_size); + return inst; +} + +gen8_instruction * +gen8_generator::CONTINUE() +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_CONTINUE); + gen8_set_dst(brw, inst, brw_ip_reg()); + gen8_set_src0(brw, inst, brw_ip_reg()); + gen8_set_src1(brw, inst, brw_imm_d(0)); + gen8_set_exec_size(inst, default_state.exec_size); + return inst; +} + +gen8_instruction * +gen8_generator::WHILE() +{ + gen8_instruction *do_inst = &store[loop_stack[--loop_stack_depth]]; + gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE); + + gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_src1(brw, while_inst, brw_imm_ud(0)); + gen8_set_jip(while_inst, 16 * (do_inst - while_inst)); + gen8_set_exec_size(while_inst, default_state.exec_size); + + return while_inst; +} + +gen8_instruction * +gen8_generator::HALT() +{ + gen8_instruction *inst = next_inst(BRW_OPCODE_HALT); + gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_src0(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + gen8_set_exec_size(inst, default_state.exec_size); + gen8_set_mask_control(inst, BRW_MASK_DISABLE); + return inst; +} + +void +gen8_generator::disassemble(FILE *out, int start, int end) +{ + bool dump_hex = false; + + for (int offset = start; offset < end; offset += 16) { + gen8_instruction *inst = &store[offset / 16]; + printf("0x%08x: ", offset); + + if (dump_hex) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *) inst)[3], + ((uint32_t *) inst)[2], + ((uint32_t *) inst)[1], + ((uint32_t *) inst)[0]); + } + + gen8_disassemble(stdout, inst, brw->gen); + } +} |