diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 65 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 36 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 13 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_ir_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 29 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 13 |
7 files changed, 123 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7904f4d2862..d2881b2d7a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -288,6 +288,71 @@ fs_inst::is_send_from_grf() const } } +/** + * Returns true if this instruction's sources and destinations cannot + * safely be the same register. + * + * In most cases, a register can be written over safely by the same + * instruction that is its last use. For a single instruction, the + * sources are dereferenced before writing of the destination starts + * (naturally). + * + * However, there are a few cases where this can be problematic: + * + * - Virtual opcodes that translate to multiple instructions in the + * code generator: if src == dst and one instruction writes the + * destination before a later instruction reads the source, then + * src will have been clobbered. + * + * - SIMD16 compressed instructions with certain regioning (see below). + * + * The register allocator uses this information to set up conflicts between + * GRF sources and the destination. + */ +bool +fs_inst::has_source_and_destination_hazard() const +{ + switch (opcode) { + case FS_OPCODE_PACK_HALF_2x16_SPLIT: + /* Multiple partial writes to the destination */ + return true; + default: + /* The SIMD16 compressed instruction + * + * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F + * + * is actually decoded in hardware as: + * + * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F + * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F + * + * Which is safe. However, if we have uniform accesses + * happening, we get into trouble: + * + * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F + * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F + * + * Now our destination for the first instruction overwrote the + * second instruction's src0, and we get garbage for those 8 + * pixels. There's a similar issue for the pre-gen6 + * pixel_x/pixel_y, which are registers of 16-bit values and thus + * would get stomped by the first decode as well. + */ + if (exec_size == 16) { + for (int i = 0; i < sources; i++) { + if (src[i].file == VGRF && (src[i].stride == 0 || + src[i].type == BRW_REGISTER_TYPE_UW || + src[i].type == BRW_REGISTER_TYPE_W || + src[i].type == BRW_REGISTER_TYPE_UB || + src[i].type == BRW_REGISTER_TYPE_B)) { + return true; + } + } + } + return false; + } +} + bool fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const { diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 80fb8c28f81..66b70a9144b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -59,42 +59,8 @@ fs_live_variables::setup_one_read(struct block_data *bd, fs_inst *inst, int var = var_from_reg(reg); assert(var < num_vars); - /* In most cases, a register can be written over safely by the - * same instruction that is its last use. For a single - * instruction, the sources are dereferenced before writing of the - * destination starts (naturally). This gets more complicated for - * simd16, because the instruction: - * - * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F - * - * is actually decoded in hardware as: - * - * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F - * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F - * - * Which is safe. However, if we have uniform accesses - * happening, we get into trouble: - * - * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F - * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F - * - * Now our destination for the first instruction overwrote the - * second instruction's src0, and we get garbage for those 8 - * pixels. There's a similar issue for the pre-gen6 - * pixel_x/pixel_y, which are registers of 16-bit values and thus - * would get stomped by the first decode as well. - */ - int end_ip = ip; - if (inst->exec_size == 16 && (reg.stride == 0 || - reg.type == BRW_REGISTER_TYPE_UW || - reg.type == BRW_REGISTER_TYPE_W || - reg.type == BRW_REGISTER_TYPE_UB || - reg.type == BRW_REGISTER_TYPE_B)) { - end_ip++; - } - start[var] = MIN2(start[var], ip); - end[var] = MAX2(end[var], end_ip); + end[var] = MAX2(end[var], ip); /* The use[] bitset marks when the block makes use of a variable (VGRF * channel) without having completely defined that variable within the diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 40129fd695e..2347cd5d33f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -597,6 +597,19 @@ fs_visitor::assign_regs(bool allow_spilling) } } + /* Certain instructions can't safely use the same register for their + * sources and destination. Add interference. + */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) { + for (unsigned i = 0; i < 3; i++) { + if (inst->src[i].file == VGRF) { + ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); + } + } + } + } + setup_payload_interference(g, payload_node_count, first_payload_node); if (devinfo->gen >= 7) { int first_used_mrf = BRW_MAX_MRF(devinfo->gen); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 84ee5292908..c3eec2efb42 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -205,6 +205,7 @@ public: bool can_do_source_mods(const struct brw_device_info *devinfo); bool can_change_types() const; bool has_side_effects() const; + bool has_source_and_destination_hazard() const; bool reads_flag() const; bool writes_flag() const; diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 861d7b83e10..660becaafa7 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -169,6 +169,7 @@ public: void reswizzle(int dst_writemask, int swizzle); bool can_do_source_mods(const struct brw_device_info *devinfo); bool can_change_types() const; + bool has_source_and_destination_hazard() const; bool reads_flag() { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9a79d67e12f..a697bdf84a0 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -161,6 +161,35 @@ vec4_instruction::is_send_from_grf() } } +/** + * Returns true if this instruction's sources and destinations cannot + * safely be the same register. + * + * In most cases, a register can be written over safely by the same + * instruction that is its last use. For a single instruction, the + * sources are dereferenced before writing of the destination starts + * (naturally). + * + * However, there are a few cases where this can be problematic: + * + * - Virtual opcodes that translate to multiple instructions in the + * code generator: if src == dst and one instruction writes the + * destination before a later instruction reads the source, then + * src will have been clobbered. + * + * The register allocator uses this information to set up conflicts between + * GRF sources and the destination. + */ +bool +vec4_instruction::has_source_and_destination_hazard() const +{ + switch (opcode) { + /* Most opcodes in the vec4 world use MRFs. */ + default: + return false; + } +} + unsigned vec4_instruction::regs_read(unsigned arg) const { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 01c9c96276e..afc326612a2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -221,6 +221,19 @@ vec4_visitor::reg_allocate() } } + /* Certain instructions can't safely use the same register for their + * sources and destination. Add interference. + */ + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) { + for (unsigned i = 0; i < 3; i++) { + if (inst->src[i].file == VGRF) { + ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr); + } + } + } + } + setup_payload_interference(g, first_payload_node, node_count); if (!ra_allocate(g)) { |