diff options
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 99 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp | 21 |
3 files changed, 113 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 436ba97e147..7ab37e7ca9f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -600,6 +600,85 @@ vec4_visitor::move_push_constants_to_pull_constants() pack_uniform_registers(); } +bool +vec4_instruction::can_reswizzle_dst(int dst_writemask, + int swizzle, + int swizzle_mask) +{ + /* If this instruction sets anything not referenced by swizzle, then we'd + * totally break it when we reswizzle. + */ + if (dst.writemask & ~swizzle_mask) + return false; + + switch (opcode) { + case BRW_OPCODE_DP4: + case BRW_OPCODE_DP3: + case BRW_OPCODE_DP2: + return true; + default: + /* Check if there happens to be no reswizzling required. */ + for (int c = 0; c < 4; c++) { + int bit = 1 << BRW_GET_SWZ(swizzle, c); + /* Skip components of the swizzle not used by the dst. */ + if (!(dst_writemask & (1 << c))) + continue; + + /* We don't do the reswizzling yet, so just sanity check that we + * don't have to. + */ + if (bit != (1 << c)) + return false; + } + return true; + } +} + +/** + * For any channels in the swizzle's source that were populated by this + * instruction, rewrite the instruction to put the appropriate result directly + * in those channels. + * + * e.g. for swizzle=yywx, MUL a.xy b c -> MUL a.yy_x b.yy_x c.yy_x + */ +void +vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle) +{ + int new_writemask = 0; + + switch (opcode) { + case BRW_OPCODE_DP4: + case BRW_OPCODE_DP3: + case BRW_OPCODE_DP2: + for (int c = 0; c < 4; c++) { + int bit = 1 << BRW_GET_SWZ(swizzle, c); + /* Skip components of the swizzle not used by the dst. */ + if (!(dst_writemask & (1 << c))) + continue; + /* If we were populating this component, then populate the + * corresponding channel of the new dst. 
+ */ + if (dst.writemask & bit) + new_writemask |= (1 << c); + } + dst.writemask = new_writemask; + break; + default: + for (int c = 0; c < 4; c++) { + int bit = 1 << BRW_GET_SWZ(swizzle, c); + /* Skip components of the swizzle not used by the dst. */ + if (!(dst_writemask & (1 << c))) + continue; + + /* We don't do the reswizzling yet, so just sanity check that we + * don't have to. + */ + assert(bit == (1 << c)); + } + break; + } +} + /* * Tries to reduce extra MOV instructions by taking GRFs that get just * written and then MOVed into an MRF and making the original write of @@ -641,26 +720,20 @@ vec4_visitor::opt_compute_to_mrf() */ bool chans_needed[4] = {false, false, false, false}; int chans_remaining = 0; + int swizzle_mask = 0; for (int i = 0; i < 4; i++) { int chan = BRW_GET_SWZ(inst->src[0].swizzle, i); if (!(inst->dst.writemask & (1 << i))) continue; - /* We don't handle compute-to-MRF across a swizzle. We would - * need to be able to rewrite instructions above to output - * results to different channels. - */ - if (chan != i) - chans_remaining = 5; + swizzle_mask |= (1 << chan); if (!chans_needed[chan]) { chans_needed[chan] = true; chans_remaining++; } } - if (chans_remaining > 4) - continue; /* Now walk up the instruction stream trying to see if we can * rewrite everything writing to the GRF into the MRF instead. @@ -689,6 +762,13 @@ vec4_visitor::opt_compute_to_mrf() } } + /* If we can't handle the swizzle, bail. */ + if (!scan_inst->can_reswizzle_dst(inst->dst.writemask, + inst->src[0].swizzle, + swizzle_mask)) { + break; + } + /* Mark which channels we found unconditional writes for. 
*/ if (!scan_inst->predicate) { for (int i = 0; i < 4; i++) { @@ -759,10 +839,11 @@ vec4_visitor::opt_compute_to_mrf() if (scan_inst->dst.file == GRF && scan_inst->dst.reg == inst->src[0].reg && scan_inst->dst.reg_offset == inst->src[0].reg_offset) { + scan_inst->reswizzle_dst(inst->dst.writemask, + inst->src[0].swizzle); scan_inst->dst.file = MRF; scan_inst->dst.reg = mrf; scan_inst->dst.reg_offset = 0; - scan_inst->dst.writemask &= inst->dst.writemask; scan_inst->saturate |= inst->saturate; } scan_inst = (vec4_instruction *)scan_inst->next; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d7c1cce075d..6da44d4080a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -194,6 +194,8 @@ public: bool is_tex(); bool is_math(); + bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask); + void reswizzle_dst(int dst_writemask, int swizzle); }; /** diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index c79b0fd1831..fa9c155655f 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -122,3 +122,24 @@ TEST_F(register_coalesce_test, test_multiple_use) EXPECT_NE(mul->dst.file, MRF); } + +TEST_F(register_coalesce_test, test_dp4_mrf) +{ + src_reg some_src_1 = src_reg(v, glsl_type::vec4_type); + src_reg some_src_2 = src_reg(v, glsl_type::vec4_type); + dst_reg init; + + dst_reg m0 = dst_reg(MRF, 0); + m0.writemask = WRITEMASK_Y; + m0.type = BRW_REGISTER_TYPE_F; + + dst_reg temp = dst_reg(v, glsl_type::float_type); + + vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2)); + v->emit(v->MOV(m0, src_reg(temp))); + + register_coalesce(v); + + EXPECT_EQ(dp4->dst.file, MRF); + EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y); +} |