diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 103 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_generator.cpp | 66 |
2 files changed, 86 insertions, 83 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index fdf2f9d7ac1..479e07f1dc5 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -3235,7 +3235,18 @@ fs_visitor::emit_repclear_shader() write->mlen = 1; } else { assume(key->nr_color_regions > 0); + + struct brw_reg header = + retype(brw_message_reg(base_mrf), BRW_REGISTER_TYPE_UD); + bld.exec_all().group(16, 0) + .MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + for (int i = 0; i < key->nr_color_regions; ++i) { + if (i > 0) { + bld.exec_all().group(1, 0) + .MOV(component(header, 2), brw_imm_ud(i)); + } + write = bld.emit(FS_OPCODE_REP_FB_WRITE); write->saturate = key->clamp_fragment_color; write->base_mrf = base_mrf; @@ -3960,25 +3971,83 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, int header_size = 2, payload_header_size; unsigned length = 0; - /* From the Sandy Bridge PRM, volume 4, page 198: - * - * "Dispatched Pixel Enables. One bit per pixel indicating - * which pixels were originally enabled when the thread was - * dispatched. This field is only required for the end-of- - * thread message and on all dual-source messages." - */ - if (devinfo->gen >= 6 && - (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) && - color1.file == BAD_FILE && - key->nr_color_regions == 1) { - header_size = 0; - } + if (devinfo->gen < 6) { + /* For gen4-5, we always have a header consisting of g0 and g1. We have + * an implied MOV from g0,g1 to the start of the message. The MOV from + * g0 is handled by the hardware and the MOV from g1 is provided by the + * generator. This is required because, on gen4-5, the generator may + * generate two write messages with different message lengths in order + * to handle AA data properly. + * + * Also, since the pixel mask goes in the g0 portion of the message and + * since render target writes are the last thing in the shader, we write + * the pixel mask directly into g0 and it will get copied as part of the + * implied write. + */ + if (prog_data->uses_kill) { + bld.exec_all().group(1, 0) + .MOV(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), + brw_flag_reg(0, 1)); + } + + assert(length == 0); + length = 2; + } else if ((devinfo->gen <= 7 && !devinfo->is_haswell && + prog_data->uses_kill) || + color1.file != BAD_FILE || + key->nr_color_regions > 1) { + /* From the Sandy Bridge PRM, volume 4, page 198: + * + * "Dispatched Pixel Enables. One bit per pixel indicating + * which pixels were originally enabled when the thread was + * dispatched. This field is only required for the end-of- + * thread message and on all dual-source messages." + */ + const fs_builder ubld = bld.exec_all().group(8, 0); + + /* The header starts off as g0 and g1 */ + fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); + ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0), + BRW_REGISTER_TYPE_UD)); + + uint32_t g00_bits = 0; + + /* Set "Source0 Alpha Present to RenderTarget" bit in message + * header. + */ + if (inst->target > 0 && key->replicate_alpha) + g00_bits |= 1 << 11; + + /* Set computes stencil to render target */ + if (prog_data->computed_stencil) + g00_bits |= 1 << 14; + + if (g00_bits) { + /* OR extra bits into g0.0 */ + ubld.group(1, 0).OR(component(header, 0), + retype(brw_vec1_grf(0, 0), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(g00_bits)); + } + + /* Set the render target index for choosing BLEND_STATE. */ + if (inst->target > 0) { + ubld.group(1, 0).MOV(component(header, 2), brw_imm_ud(inst->target)); + } + + if (prog_data->uses_kill) { + ubld.group(1, 0).MOV(retype(component(header, 15), + BRW_REGISTER_TYPE_UW), + brw_flag_reg(0, 1)); + } - if (header_size != 0) { - assert(header_size == 2); - /* Allocate 2 registers for a header */ - length += 2; + assert(length == 0); + sources[0] = header; + sources[1] = horiz_offset(header, 8); + length = 2; } + assert(length == 0 || length == 2); + header_size = length; if (payload.aa_dest_stencil_reg) { sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1)); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index f72139d43c8..f00c07b1bc6 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -307,9 +307,6 @@ fs_generator::fire_fb_write(fs_inst *inst, void fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) { - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key; - if (devinfo->gen < 8 && !devinfo->is_haswell) { brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); } @@ -320,69 +317,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) if (inst->base_mrf >= 0) payload = brw_message_reg(inst->base_mrf); - /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied - * move, here's g1. - */ - if (inst->header_size != 0) { - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_flag_reg(p, 0, 0); - - /* On HSW, the GPU will use the predicate on SENDC, unless the header is - * present. - */ - if (prog_data->uses_kill) { - struct brw_reg pixel_mask; - - if (devinfo->gen >= 6) - pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - else - pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_MOV(p, pixel_mask, brw_flag_reg(0, 1)); - } - - if (devinfo->gen >= 6) { - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_16); - brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED); - brw_MOV(p, - retype(payload, BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_pop_insn_state(p); - - if (inst->target > 0 && key->replicate_alpha) { - /* Set "Source0 Alpha Present to RenderTarget" bit in message - * header. - */ - brw_OR(p, - vec1(retype(payload, BRW_REGISTER_TYPE_UD)), - vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), - brw_imm_ud(0x1 << 11)); - } - - if (inst->target > 0) { - /* Set the render target index for choosing BLEND_STATE. */ - brw_MOV(p, retype(vec1(suboffset(payload, 2)), - BRW_REGISTER_TYPE_UD), - brw_imm_ud(inst->target)); - } - - /* Set computes stencil to render target */ - if (prog_data->computed_stencil) { - brw_OR(p, - vec1(retype(payload, BRW_REGISTER_TYPE_UD)), - vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), - brw_imm_ud(0x1 << 14)); - } - } - - brw_pop_insn_state(p); - } - if (!runtime_check_aads_emit) { fire_fb_write(inst, payload, implied_header, inst->mlen); } else { |