diff options
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 77 |
5 files changed, 83 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 736c54b4fa6..d9ad5bdffcf 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present); void brw_SAMPLE(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6f29468083e..43e578389ef 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2292,6 +2292,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present) { struct brw_context *brw = p->brw; @@ -2333,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_type, msg_length, header_present, - eot, /* last render target write */ + last_render_target, response_length, eot, 0 /* send_commit_msg */); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index ec77962dd3d..ee6ba98bb1b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -370,10 +370,12 @@ public: bool optimize_frontfacing_ternary(nir_alu_instr *instr, const fs_reg &result); - int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components); + int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, + bool use_2nd_half); void emit_alpha_test(); fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, - fs_reg src0_alpha, unsigned components); + fs_reg src0_alpha, unsigned components, + bool use_2nd_half = false); void emit_fb_writes(); void emit_urb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 02ea3b6d7c5..e0862665f8b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -214,9 +214,12 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst->opcode == FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; - else if (prog_data->dual_src_blend) - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; - else if (dispatch_width == 16) + else if (prog_data->dual_src_blend) { + if (dispatch_width == 8 || !inst->eot) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; + } else if (dispatch_width == 16) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; @@ -224,6 +227,10 @@ fs_generator::fire_fb_write(fs_inst *inst, uint32_t surf_index = prog_data->binding_table.render_target_start + inst->target; + bool last_render_target = inst->eot || + (prog_data->dual_src_blend && dispatch_width == 16); + + brw_fb_WRITE(p, dispatch_width, payload, @@ -233,6 +240,7 @@ fs_generator::fire_fb_write(fs_inst *inst, nr, 0, inst->eot, + last_render_target, inst->header_present); brw_mark_surface_used(&prog_data->base, surf_index); @@ -370,6 +378,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) inst->mlen, 0, true, + true, inst->header_present); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 6b48f701fba..e413ae39ea3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3363,7 +3363,8 @@ fs_visitor::emit_interpolation_setup_gen6() } int -fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) +fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components, + bool use_2nd_half) { brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; fs_inst *inst; @@ -3381,7 +3382,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) colors_enabled = (1 << components) - 1; } - if (dispatch_width == 8 || brw->gen >= 6) { + if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) { /* SIMD8 write looks like: * m + 0: r0 * m + 1: r1 @@ -3412,6 +3413,33 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) len++; } return len; + } else if (brw->gen >= 6 && do_dual_src) { + /* SIMD16 dual source blending for gen6+. + * + * From the SNB PRM, volume 4, part 1, page 193: + * + * "The dual source render target messages only have SIMD8 forms due to + * maximum message length limitations. SIMD16 pixel shaders must send two + * of these messages to cover all of the pixels. Each message contains + * two colors (4 channels each) for each pixel in the message payload." + * + * So in SIMD16 dual source blending we will send 2 SIMD8 messages, + * each one will call this function twice (one for each color involved), + * so in each pass we only write 4 registers. Notice that the second + * SIMD8 message needs to read color data from the 2nd half of the color + * registers, so it needs to call this with use_2nd_half = true. + */ + for (unsigned i = 0; i < 4; ++i) { + if (colors_enabled & (1 << i)) { + dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); + inst = emit(MOV(dst[i], half(offset(color, i), + use_2nd_half ? 1 : 0))); + inst->saturate = key->clamp_fragment_color; + if (use_2nd_half) + inst->force_sechalf = true; + } + } + return 4; } else { /* pre-gen6 SIMD16 single source DP write looks like: * m + 0: r0 @@ -3495,7 +3523,8 @@ fs_visitor::emit_alpha_test() fs_inst * fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, - fs_reg src0_alpha, unsigned components) + fs_reg src0_alpha, unsigned components, + bool use_2nd_half) { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; @@ -3555,7 +3584,8 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. */ - length += setup_color_payload(sources + length, this->outputs[0], 0); + length += setup_color_payload(sources + length, this->outputs[0], 0, + false); } else if (color1.file == BAD_FILE) { if (src0_alpha.file != BAD_FILE) { sources[length] = fs_reg(GRF, alloc.allocate(reg_size), @@ -3565,10 +3595,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, length++; } - length += setup_color_payload(sources + length, color0, components); + length += setup_color_payload(sources + length, color0, components, + false); } else { - length += setup_color_payload(sources + length, color0, components); - length += setup_color_payload(sources + length, color1, components); + length += setup_color_payload(sources + length, color0, components, + use_2nd_half); + length += setup_color_payload(sources + length, color1, components, + use_2nd_half); } if (source_depth_to_render_target) { @@ -3637,12 +3670,6 @@ fs_visitor::emit_fb_writes() brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - if (do_dual_src) { - no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); - if (dispatch_width == 16) - do_dual_src = false; - } - fs_inst *inst; if (do_dual_src) { if (INTEL_DEBUG & DEBUG_SHADER_TIME) @@ -3653,6 +3680,30 @@ fs_visitor::emit_fb_writes() inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, reg_undef, 4); inst->target = 0; + + /* SIMD16 dual source blending requires to send two SIMD8 dual source + * messages, where each message contains color data for 8 pixels. Color + * data for the first group of pixels is stored in the "lower" half of + * the color registers, so in SIMD16, the previous message did: + * m + 0: r0 + * m + 1: g0 + * m + 2: b0 + * m + 3: a0 + * + * Here goes the second message, which packs color data for the + * remaining 8 pixels. Color data for these pixels is stored in the + * "upper" half of the color registers, so we need to do: + * m + 0: r1 + * m + 1: g1 + * m + 2: b1 + * m + 3: a1 + */ + if (dispatch_width == 16) { + inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, + reg_undef, 4, true); + inst->target = 0; + } + prog_data->dual_src_blend = true; } else if (key->nr_color_regions > 0) { for (int target = 0; target < key->nr_color_regions; target++) { |