summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp15
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp77
5 files changed, 83 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 736c54b4fa6..d9ad5bdffcf 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,
unsigned msg_length,
unsigned response_length,
bool eot,
+ bool last_render_target,
bool header_present);
void brw_SAMPLE(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6f29468083e..43e578389ef 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2292,6 +2292,7 @@ void brw_fb_WRITE(struct brw_compile *p,
unsigned msg_length,
unsigned response_length,
bool eot,
+ bool last_render_target,
bool header_present)
{
struct brw_context *brw = p->brw;
@@ -2333,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p,
msg_type,
msg_length,
header_present,
- eot, /* last render target write */
+ last_render_target,
response_length,
eot,
0 /* send_commit_msg */);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index ec77962dd3d..ee6ba98bb1b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -370,10 +370,12 @@ public:
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result);
- int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
+ int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
+ bool use_2nd_half);
void emit_alpha_test();
fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
- fs_reg src0_alpha, unsigned components);
+ fs_reg src0_alpha, unsigned components,
+ bool use_2nd_half = false);
void emit_fb_writes();
void emit_urb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 02ea3b6d7c5..e0862665f8b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -214,9 +214,12 @@ fs_generator::fire_fb_write(fs_inst *inst,
if (inst->opcode == FS_OPCODE_REP_FB_WRITE)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
- else if (prog_data->dual_src_blend)
- msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
- else if (dispatch_width == 16)
+ else if (prog_data->dual_src_blend) {
+ if (dispatch_width == 8 || !inst->eot)
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
+ else
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
+ } else if (dispatch_width == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
@@ -224,6 +227,10 @@ fs_generator::fire_fb_write(fs_inst *inst,
uint32_t surf_index =
prog_data->binding_table.render_target_start + inst->target;
+ bool last_render_target = inst->eot ||
+ (prog_data->dual_src_blend && dispatch_width == 16);
+
+
brw_fb_WRITE(p,
dispatch_width,
payload,
@@ -233,6 +240,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
nr,
0,
inst->eot,
+ last_render_target,
inst->header_present);
brw_mark_surface_used(&prog_data->base, surf_index);
@@ -370,6 +378,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
inst->mlen,
0,
true,
+ true,
inst->header_present);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6b48f701fba..e413ae39ea3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3363,7 +3363,8 @@ fs_visitor::emit_interpolation_setup_gen6()
}
int
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
+fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
+ bool use_2nd_half)
{
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
fs_inst *inst;
@@ -3381,7 +3382,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
colors_enabled = (1 << components) - 1;
}
- if (dispatch_width == 8 || brw->gen >= 6) {
+ if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) {
/* SIMD8 write looks like:
* m + 0: r0
* m + 1: r1
@@ -3412,6 +3413,33 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
len++;
}
return len;
+ } else if (brw->gen >= 6 && do_dual_src) {
+ /* SIMD16 dual source blending for gen6+.
+ *
+ * From the SNB PRM, volume 4, part 1, page 193:
+ *
+ * "The dual source render target messages only have SIMD8 forms due to
+ * maximum message length limitations. SIMD16 pixel shaders must send two
+ * of these messages to cover all of the pixels. Each message contains
+ * two colors (4 channels each) for each pixel in the message payload."
+ *
+ * So in SIMD16 dual source blending we will send 2 SIMD8 messages,
+ * each one will call this function twice (one for each color involved),
+ * so in each pass we only write 4 registers. Notice that the second
+ * SIMD8 message needs to read color data from the 2nd half of the color
+ * registers, so it needs to call this with use_2nd_half = true.
+ */
+ for (unsigned i = 0; i < 4; ++i) {
+ if (colors_enabled & (1 << i)) {
+ dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
+ inst = emit(MOV(dst[i], half(offset(color, i),
+ use_2nd_half ? 1 : 0)));
+ inst->saturate = key->clamp_fragment_color;
+ if (use_2nd_half)
+ inst->force_sechalf = true;
+ }
+ }
+ return 4;
} else {
/* pre-gen6 SIMD16 single source DP write looks like:
* m + 0: r0
@@ -3495,7 +3523,8 @@ fs_visitor::emit_alpha_test()
fs_inst *
fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
- fs_reg src0_alpha, unsigned components)
+ fs_reg src0_alpha, unsigned components,
+ bool use_2nd_half)
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
@@ -3555,7 +3584,8 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
- length += setup_color_payload(sources + length, this->outputs[0], 0);
+ length += setup_color_payload(sources + length, this->outputs[0], 0,
+ false);
} else if (color1.file == BAD_FILE) {
if (src0_alpha.file != BAD_FILE) {
sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
@@ -3565,10 +3595,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
length++;
}
- length += setup_color_payload(sources + length, color0, components);
+ length += setup_color_payload(sources + length, color0, components,
+ false);
} else {
- length += setup_color_payload(sources + length, color0, components);
- length += setup_color_payload(sources + length, color1, components);
+ length += setup_color_payload(sources + length, color0, components,
+ use_2nd_half);
+ length += setup_color_payload(sources + length, color1, components,
+ use_2nd_half);
}
if (source_depth_to_render_target) {
@@ -3637,12 +3670,6 @@ fs_visitor::emit_fb_writes()
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- if (do_dual_src) {
- no16("GL_ARB_blend_func_extended not yet supported in SIMD16.");
- if (dispatch_width == 16)
- do_dual_src = false;
- }
-
fs_inst *inst;
if (do_dual_src) {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
@@ -3653,6 +3680,30 @@ fs_visitor::emit_fb_writes()
inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
reg_undef, 4);
inst->target = 0;
+
+ /* SIMD16 dual source blending requires to send two SIMD8 dual source
+ * messages, where each message contains color data for 8 pixels. Color
+ * data for the first group of pixels is stored in the "lower" half of
+ * the color registers, so in SIMD16, the previous message did:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ *
+ * Here goes the second message, which packs color data for the
+ * remaining 8 pixels. Color data for these pixels is stored in the
+ * "upper" half of the color registers, so we need to do:
+ * m + 0: r1
+ * m + 1: g1
+ * m + 2: b1
+ * m + 3: a1
+ */
+ if (dispatch_width == 16) {
+ inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
+ reg_undef, 4, true);
+ inst->target = 0;
+ }
+
prog_data->dual_src_blend = true;
} else if (key->nr_color_regions > 0) {
for (int target = 0; target < key->nr_color_regions; target++) {