summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/intel/compiler/brw_fs.cpp | 103
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp | 66
2 files changed, 86 insertions, 83 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index fdf2f9d7ac1..479e07f1dc5 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3235,7 +3235,18 @@ fs_visitor::emit_repclear_shader()
write->mlen = 1;
} else {
assume(key->nr_color_regions > 0);
+
+ struct brw_reg header =
+ retype(brw_message_reg(base_mrf), BRW_REGISTER_TYPE_UD);
+ bld.exec_all().group(16, 0)
+ .MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
for (int i = 0; i < key->nr_color_regions; ++i) {
+ if (i > 0) {
+ bld.exec_all().group(1, 0)
+ .MOV(component(header, 2), brw_imm_ud(i));
+ }
+
write = bld.emit(FS_OPCODE_REP_FB_WRITE);
write->saturate = key->clamp_fragment_color;
write->base_mrf = base_mrf;
@@ -3960,25 +3971,83 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
int header_size = 2, payload_header_size;
unsigned length = 0;
- /* From the Sandy Bridge PRM, volume 4, page 198:
- *
- * "Dispatched Pixel Enables. One bit per pixel indicating
- * which pixels were originally enabled when the thread was
- * dispatched. This field is only required for the end-of-
- * thread message and on all dual-source messages."
- */
- if (devinfo->gen >= 6 &&
- (devinfo->is_haswell || devinfo->gen >= 8 || !prog_data->uses_kill) &&
- color1.file == BAD_FILE &&
- key->nr_color_regions == 1) {
- header_size = 0;
- }
+ if (devinfo->gen < 6) {
+ /* For gen4-5, we always have a header consisting of g0 and g1. We have
+ * an implied MOV from g0,g1 to the start of the message. The MOV from
+ * g0 is handled by the hardware and the MOV from g1 is provided by the
+ * generator. This is required because, on gen4-5, the generator may
+ * generate two write messages with different message lengths in order
+ * to handle AA data properly.
+ *
+ * Also, since the pixel mask goes in the g0 portion of the message and
+ * since render target writes are the last thing in the shader, we write
+ * the pixel mask directly into g0 and it will get copied as part of the
+ * implied write.
+ */
+ if (prog_data->uses_kill) {
+ bld.exec_all().group(1, 0)
+ .MOV(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ brw_flag_reg(0, 1));
+ }
+
+ assert(length == 0);
+ length = 2;
+ } else if ((devinfo->gen <= 7 && !devinfo->is_haswell &&
+ prog_data->uses_kill) ||
+ color1.file != BAD_FILE ||
+ key->nr_color_regions > 1) {
+ /* From the Sandy Bridge PRM, volume 4, page 198:
+ *
+ * "Dispatched Pixel Enables. One bit per pixel indicating
+ * which pixels were originally enabled when the thread was
+ * dispatched. This field is only required for the end-of-
+ * thread message and on all dual-source messages."
+ */
+ const fs_builder ubld = bld.exec_all().group(8, 0);
+
+ /* The header starts off as g0 and g1 */
+ fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ ubld.group(16, 0).MOV(header, retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UD));
+
+ uint32_t g00_bits = 0;
+
+ /* Set "Source0 Alpha Present to RenderTarget" bit in message
+ * header.
+ */
+ if (inst->target > 0 && key->replicate_alpha)
+ g00_bits |= 1 << 11;
+
+ /* Set computed stencil to render target */
+ if (prog_data->computed_stencil)
+ g00_bits |= 1 << 14;
+
+ if (g00_bits) {
+ /* OR extra bits into g0.0 */
+ ubld.group(1, 0).OR(component(header, 0),
+ retype(brw_vec1_grf(0, 0),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(g00_bits));
+ }
+
+ /* Set the render target index for choosing BLEND_STATE. */
+ if (inst->target > 0) {
+ ubld.group(1, 0).MOV(component(header, 2), brw_imm_ud(inst->target));
+ }
+
+ if (prog_data->uses_kill) {
+ ubld.group(1, 0).MOV(retype(component(header, 15),
+ BRW_REGISTER_TYPE_UW),
+ brw_flag_reg(0, 1));
+ }
- if (header_size != 0) {
- assert(header_size == 2);
- /* Allocate 2 registers for a header */
- length += 2;
+ assert(length == 0);
+ sources[0] = header;
+ sources[1] = horiz_offset(header, 8);
+ length = 2;
}
+ assert(length == 0 || length == 2);
+ header_size = length;
if (payload.aa_dest_stencil_reg) {
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index f72139d43c8..f00c07b1bc6 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -307,9 +307,6 @@ fs_generator::fire_fb_write(fs_inst *inst,
void
fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
{
- struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
- const brw_wm_prog_key * const key = (brw_wm_prog_key * const) this->key;
-
if (devinfo->gen < 8 && !devinfo->is_haswell) {
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
@@ -320,69 +317,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
if (inst->base_mrf >= 0)
payload = brw_message_reg(inst->base_mrf);
- /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
- * move, here's g1.
- */
- if (inst->header_size != 0) {
- brw_push_insn_state(p);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_set_default_exec_size(p, BRW_EXECUTE_1);
- brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_flag_reg(p, 0, 0);
-
- /* On HSW, the GPU will use the predicate on SENDC, unless the header is
- * present.
- */
- if (prog_data->uses_kill) {
- struct brw_reg pixel_mask;
-
- if (devinfo->gen >= 6)
- pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
- else
- pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
- }
-
- if (devinfo->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_16);
- brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- brw_MOV(p,
- retype(payload, BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- brw_pop_insn_state(p);
-
- if (inst->target > 0 && key->replicate_alpha) {
- /* Set "Source0 Alpha Present to RenderTarget" bit in message
- * header.
- */
- brw_OR(p,
- vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
- vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
- brw_imm_ud(0x1 << 11));
- }
-
- if (inst->target > 0) {
- /* Set the render target index for choosing BLEND_STATE. */
- brw_MOV(p, retype(vec1(suboffset(payload, 2)),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(inst->target));
- }
-
- /* Set computes stencil to render target */
- if (prog_data->computed_stencil) {
- brw_OR(p,
- vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
- vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
- brw_imm_ud(0x1 << 14));
- }
- }
-
- brw_pop_insn_state(p);
- }
-
if (!runtime_check_aads_emit) {
fire_fb_write(inst, payload, implied_header, inst->mlen);
} else {