summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2014-09-12 16:17:37 -0700
committer Jason Ekstrand <[email protected]> 2014-09-30 10:29:15 -0700
commit 514fd1c55e617bb325979cbee4a89f0727c3b567 (patch)
tree cbacd36b7ed0083887d31a5d8e51a24919f65552
parent 1dd9b90ecd8e001b40febfb8908c0b9a0c08c7d5 (diff)
i965/fs: Use the GRF for FB writes on gen >= 7
On gen 7, the MRF was removed and we gained the ability to do send instructions directly from the GRF. This commit enables that functionality for FB writes. v2: Make handling of components more sane. i965/fs: Force a high register for the final FB write v2: Renamed the array for the range mappings and added a comment Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp34
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp162
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp1
-rw-r--r--src/mesa/drivers/dri/i965/intel_screen.h10
6 files changed, 142 insertions, 71 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3277b586ede..b9bd94c3070 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -512,6 +512,8 @@ fs_inst::is_send_from_grf() const
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == GRF;
+ case FS_OPCODE_FB_WRITE:
+ return src[0].file == GRF;
default:
if (is_tex())
return src[0].file == GRF;
@@ -915,6 +917,8 @@ fs_inst::regs_read(fs_visitor *v, int arg) const
{
if (is_tex() && arg == 0 && src[0].file == GRF) {
return mlen;
+ } else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
+ return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 77d76807f37..49024d87849 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -523,7 +523,7 @@ public:
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
- void emit_color_write(fs_reg color, int index, int first_color_mrf);
+ int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components);
void emit_alpha_test();
fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
fs_reg src0_alpha, unsigned components);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index ef5720c5368..63d9c05a575 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -113,6 +113,10 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
class_sizes[class_count++] = 8;
}
+ memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0,
+ sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range));
+ int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range;
+
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
@@ -131,6 +135,14 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width)
} else {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
+ /* Mark the last register. We'll fill in the beginnings later. */
+ class_to_ra_reg_range[class_sizes[i]] = ra_reg_count;
+ }
+
+ /* Fill out the rest of the range markers */
+ for (int i = 1; i < 17; ++i) {
+ if (class_to_ra_reg_range[i] == 0)
+ class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1];
}
uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count);
@@ -505,9 +517,29 @@ fs_visitor::assign_regs(bool allow_spilling)
}
setup_payload_interference(g, payload_node_count, first_payload_node);
- if (brw->gen >= 7)
+ if (brw->gen >= 7) {
setup_mrf_hack_interference(g, first_mrf_hack_node);
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ /* When we do send-from-GRF for FB writes, we need to ensure that
+ * the last write instruction sends from a high register. This is
+ * because the vertex fetcher wants to start filling the low
+ * payload registers while the pixel data port is still working on
+ * writing out the memory. If we don't do this, we get rendering
+ * artifacts.
+ *
+ * We could just do "something high". Instead, we just pick the
+ * highest register that works.
+ */
+ if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+ int size = virtual_grf_sizes[inst->src[0].reg];
+ int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
+ ra_set_node_reg(g, inst->src[0].reg, reg);
+ break;
+ }
+ }
+ }
+
if (dispatch_width > 8) {
/* In 16-wide dispatch we have an issue where a compressed
* instruction is actually two instructions executed simultaneiously.
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index c4cc2e9caf8..9f65b1f4cd0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2967,20 +2967,24 @@ fs_visitor::emit_interpolation_setup_gen6()
this->current_annotation = NULL;
}
-void
-fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf)
+int
+fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components)
{
- assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- int reg_width = dispatch_width / 8;
fs_inst *inst;
- fs_reg mrf;
- /* If there's no color data to be written, skip it. */
- if (color.file == BAD_FILE)
- return;
+ if (color.file == BAD_FILE) {
+ return 4 * (dispatch_width / 8);
+ }
- color = offset(color, index);
+ uint8_t colors_enabled;
+ if (components == 0) {
+ /* We want to write one component to the alpha channel */
+ colors_enabled = 0x8;
+ } else {
+ /* Enable the first components-many channels */
+ colors_enabled = (1 << components) - 1;
+ }
if (dispatch_width == 8 || brw->gen >= 6) {
/* SIMD8 write looks like:
@@ -2999,10 +3003,20 @@ fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf)
* m + 6: a0
* m + 7: a1
*/
- inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
- color.type),
- color));
- inst->saturate = key->clamp_fragment_color;
+ int len = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (colors_enabled & (1 << i)) {
+ dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8),
+ color.type, color.width);
+ inst = emit(MOV(dst[len], offset(color, i)));
+ inst->saturate = key->clamp_fragment_color;
+ } else if (color.width == 16) {
+ /* We need two BAD_FILE slots for a 16-wide color */
+ len++;
+ }
+ len++;
+ }
+ return len;
} else {
/* pre-gen6 SIMD16 single source DP write looks like:
* m + 0: r0
@@ -3014,26 +3028,19 @@ fs_visitor::emit_color_write(fs_reg color, int index, int first_color_mrf)
* m + 6: b1
* m + 7: a1
*/
- if (brw->has_compr4) {
- /* By setting the high bit of the MRF register number, we
- * indicate that we want COMPR4 mode - instead of doing the
- * usual destination + 1 for the second half we get
- * destination + 4.
- */
- inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
- color.type),
- color));
- inst->saturate = key->clamp_fragment_color;
- } else {
- inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
- color));
- inst->saturate = key->clamp_fragment_color;
-
- inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
- half(color, 1)));
- inst->force_sechalf = true;
- inst->saturate = key->clamp_fragment_color;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (colors_enabled & (1 << i)) {
+ dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ inst = emit(MOV(dst[i], half(offset(color, i), 0)));
+ inst->saturate = key->clamp_fragment_color;
+
+ dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type);
+ inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
+ inst->saturate = key->clamp_fragment_color;
+ inst->force_sechalf = true;
+ }
}
+ return 8;
}
}
@@ -3101,12 +3108,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
this->current_annotation = "FB write header";
bool header_present = true;
+ int reg_size = dispatch_width / 8;
+
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
*/
- int base_mrf = 1;
- int nr = base_mrf;
- int reg_width = dispatch_width / 8;
+ fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
+ int length = 0;
/* From the Sandy Bridge PRM, volume 4, page 198:
*
@@ -3123,12 +3131,14 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
}
if (header_present)
- /* m2, m3 header */
- nr += 2;
+ /* Allocate 2 registers for a header */
+ length += 2;
if (payload.aa_dest_stencil_reg) {
- emit(MOV(fs_reg(MRF, nr++),
+ sources[length] = fs_reg(GRF, virtual_grf_alloc(1));
+ emit(MOV(sources[length],
fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0))));
+ length++;
}
prog_data->uses_omask =
@@ -3136,9 +3146,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
if (prog_data->uses_omask) {
this->current_annotation = "FB write oMask";
assert(this->sample_mask.file != BAD_FILE);
- /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */
- emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask);
- nr += 1;
+ /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since
+ * it's unsigned single words, one vgrf is always 16-wide.
+ */
+ sources[length] = fs_reg(GRF, virtual_grf_alloc(1),
+ BRW_REGISTER_TYPE_UW, 16);
+ emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask);
+ length++;
}
if (color0.file == BAD_FILE) {
@@ -3146,28 +3160,20 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
- emit_color_write(this->outputs[0], 3, nr);
- nr += 4 * reg_width;
+ length += setup_color_payload(sources + length, this->outputs[0], 0);
} else if (color1.file == BAD_FILE) {
if (src0_alpha.file != BAD_FILE) {
- fs_inst *inst;
- inst = emit(MOV(fs_reg(MRF, nr, src0_alpha.type), src0_alpha));
+ sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size),
+ src0_alpha.type, src0_alpha.width);
+ fs_inst *inst = emit(MOV(sources[length], src0_alpha));
inst->saturate = key->clamp_fragment_color;
- nr += reg_width;
+ length++;
}
- for (unsigned i = 0; i < components; i++)
- emit_color_write(color0, i, nr);
-
- nr += 4 * reg_width;
+ length += setup_color_payload(sources + length, color0, components);
} else {
- for (unsigned i = 0; i < components; i++)
- emit_color_write(color0, i, nr);
- nr += 4 * reg_width;
-
- for (unsigned i = 0; i < components; i++)
- emit_color_write(color1, i, nr);
- nr += 4 * reg_width;
+ length += setup_color_payload(sources + length, color0, components);
+ length += setup_color_payload(sources + length, color1, components);
}
if (source_depth_to_render_target) {
@@ -3180,33 +3186,51 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
no16("Missing support for simd16 depth writes on gen6\n");
}
+ sources[length] = fs_reg(this, glsl_type::float_type);
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth.file != BAD_FILE);
- emit(MOV(fs_reg(MRF, nr), this->frag_depth));
+ emit(MOV(sources[length], this->frag_depth));
} else {
/* Pass through the payload depth. */
- emit(MOV(fs_reg(MRF, nr),
+ emit(MOV(sources[length],
fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
}
- nr += reg_width;
+ length++;
}
if (payload.dest_depth_reg) {
- emit(MOV(fs_reg(MRF, nr),
+ sources[length] = fs_reg(this, glsl_type::float_type);
+ emit(MOV(sources[length],
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
- nr += reg_width;
+ length++;
}
- fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
- inst->base_mrf = base_mrf;
- inst->mlen = nr - base_mrf;
- inst->header_present = header_present;
+ fs_inst *load;
+ fs_inst *write;
+ if (brw->gen >= 7) {
+ /* Send from the GRF */
+ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+ load = emit(LOAD_PAYLOAD(payload, sources, length));
+ payload.reg = virtual_grf_alloc(load->regs_written);
+ load->dst = payload;
+ write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload);
+ write->base_mrf = -1;
+ } else {
+ /* Send from the MRF */
+ load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
+ sources, length));
+ write = emit(FS_OPCODE_FB_WRITE);
+ write->base_mrf = 1;
+ }
+
+ write->mlen = load->regs_written;
+ write->header_present = header_present;
if ((brw->gen >= 8 || brw->is_haswell) && prog_data->uses_kill) {
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->flag_subreg = 1;
+ write->predicate = BRW_PREDICATE_NORMAL;
+ write->flag_subreg = 1;
}
- return inst;
+ return write;
}
void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 92089dbf8a7..4f58f2865e1 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -738,6 +738,7 @@ backend_instruction::has_side_effects() const
{
switch (opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case FS_OPCODE_FB_WRITE:
return true;
default:
return false;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h
index ea0be2c52da..393315ea292 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -89,6 +89,16 @@ struct intel_screen
int classes[16];
/**
+ * Mapping from classes to ra_reg ranges. Each of the per-size
+ * classes corresponds to a range of ra_reg nodes. This array stores
+ * those ranges in the form of first ra_reg in each class and the
+ * total number of ra_reg elements in the last array element. This
+ * way the range of the i'th class is given by:
+ * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
+ */
+ int class_to_ra_reg_range[17];
+
+ /**
* Mapping for register-allocated objects in *regs to the first
* GRF for that object.
*/