summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2011-03-14 10:29:12 -0700
committerEric Anholt <[email protected]>2011-04-26 12:19:49 -0700
commit42ad2f0b9b6a18f1613f6d915a46b4a4a89c5aa2 (patch)
tree05687562e1c52e59fa618596f3aaba237899e737 /src/mesa/drivers
parent662f1b48bd1a02907bb42ecda889a3aa52a5755d (diff)
i965/fs: Add support for 16-wide dispatch on gen5.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp73
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c30
3 files changed, 93 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 8785957b6e6..4e3adbc0a69 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2040,6 +2040,59 @@ fs_visitor::emit_interpolation_setup_gen6()
}
void
+fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
+{
+ int reg_width = c->dispatch_width / 8;
+
+ if (c->dispatch_width == 8 || intel->gen == 6) {
+ /* SIMD8 write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ *
+ * gen6 SIMD16 DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ } else {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ *
+ * By setting the high bit of the MRF register number,
+ * we could indicate that we want COMPR4 mode - instead
+ * of doing the usual destination + 1 for the second
+ * half we would get destination + 4. We would need to
+ * clue the optimizer into that, though.
+ */
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ pop_force_uncompressed();
+
+ push_force_sechalf();
+ color.sechalf = true;
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ pop_force_sechalf();
+ color.sechalf = false;
+ }
+}
+
+void
fs_visitor::emit_fb_writes()
{
this->current_annotation = "FB write header";
@@ -2113,7 +2166,7 @@ fs_visitor::emit_fb_writes()
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i * reg_width), color);
+ emit_color_write(i, color_mrf, color);
color.reg_offset++;
}
}
@@ -2137,7 +2190,7 @@ fs_visitor::emit_fb_writes()
* renderbuffer.
*/
color.reg_offset += 3;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color);
+ emit_color_write(3, color_mrf, color);
}
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
@@ -2330,7 +2383,7 @@ fs_visitor::generate_math(fs_inst *inst,
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
- } else {
+ } else /* gen <= 5 */{
assert(inst->mlen >= 1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -2351,6 +2404,7 @@ fs_visitor::generate_math(fs_inst *inst,
inst->base_mrf + 1, sechalf(src[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
@@ -3528,6 +3582,8 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
reg->hw_reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
+ if (reg->sechalf)
+ brw_reg = sechalf(brw_reg);
break;
case IMM:
switch (reg->type) {
@@ -3881,7 +3937,7 @@ fs_visitor::run()
/* Haven't hooked in support for uniforms through the 16-wide
* version yet.
*/
- return GL_FALSE;
+ return false;
}
/* align to 64 byte boundary. */
@@ -3957,11 +4013,10 @@ fs_visitor::run()
assert(force_uncompressed_stack == 0);
assert(force_sechalf_stack == 0);
- if (!failed)
- generate_code();
-
if (failed)
- return GL_FALSE;
+ return false;
+
+ generate_code();
if (c->dispatch_width == 8) {
c->prog_data.total_grf = grf_used;
@@ -4005,7 +4060,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
return false;
}
- if (intel->gen >= 6) {
+ if (intel->gen >= 5) {
c->dispatch_width = 16;
fs_visitor v2(c, shader);
v2.run();
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b158992071e..60398ac870e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -178,6 +178,7 @@ public:
int type;
bool negate;
bool abs;
+ bool sechalf;
struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
@@ -521,6 +522,7 @@ public:
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_color_write(int index, int first_color_mrf, fs_reg color);
void emit_fb_writes();
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index be4b260a5ff..9d0a7a8d27d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -41,10 +41,11 @@
*/
struct brw_wm_unit_key {
- unsigned int total_grf, total_scratch;
+ unsigned int total_grf, total_grf_16, total_scratch;
unsigned int urb_entry_read_length;
unsigned int curb_entry_read_length;
unsigned int dispatch_grf_start_reg;
+ uint32_t prog_offset_16;
unsigned int curbe_offset;
@@ -92,10 +93,21 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
+ key->total_grf_16 = brw->wm.prog_data->total_grf_16;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = brw->wm.prog_data->total_scratch;
+ key->prog_offset_16 = brw->wm.prog_data->prog_offset_16;
+
+ if (key->prog_offset_16) {
+ /* These two fields should be the same pre-gen6, which is why we
+ * only have one hardware field to program for both dispatch
+ * widths.
+ */
+ assert(brw->wm.prog_data->first_curbe_grf ==
+ brw->wm.prog_data->first_curbe_grf_16);
+ }
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
@@ -166,7 +178,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
memset(&wm, 0, sizeof(wm));
wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ wm.wm9.grf_reg_count_2 = ALIGN(key->total_grf_16, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ wm.wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
+ key->prog_offset_16) >> 6; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
@@ -206,9 +221,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
- if (key->is_glsl)
+ if (key->is_glsl) {
wm.wm5.enable_8_pix = 1;
- else
+ if (key->prog_offset_16)
+ wm.wm5.enable_16_pix = 1;
+ } else
wm.wm5.enable_16_pix = 1;
wm.wm5.max_threads = brw->wm_max_threads - 1;
@@ -256,6 +273,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
+ if (key->prog_offset_16) {
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm9),
+ brw->wm.prog_bo, ((wm.wm9.grf_reg_count_2 << 1) +
+ key->prog_offset_16),
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),