aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2013-03-19 15:28:11 -0700
committer: Eric Anholt <[email protected]> 2013-03-28 11:46:15 -0700
commit: 5c5218ea6163f694a256562df1d73a108396e40d (patch)
tree: ea553484728b0e23448c31842a516b7af1759840 /src/mesa/drivers/dri/i965/brw_fs_emit.cpp
parent: 5c039543db25ab8aa7b4ca1afa2462e5bcee80b2 (diff)
i965/fs: Switch shader_time writes to using GRFs.
This avoids conflicts between shader_time and FB writes, so we can include more of the program under our profiling. This does mean hiding more of the message setup from the optimizer, which doesn't have a way to handle multi-reg sends from GRFs.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 37
1 files changed, 36 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 712fef6e093..5db481c60ee 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -972,6 +972,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
}
void
+fs_generator::generate_shader_time_add(fs_inst *inst, /* inst is not referenced in this body */
+ struct brw_reg payload, /* must be a GRF (asserted below); registers payload.nr and payload.nr + 1 are written */
+ struct brw_reg offset, /* must be an immediate (asserted below) */
+ struct brw_reg value) /* either a GRF or an immediate (asserted below) */
+{ /* Moves offset and value into the two payload registers, then calls brw_shader_time_add() on that payload. */
+ assert(intel->gen >= 7);
+ brw_push_insn_state(p); /* paired with brw_pop_insn_state() at the end of this function */
+ brw_set_mask_control(p, true); /* NOTE(review): presumably disables the execution mask for the MOVs below - confirm */
+
+ assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+ struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
+ offset.type); /* destination in register payload.nr, given offset's type */
+ struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
+ value.type); /* destination in the following register, given value's type */
+
+ assert(offset.file == BRW_IMMEDIATE_VALUE);
+ if (value.file == BRW_GENERAL_REGISTER_FILE) { /* GRF source: narrow the local copy to width 1 with zero strides */
+ value.width = BRW_WIDTH_1;
+ value.hstride = BRW_HORIZONTAL_STRIDE_0;
+ value.vstride = BRW_VERTICAL_STRIDE_0;
+ } else {
+ assert(value.file == BRW_IMMEDIATE_VALUE);
+ }
+
+ /* Trying to deal with setup of the params from the IR is crazy in the FS8
+ * case, and we don't really care about squeezing every bit of performance
+ * out of this path, so we just emit the MOVs from here.
+ */
+ brw_MOV(p, payload_offset, offset); /* register payload.nr <- offset */
+ brw_MOV(p, payload_value, value); /* register payload.nr + 1 <- value */
+ brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME); /* issued after both MOVs have filled the payload */
+ brw_pop_insn_state(p);
+}
+
+void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
@@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions)
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
- brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
+ generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
case FS_OPCODE_SET_SIMD4X2_OFFSET: