diff options
author | Eric Anholt <[email protected]> | 2013-03-19 15:28:11 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-03-28 11:46:15 -0700 |
commit | 5c5218ea6163f694a256562df1d73a108396e40d (patch) | |
tree | ea553484728b0e23448c31842a516b7af1759840 /src/mesa/drivers/dri/i965/brw_fs_emit.cpp | |
parent | 5c039543db25ab8aa7b4ca1afa2462e5bcee80b2 (diff) |
i965/fs: Switch shader_time writes to using GRFs.
This avoids conflicts between shader_time and FB writes, so we can include
more of the program under our profiling. This does mean hiding more of
the message setup from the optimizer, which doesn't have a way to handle
multi-reg sends from GRFs.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 712fef6e093..5db481c60ee 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -972,6 +972,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, } void +fs_generator::generate_shader_time_add(fs_inst *inst, + struct brw_reg payload, + struct brw_reg offset, + struct brw_reg value) +{ + assert(intel->gen >= 7); + brw_push_insn_state(p); + brw_set_mask_control(p, true); + + assert(payload.file == BRW_GENERAL_REGISTER_FILE); + struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0), + offset.type); + struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0), + value.type); + + assert(offset.file == BRW_IMMEDIATE_VALUE); + if (value.file == BRW_GENERAL_REGISTER_FILE) { + value.width = BRW_WIDTH_1; + value.hstride = BRW_HORIZONTAL_STRIDE_0; + value.vstride = BRW_VERTICAL_STRIDE_0; + } else { + assert(value.file == BRW_IMMEDIATE_VALUE); + } + + /* Trying to deal with setup of the params from the IR is crazy in the FS8 + * case, and we don't really care about squeezing every bit of performance + * out of this path, so we just emit the MOVs from here. + */ + brw_MOV(p, payload_offset, offset); + brw_MOV(p, payload_value, value); + brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME); + brw_pop_insn_state(p); +} + +void fs_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = p->next_insn_offset; @@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions) break; case SHADER_OPCODE_SHADER_TIME_ADD: - brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME); + generate_shader_time_add(inst, src[0], src[1], src[2]); break; case FS_OPCODE_SET_SIMD4X2_OFFSET: |