diff options
author | Kenneth Graunke <[email protected]> | 2015-02-26 22:55:54 -0800 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2015-03-09 16:07:04 -0700 |
commit | 4ebeb71573ad44f7657810dc5dd2c9030e3e63db (patch) | |
tree | 00329a9d4e619753cc998bb884c0a44b2bfbd63f | |
parent | e43af8d09f919d02b5ac0810c1c0f1783cbef6ef (diff) |
i965/fs: Make emit_shader_time_end() insert before EOT.
Previously, we emitted the shader-time epilogue from emit_fb_writes(),
in the middle of the loop over color regions (or from emit_urb_writes()
for the VS). This was duplicated several times and rather awkward.
I need to fix a bug in our FB write handling, and it will be a lot
easier if we move emit_shader_time_end() out of there.
Now, we simply emit FB writes/URB writes, and subsequently have
emit_shader_time_end() insert instructions before the final SEND with
EOT. Not only is this simpler, it's actually a slight improvement:
we now include the MOVs to set up the final FB write payload in our
shader-time measurements.
Note that INTEL_DEBUG=shader_time only exists on Gen7+, and uses
send-from-GRF. (In the past, we might have hit trouble where both the
shader-time code and the final FB/URB write attempted to use MRFs for
their messages; that's not a problem now.)
v2: Rebase on v3 of the previous patch and other shader_time fixes.
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Topi Pohjolainen <[email protected]> [v1]
Acked-by: Matt Turner <[email protected]>
Cc: [email protected]
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 28 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 13 |
2 files changed, 18 insertions, 23 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ee14a7a6483..89754ad9434 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -759,19 +759,24 @@ fs_visitor::emit_shader_time_end() unreachable("fs_visitor::emit_shader_time_end missing code"); } + /* Insert our code just before the final SEND with EOT. */ + exec_node *end = this->instructions.get_tail(); + assert(end && ((fs_inst *) end)->eot); + fs_inst *tm_read; fs_reg shader_end_time = get_timestamp(&tm_read); - emit(tm_read); + end->insert_before(tm_read); /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ fs_reg reset = shader_end_time; reset.set_smear(2); - fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u))); + fs_inst *test = AND(reg_null_d, reset, fs_reg(1u)); test->conditional_mod = BRW_CONDITIONAL_Z; test->force_writemask_all = true; - emit(IF(BRW_PREDICATE_NORMAL)); + end->insert_before(test); + end->insert_before(IF(BRW_PREDICATE_NORMAL)); fs_reg start = shader_start_time; start.negate = true; @@ -779,7 +784,7 @@ fs_visitor::emit_shader_time_end() diff.set_smear(0); fs_inst *add = ADD(diff, start, shader_end_time); add->force_writemask_all = true; - emit(add); + end->insert_before(add); /* If there were no instructions between the two timestamp gets, the diff * is 2 cycles. 
Remove that overhead, so I can forget about that when @@ -787,13 +792,13 @@ fs_visitor::emit_shader_time_end() */ add = ADD(diff, diff, fs_reg(-2u)); add->force_writemask_all = true; - emit(add); + end->insert_before(add); - emit(SHADER_TIME_ADD(type, diff)); - emit(SHADER_TIME_ADD(written_type, fs_reg(1u))); - emit(BRW_OPCODE_ELSE); - emit(SHADER_TIME_ADD(reset_type, fs_reg(1u))); - emit(BRW_OPCODE_ENDIF); + end->insert_before(SHADER_TIME_ADD(type, diff)); + end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u))); + end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width)); + end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u))); + end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width)); } fs_inst * @@ -3922,6 +3927,9 @@ fs_visitor::run_fs() emit_fb_writes(); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + calculate_cfg(); optimize(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cdaba7f47d9..522238759aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3670,9 +3670,6 @@ fs_visitor::emit_fb_writes() fs_inst *inst; if (do_dual_src) { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - emit_shader_time_end(); - this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB dual-source write"); inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, @@ -3712,19 +3709,12 @@ fs_visitor::emit_fb_writes() if (brw->gen >= 6 && key->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], 3); - if (target == key->nr_color_regions - 1 && - (INTEL_DEBUG & DEBUG_SHADER_TIME)) - emit_shader_time_end(); - inst = emit_single_fb_write(this->outputs[target], reg_undef, src0_alpha, this->output_components[target]); inst->target = target; } } else { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - emit_shader_time_end(); - /* Even if there's no color buffers enabled, we still need to send * 
alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. @@ -3935,9 +3925,6 @@ fs_visitor::emit_urb_writes() if (length == 8 || last) flush = true; if (flush) { - if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME)) - emit_shader_time_end(); - fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F); |