summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2013-03-19 15:28:11 -0700
committer: Eric Anholt <[email protected]> 2013-03-28 11:46:15 -0700
commit: 5c5218ea6163f694a256562df1d73a108396e40d (patch)
tree: ea553484728b0e23448c31842a516b7af1759840
parent: 5c039543db25ab8aa7b4ca1afa2462e5bcee80b2 (diff)
i965/fs: Switch shader_time writes to using GRFs.
This avoids conflicts between shader_time and FB writes, so we can include more of the program under our profiling. This does mean hiding more of the message setup from the optimizer, which doesn't have a way to handle multi-reg sends from GRFs.

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 29
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 5
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 3
6 files changed, 63 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 60ce231bac5..6ef1f83b5ca 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -305,7 +305,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
GLuint offset);
void brw_shader_time_add(struct brw_compile *p,
- int mrf,
+ struct brw_reg payload,
uint32_t surf_index);
/* If/else/endif. Works by manipulating the execution flags on each
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 23556264269..2578bf89cca 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2436,7 +2436,7 @@ brw_svb_write(struct brw_compile *p,
* messages.
*/
void brw_shader_time_add(struct brw_compile *p,
- int base_mrf,
+ struct brw_reg payload,
uint32_t surf_index)
{
struct intel_context *intel = &p->brw->intel;
@@ -2453,8 +2453,8 @@ void brw_shader_time_add(struct brw_compile *p,
*/
brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL, 0));
- brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- base_mrf, 0));
+ brw_set_src0(p, send, brw_vec1_reg(payload.file,
+ payload.nr, 0));
uint32_t sfid, msg_type;
if (intel->is_haswell) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 01a1ec06ac0..9ea7339f581 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -377,6 +377,7 @@ bool
fs_inst::is_send_from_grf()
{
return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
+ opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
(opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
src[1].file == GRF));
}
@@ -607,19 +608,16 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
{
int shader_time_index = brw_get_shader_time_index(brw, prog, &fp->Base,
type);
- int base_mrf = 6;
+ fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
- fs_reg offset_mrf = fs_reg(MRF, base_mrf);
- offset_mrf.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE)));
-
- fs_reg time_mrf = fs_reg(MRF, base_mrf + 1);
- time_mrf.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(time_mrf, value));
+ fs_reg payload;
+ if (dispatch_width == 8)
+ payload = fs_reg(this, glsl_type::uvec2_type);
+ else
+ payload = fs_reg(this, glsl_type::uint_type);
- fs_inst *inst = emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD));
- inst->base_mrf = base_mrf;
- inst->mlen = 2;
+ emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+ fs_reg(), payload, offset, value));
}
void
@@ -735,8 +733,6 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
return 1;
- case SHADER_OPCODE_SHADER_TIME_ADD:
- return 0;
case FS_OPCODE_FB_WRITE:
return 2;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
@@ -1359,6 +1355,13 @@ fs_visitor::split_virtual_grfs()
if (inst->regs_written() > 1) {
split_grf[inst->dst.reg] = false;
}
+
+ /* If we're sending from a GRF, don't split it, on the assumption that
+ * the send is reading the whole thing.
+ */
+ if (inst->is_send_from_grf()) {
+ split_grf[inst->src[0].reg] = false;
+ }
}
/* Allocate new space for split regs. Note that the virtual
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1fabec45cf8..d9d17a2520e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -559,6 +559,11 @@ private:
struct brw_reg dst,
struct brw_reg src);
+ void generate_shader_time_add(fs_inst *inst,
+ struct brw_reg payload,
+ struct brw_reg offset,
+ struct brw_reg value);
+
void patch_discard_jumps_to_fb_writes();
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 712fef6e093..5db481c60ee 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -972,6 +972,41 @@ fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
}
void
+fs_generator::generate_shader_time_add(fs_inst *inst,
+ struct brw_reg payload,
+ struct brw_reg offset,
+ struct brw_reg value)
+{
+ assert(intel->gen >= 7);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, true);
+
+ assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+ struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
+ offset.type);
+ struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
+ value.type);
+
+ assert(offset.file == BRW_IMMEDIATE_VALUE);
+ if (value.file == BRW_GENERAL_REGISTER_FILE) {
+ value.width = BRW_WIDTH_1;
+ value.hstride = BRW_HORIZONTAL_STRIDE_0;
+ value.vstride = BRW_VERTICAL_STRIDE_0;
+ } else {
+ assert(value.file == BRW_IMMEDIATE_VALUE);
+ }
+
+ /* Trying to deal with setup of the params from the IR is crazy in the FS8
+ * case, and we don't really care about squeezing every bit of performance
+ * out of this path, so we just emit the MOVs from here.
+ */
+ brw_MOV(p, payload_offset, offset);
+ brw_MOV(p, payload_value, value);
+ brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME);
+ brw_pop_insn_state(p);
+}
+
+void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
@@ -1291,7 +1326,7 @@ fs_generator::generate_code(exec_list *instructions)
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
- brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
+ generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
case FS_OPCODE_SET_SIMD4X2_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index b73711cf7ad..54f3efd08ec 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -674,7 +674,8 @@ vec4_generator::generate_vs_instruction(vec4_instruction *instruction,
break;
case SHADER_OPCODE_SHADER_TIME_ADD:
- brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_VS_SHADER_TIME);
+ brw_shader_time_add(p, brw_message_reg(inst->base_mrf),
+ SURF_INDEX_VS_SHADER_TIME);
break;
default: