summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2016-01-21 09:10:09 -0800
committerMatt Turner <[email protected]>2016-03-04 11:52:34 -0800
commit1f862e923cba1d5cd54a707f70f0be113635e855 (patch)
tree076cbf6c8bc3d091ac7b2703acc17c1f9314143f
parent905ff861982450831a56d112036f68a751337441 (diff)
i965/fs: Optimize float conversions of byte/word extract.
instructions in affected programs: 31535 -> 29966 (-4.98%) helped: 23 cycles in affected programs: 272648 -> 266022 (-2.43%) helped: 14 HURT: 1 The patch decreases the number of instructions in the two Unigine programs by: #1721: 4374 -> 4155 instructions (-5.01%) #1706: 3582 -> 3363 instructions (-6.11%) Reviewed-by: Kenneth Graunke <[email protected]> Reviewed-by: Iago Toral Quiroga <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp46
2 files changed, 48 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7446ca10cd1..21c7813509b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -272,6 +272,8 @@ public:
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
+ bool optimize_extract_to_float(nir_alu_instr *instr,
+ const fs_reg &result);
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index db20c71406e..04e9b8f69c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
}
}
+/**
+ * Recognizes a parent instruction of nir_op_extract_* and changes the type to
+ * match instr.
+ */
+bool
+fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
+ const fs_reg &result)
+{
+ if (!instr->src[0].src.is_ssa ||
+ !instr->src[0].src.ssa->parent_instr)
+ return false;
+
+ if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *src0 =
+ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+
+ if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 &&
+ src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
+ return false;
+
+ nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
+ assert(element != NULL);
+
+ enum opcode extract_op;
+ if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
+ assert(element->u[0] <= 1);
+ extract_op = SHADER_OPCODE_EXTRACT_WORD;
+ } else {
+ assert(element->u[0] <= 3);
+ extract_op = SHADER_OPCODE_EXTRACT_BYTE;
+ }
+
+ fs_reg op0 = get_nir_src(src0->src[0].src);
+ op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]);
+ op0 = offset(op0, bld, src0->src[0].swizzle[0]);
+
+ set_saturate(instr->dest.saturate,
+ bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0])));
+ return true;
+}
+
bool
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result)
@@ -671,6 +714,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
switch (instr->op) {
case nir_op_i2f:
case nir_op_u2f:
+ if (optimize_extract_to_float(instr, result))
+ return;
+
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;