summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matt Turner <[email protected]> 2013-04-25 11:03:38 -0700
committer Matt Turner <[email protected]> 2013-04-25 18:27:39 -0700
commit 0c1d87b0d7e2c9f1ae6e838a8fa7f074557e45f0 (patch)
tree bce1895a19eb8941458a63282fc9456eb73d017c
parent c0f67a127b0b3e4bb715d1562a82c984d160280e (diff)
i965/vs: Add support for LRP instruction.
Only 13 affected programs in shader-db, but they were all helped.

total instructions in shared programs: 368877 -> 368851 (-0.01%)
instructions in affected programs: 1576 -> 1550 (-1.65%)

Reviewed-by: Chris Forbes <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 14
5 files changed, 22 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index b3bd1b97667..5addff67318 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -152,8 +152,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
*/
brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
do_mat_op_to_vec(shader->ir);
- const int lrp_to_arith = (intel->gen < 6 || stage != MESA_SHADER_FRAGMENT)
- ? LRP_TO_ARITH : 0;
+ const int lrp_to_arith = intel->gen < 6 ? LRP_TO_ARITH : 0;
lower_instructions(shader->ir,
MOD_TO_FRACT |
DIV_TO_MUL_RCP |
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index c28092244a4..d34ed35ebc6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -386,6 +386,7 @@ public:
vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
+ vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
int implied_mrf_writes(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 51ee4750490..f2c6cd60911 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -215,6 +215,9 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel,
if (has_source_modifiers && !can_do_source_mods(inst))
return false;
+ if (inst->opcode == BRW_OPCODE_LRP && value.file == UNIFORM)
+ return false;
+
/* We can't copy-propagate a UD negation into a condmod
* instruction, because the condmod ends up looking at the 33-bit
* signed accumulator value instead of the 32-bit value we wanted
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c9963bff80a..96b4965045b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -838,6 +838,10 @@ vec4_generator::generate_code(exec_list *instructions)
brw_F16TO32(p, dst, src[0]);
break;
+ case BRW_OPCODE_LRP:
+ brw_LRP(p, dst, src[0], src[1], src[2]);
+ break;
+
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 69e805d1e1a..88c435ca292 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -107,6 +107,14 @@ vec4_visitor::emit(enum opcode opcode)
src0, src1); \
}
+#define ALU3(op) \
+ vec4_instruction * \
+ vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
+ { \
+ return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
+ src0, src1, src2); \
+ }
+
ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
@@ -127,6 +135,7 @@ ALU2(DPH)
ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
+ALU3(LRP)
/** Gen4 predicated IF. */
vec4_instruction *
@@ -1619,7 +1628,10 @@ vec4_visitor::visit(ir_expression *ir)
}
case ir_triop_lrp:
- assert(!"not reached: should be handled by lrp_to_arith");
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+ emit(LRP(result_dst, op[2], op[1], op[0])); /* LRP() is declared as (dst, a, y, x); the IR operands are (x, y, a), so pass them reversed. */
break;
case ir_quadop_vector: