diff options
author | Kenneth Graunke <[email protected]> | 2012-12-02 00:08:15 -0800 |
---|---|---|
committer | Matt Turner <[email protected]> | 2013-02-28 13:19:00 -0800 |
commit | 0a1d145e5f1e6120e70e9b46e069167a0d653579 (patch) | |
tree | 30e57f6ef19800b87e8ad0fd859eec726fd97a69 /src/mesa | |
parent | 015a48743dfcf138cce5752098e01a6cfd6efefe (diff) |
i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
v2 [mattst88]:
- Add BRW_OPCODE_LRP to list of CSE-able expressions.
- Fix op_var[] array size.
- Rename arguments to emit_lrp to (x, y, a) to clear up confusion.
- Add LRP function to brw_fs.cpp/.h.
- Correct comment about LRP instruction arguments in emit_lrp.
v3 [mattst88]:
- Duplicate MAD code for LRP instead of using a function pointer.
- Check for != GRF instead of == IMM in emit_lrp.
- Lower LRP on gen < 6.
Reviewed-by: Matt Turner <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Signed-off-by: Kenneth Graunke <[email protected]>
1
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 35 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 4 |
7 files changed, 75 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ff4248bd61a..fe347018472 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst, return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \ } +#define ALU3(op) \ + fs_inst * \ + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \ + { \ + return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\ + } + ALU1(NOT) ALU1(MOV) ALU1(FRC) @@ -161,6 +168,7 @@ ALU2(XOR) ALU2(SHL) ALU2(SHR) ALU2(ASR) +ALU3(LRP) /** Gen4 predicated IF. */ fs_inst * diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d1bb111bf5f..17ef046533b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -285,6 +285,7 @@ public: fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition); fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition); + fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x); fs_inst *DEP_RESOLVE_MOV(int grf); int type_size(const struct glsl_type *type); @@ -361,6 +362,7 @@ public: fs_reg fix_math_operand(fs_reg src); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); + void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a); void emit_minmax(uint32_t conditionalmod, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index ea0622576fa..30d8d9bf527 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -135,7 +135,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) ir_expression *expr = ir->rhs->as_expression(); bool 
found_vector = false; unsigned int i, vector_elements = 1; - ir_variable *op_var[2]; + ir_variable *op_var[3]; if (!expr) return visit_continue; @@ -342,6 +342,20 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!"not yet supported"); break; + case ir_triop_lrp: + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + ir_rvalue *op2 = get_element(op_var[2], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1, + op2)); + } + break; + case ir_unop_pack_snorm_2x16: case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 44479d8e9ff..e0f824c5e3e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst) case BRW_OPCODE_LINE: case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: + case BRW_OPCODE_LRP: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_CINTERP: case FS_OPCODE_LINTERP: diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 3d1f3b356a8..a25f594d9d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -1095,6 +1095,20 @@ fs_generator::generate_code(exec_list *instructions) brw_set_access_mode(p, BRW_ALIGN_1); break; + case BRW_OPCODE_LRP: + brw_set_access_mode(p, BRW_ALIGN_16); + if (dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_LRP(p, dst, src[0], src[1], src[2]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else { + brw_LRP(p, dst, src[0], src[1], src[2]); + } + brw_set_access_mode(p, BRW_ALIGN_1); + break; + case 
BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 573921cf8cc..e6daf2f62e5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -199,6 +199,30 @@ fs_visitor::visit(ir_dereference_array *ir) } void +fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) +{ + if (intel->gen < 6 || x.file != GRF || y.file != GRF || a.file != GRF) { + /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ + fs_reg y_times_a = fs_reg(this, glsl_type::float_type); + fs_reg one_minus_a = fs_reg(this, glsl_type::float_type); + fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type); + + emit(MUL(y_times_a, y, a)); + + a.negate = !a.negate; + emit(ADD(one_minus_a, fs_reg(1.0f), a)); + emit(MUL(x_times_one_minus_a, x, one_minus_a)); + + emit(ADD(dst, x_times_one_minus_a, y_times_a)); + } else { + /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so + * we need to reorder the operands. + */ + emit(LRP(dst, a, y, x)); + } +} + +void fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst, fs_reg src0, fs_reg src1) { @@ -291,10 +315,10 @@ void fs_visitor::visit(ir_expression *ir) { unsigned int operand; - fs_reg op[2], temp; + fs_reg op[3], temp; fs_inst *inst; - assert(ir->get_num_operands() <= 2); + assert(ir->get_num_operands() <= 3); if (try_emit_saturate(ir)) return; @@ -586,7 +610,7 @@ fs_visitor::visit(ir_expression *ir) case ir_binop_pack_half_2x16_split: emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]); break; - case ir_binop_ubo_load: + case ir_binop_ubo_load: { /* This IR node takes a constant uniform block and a constant or * variable byte offset within the block and loads a vector from that. 
*/ @@ -640,6 +664,11 @@ fs_visitor::visit(ir_expression *ir) result.reg_offset = 0; break; } + + case ir_triop_lrp: + emit_lrp(this->result, op[0], op[1], op[2]); + break; + } } void diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9ab18cc7d63..2da5ed5c6cc 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -150,13 +150,15 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) */ brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir); do_mat_op_to_vec(shader->ir); + const int lrp_to_arith = (intel->gen < 6 || stage != MESA_SHADER_FRAGMENT) + ? LRP_TO_ARITH : 0; lower_instructions(shader->ir, MOD_TO_FRACT | DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2 | - LRP_TO_ARITH); + lrp_to_arith); /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, * if-statements need to be flattened. |