diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 365 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 2 |
4 files changed, 384 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index bc4cfab5c02..39acae9e434 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -105,7 +105,8 @@ C_SOURCES = \ $(DRIVER_SOURCES) CXX_SOURCES = \ - brw_fs.cpp + brw_fs.cpp \ + brw_fs_channel_expressions.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9509d932367..d16e75a2ca9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -36,6 +36,7 @@ extern "C" { #include "brw_wm.h" #include "talloc.h" } +#include "../glsl/ir_optimization.h" struct gl_shader * brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) @@ -75,6 +76,20 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) { + static int using_new_fs = -1; + + if (using_new_fs == -1) + using_new_fs = getenv("INTEL_NEW_FS") != NULL; + + for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { + struct gl_shader *shader = prog->_LinkedShaders[i]; + + if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) { + do_mat_op_to_vec(shader->ir); + brw_do_channel_expressions(shader->ir); + } + } + if (!_mesa_ir_link_shader(ctx, prog)) return GL_FALSE; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp new file mode 100644 index 00000000000..d8d58a9467b --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -0,0 +1,365 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_wm_channel_expressions.cpp + * + * Breaks vector operations down into operations on each component. + * + * The 965 fragment shader receives 8 or 16 pixels at a time, so each + * channel of a vector is laid out as 1 or 2 8-float registers. Each + * ALU operation operates on one of those channel registers. As a + * result, there is no value to the 965 fragment shader in tracking + * "vector" expressions in the sense of GLSL fragment shaders, when + * doing a channel at a time may help in constant folding, algebraic + * simplification, and reducing the liveness of channel registers. + * + * The exception to the desire to break everything down to floats is + * texturing. The texture sampler returns a writemasked masked + * 4/8-register sequence containing the texture values. We don't want + * to dispatch to the sampler separately for each channel we need, so + * we do retain the vector types in that case. + */ + +extern "C" { +#include "main/core.h" +#include "brw_wm.h" +} +#include "../glsl/ir.h" +#include "../glsl/ir_expression_flattening.h" +#include "../glsl/glsl_types.h" + +class ir_channel_expressions_visitor : public ir_hierarchical_visitor { +public: + ir_channel_expressions_visitor() + { + this->progress = false; + this->mem_ctx = NULL; + } + + ir_visitor_status visit_leave(ir_assignment *); + + ir_rvalue *get_element(ir_variable *var, unsigned int element); + void assign(ir_assignment *ir, int elem, ir_rvalue *val); + + bool progress; + void *mem_ctx; +}; + +static bool +channel_expressions_predicate(ir_instruction *ir) +{ + ir_expression *expr = ir->as_expression(); + unsigned int i; + + if (!expr) + return false; + + for (i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_vector()) + return true; + } + + return false; +} + +extern "C" { +GLboolean +brw_do_channel_expressions(exec_list *instructions) +{ + ir_channel_expressions_visitor v; + + /* Pull out any matrix expression to a separate assignment to a + * temp. This will make our handling of the breakdown to + * operations on the matrix's vector components much easier. + */ + do_expression_flattening(instructions, channel_expressions_predicate); + + visit_list_elements(&v, instructions); + + return v.progress; +} +} + +ir_rvalue * +ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem) +{ + ir_dereference *deref; + + if (var->type->is_scalar()) + return new(mem_ctx) ir_dereference_variable(var); + + assert(elem < var->type->components()); + deref = new(mem_ctx) ir_dereference_variable(var); + return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1); +} + +void +ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val) +{ + ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL); + ir_assignment *assign; + ir_swizzle *val_swiz; + + /* This assign-of-expression should have been generated by the + * expression flattening visitor (since we never short circit to + * not flatten, even for plain assignments of variables), so the + * writemask is always full. + */ + assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1); + + /* Smear the float across all the channels for the masked write. */ + val_swiz = new(mem_ctx) ir_swizzle(val, 0, 0, 0, 0, + ir->lhs->type->components()); + assign = new(mem_ctx) ir_assignment(lhs, val_swiz, NULL, (1 << elem)); + ir->insert_before(assign); +} + +ir_visitor_status +ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) +{ + ir_expression *expr = ir->rhs->as_expression(); + bool found_vector = false; + unsigned int i, vector_elements = 1; + ir_variable *op_var[2]; + + if (!expr) + return visit_continue; + + if (!this->mem_ctx) + this->mem_ctx = talloc_parent(ir); + + for (i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_vector()) { + found_vector = true; + vector_elements = expr->operands[i]->type->vector_elements; + break; + } + } + if (!found_vector) + return visit_continue; + + /* Store the expression operands in temps so we can use them + * multiple times. + */ + for (i = 0; i < expr->get_num_operands(); i++) { + ir_assignment *assign; + ir_dereference *deref; + + assert(!expr->operands[i]->type->is_matrix()); + + op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type, + "channel_expressions", + ir_var_temporary); + ir->insert_before(op_var[i]); + + deref = new(mem_ctx) ir_dereference_variable(op_var[i]); + assign = new(mem_ctx) ir_assignment(deref, + expr->operands[i], + NULL); + ir->insert_before(assign); + } + + const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type, + 1, 1); + + /* OK, time to break down this vector operation. */ + switch (expr->operation) { + case ir_unop_bit_not: + case ir_unop_logic_not: + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_f2i: + case ir_unop_i2f: + case ir_unop_f2b: + case ir_unop_b2f: + case ir_unop_i2b: + case ir_unop_b2i: + case ir_unop_u2f: + case ir_unop_trunc: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdy: + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + NULL)); + } + break; + + case ir_binop_add: + case ir_binop_sub: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1)); + } + break; + + case ir_unop_any: { + ir_expression *temp; + temp = new(mem_ctx) ir_expression(ir_binop_logic_or, + element_type, + get_element(op_var[0], 0), + get_element(op_var[0], 1)); + + for (i = 2; i < vector_elements; i++) { + temp = new(mem_ctx) ir_expression(ir_binop_logic_or, + element_type, + get_element(op_var[0], i), + temp); + } + assign(ir, 0, temp); + break; + } + + case ir_binop_dot: { + ir_expression *last = NULL; + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + ir_expression *temp; + + temp = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + op0, + op1); + if (last) { + last = new(mem_ctx) ir_expression(ir_binop_add, + element_type, + temp, + last); + } else { + last = temp; + } + } + assign(ir, 0, last); + break; + } + + case ir_binop_cross: { + for (i = 0; i < vector_elements; i++) { + int swiz0 = (i + 1) % 3; + int swiz1 = (i + 2) % 3; + ir_expression *temp1, *temp2; + + temp1 = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + get_element(op_var[0], swiz0), + get_element(op_var[1], swiz1)); + + temp2 = new(mem_ctx) ir_expression(ir_binop_mul, + element_type, + get_element(op_var[1], swiz0), + get_element(op_var[0], swiz1)); + + temp2 = new(mem_ctx) ir_expression(ir_unop_neg, + element_type, + temp2, + NULL); + + assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add, + element_type, + temp1, temp2)); + } + break; + } + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + ir->print(); + printf("\n"); + assert(!"not reached: expression operates on scalars only"); + break; + case ir_binop_equal: + case ir_binop_nequal: { + ir_expression *last = NULL; + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op0 = get_element(op_var[0], i); + ir_rvalue *op1 = get_element(op_var[1], i); + ir_expression *temp; + ir_expression_operation join; + + if (expr->operation == ir_binop_equal) + join = ir_binop_logic_and; + else + join = ir_binop_logic_or; + + temp = new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1); + if (last) { + last = new(mem_ctx) ir_expression(join, + element_type, + temp, + last); + } else { + last = temp; + } + } + assign(ir, 0, last); + break; + } + } + + ir->remove(); + this->progress = true; + + return visit_continue; +} diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 25a72f5dda9..438da1af622 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -465,4 +465,6 @@ GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog); struct gl_shader *brw_new_shader(GLcontext *ctx, GLuint name, GLuint type); struct gl_shader_program *brw_new_shader_program(GLcontext *ctx, GLuint name); +GLboolean brw_do_channel_expressions(struct exec_list *instructions); + #endif |