summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2010-11-18 11:05:32 -0800
committerIan Romanick <[email protected]>2010-11-19 15:00:25 -0800
commitf2616e56de8a48360cae8f269727b58490555f4d (patch)
tree56462288844e777618344e9f7830b5c3eb1f1f02
parent04ffbe1ac6a82ac5cce843afa15ffdfa4ef78103 (diff)
glsl: Add ir_unop_sin_reduced and ir_unop_cos_reduced
They operate just like ir_unop_sin and ir_unop_cos except that they expect their inputs to be limited to the range [-pi, pi]. Several GPUs require this limited range for their sine and cosine instructions, so having these as operations (along with a to-be-written lowering pass) helps these architectures. These new operations also match the semantics of the GL_ARB_fragment_program SCS instruction. Having these as operations helps in generating GLSL IR directly from assembly fragment programs.
-rw-r--r--src/glsl/ir.cpp2
-rw-r--r--src/glsl/ir.h2
-rw-r--r--src/glsl/ir_constant_expression.cpp2
-rw-r--r--src/glsl/ir_validate.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp2
-rw-r--r--src/mesa/program/ir_to_mesa.cpp110
6 files changed, 120 insertions, 0 deletions
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 574ef3e1832..714826343c7 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -239,6 +239,8 @@ static const char *const operator_strs[] = {
"round_even",
"sin",
"cos",
+ "sin_reduced",
+ "cos_reduced",
"dFdx",
"dFdy",
"noise",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 3ea7301f472..2b94e63cc2c 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -729,6 +729,8 @@ enum ir_expression_operation {
/*@{*/
ir_unop_sin,
ir_unop_cos,
+ ir_unop_sin_reduced, /**< Reduced range sin. [-pi, pi] */
+ ir_unop_cos_reduced, /**< Reduced range cos. [-pi, pi] */
/*@}*/
/**
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 8a54fc78cca..45860b279fb 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -216,6 +216,7 @@ ir_expression::constant_expression_value()
break;
case ir_unop_sin:
+ case ir_unop_sin_reduced:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
data.f[c] = sinf(op[0]->value.f[c]);
@@ -223,6 +224,7 @@ ir_expression::constant_expression_value()
break;
case ir_unop_cos:
+ case ir_unop_cos_reduced:
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
data.f[c] = cosf(op[0]->value.f[c]);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 77f48968b81..2a066c1a277 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -273,6 +273,8 @@ ir_validate::visit_leave(ir_expression *ir)
case ir_unop_fract:
case ir_unop_sin:
case ir_unop_cos:
+ case ir_unop_sin_reduced:
+ case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ac795e0bda1..164f89eace4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -778,9 +778,11 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by ir_explog_to_explog2");
break;
case ir_unop_sin:
+ case ir_unop_sin_reduced:
emit_math(FS_OPCODE_SIN, this->result, op[0]);
break;
case ir_unop_cos:
+ case ir_unop_cos_reduced:
emit_math(FS_OPCODE_COS, this->result, op[0]);
break;
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 870fd6f25e1..ef9f692f946 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -282,6 +282,10 @@ public:
ir_to_mesa_src_reg src0,
ir_to_mesa_src_reg src1);
+ void emit_scs(ir_instruction *ir, enum prog_opcode op,
+ ir_to_mesa_dst_reg dst,
+ const ir_to_mesa_src_reg &src);
+
GLboolean try_emit_mad(ir_expression *ir,
int mul_operand);
@@ -475,6 +479,10 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
for (j = i + 1; j < 4; j++) {
+ /* If there is another enabled component in the destination that is
+ * derived from the same inputs, generate its value on this pass as
+ * well.
+ */
if (!(done_mask & (1 << j)) &&
GET_SWZ(src0.swizzle, j) == src0_swiz &&
GET_SWZ(src1.swizzle, j) == src1_swiz) {
@@ -508,6 +516,102 @@ ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
}
+/**
+ * Emit an OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other Mesa (or
+ * ARB_fragment_program) opcodes.  Instead of splatting its result across all
+ * four components of the destination, it writes the cosine of its operand to
+ * the \c x component and the sine of its operand to the \c y component.
+ *
+ * One \c SCS instruction is emitted for each distinct source component that
+ * is selected by an enabled destination channel.  If the set of destination
+ * channels that need a value is not exactly the single channel that \c SCS
+ * writes, the value is computed into a temporary and then copied to the
+ * destination with a \c MOV.
+ *
+ * For vertex programs the request is forwarded to
+ * \c ir_to_mesa_emit_scalar_op1 with \c op unchanged.
+ *
+ * \param ir   IR instruction being processed
+ * \param op   Either \c OPCODE_SIN or \c OPCODE_COS depending on which
+ *             value is desired.
+ * \param dst  Destination register
+ * \param src  Source register
+ */
+void
+ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
+                             ir_to_mesa_dst_reg dst,
+                             const ir_to_mesa_src_reg &src)
+{
+   assert(op == OPCODE_SIN || op == OPCODE_COS);
+
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      ir_to_mesa_emit_scalar_op1(ir, op, dst, src);
+      return;
+   }
+
+   /* SCS writes the cosine to X (component 0) and the sine to Y
+    * (component 1).
+    */
+   const unsigned component = (op == OPCODE_COS) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   ir_to_mesa_src_reg tmp;
+
+   /* If there are components in the destination that differ from the
+    * component that will be written by the SCS instruction, we'll need a
+    * temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      ir_to_mesa_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specifies which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzled to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+                                   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same input, generate its value on this pass as
+          * well.  The comparison must use the caller's swizzle (src), not
+          * src0, because src0.swizzle has already been replaced by a splat
+          * of src0_swiz and would match every component.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         ir_to_mesa_instruction *inst;
+         ir_to_mesa_dst_reg tmp_dst = ir_to_mesa_dst_reg_from_src(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = ir_to_mesa_emit_op1(ir, OPCODE_SCS, tmp_dst, src0);
+         inst->dst_reg.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.  This must be a plain MOV: emitting a second SCS
+          * here would take the sine / cosine of the already computed result.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+                                     component, component);
+         inst = ir_to_mesa_emit_op1(ir, OPCODE_MOV, dst, tmp);
+         inst->dst_reg.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         ir_to_mesa_instruction *inst =
+            ir_to_mesa_emit_op1(ir, OPCODE_SCS, dst, src0);
+         inst->dst_reg.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
struct ir_to_mesa_src_reg
ir_to_mesa_visitor::src_reg_for_float(float val)
{
@@ -942,6 +1046,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
case ir_unop_cos:
ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
break;
+ case ir_unop_sin_reduced:
+ emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
+ break;
+ case ir_unop_cos_reduced:
+ emit_scs(ir, OPCODE_COS, result_dst, op[0]);
+ break;
case ir_unop_dFdx:
ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);