aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2011-08-18 11:55:42 -0700
committerKenneth Graunke <[email protected]>2011-09-26 16:30:07 -0700
commit74e927bcafad0a994be5f88fbda4058bef08bc51 (patch)
tree7337601dd15add0c25455940ed16de5d20a9b37e /src
parentc5943d6c1cc4eedbea088bc1f611abc153e90524 (diff)
i965/fs: Split generate_math into gen4/gen6 and 1/2 operand variants.
This mirrors the structure Eric used in the new VS backend, and seems simpler. In particular, the math1/math2 split will avoid having to figure out how many operands there are, as this is already known by the caller. Signed-off-by: Kenneth Graunke <[email protected]> Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h11
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp132
2 files changed, 89 insertions, 54 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0bd518f7702..f6a57bacb33 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -487,7 +487,16 @@ public:
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
+ void generate_math1_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math2_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1);
+ void generate_math_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index f742e84e1c6..8176a76a85e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -143,69 +143,85 @@ fs_visitor::generate_linterp(fs_inst *inst,
}
void
-fs_visitor::generate_math(fs_inst *inst,
- struct brw_reg dst, struct brw_reg *src)
+fs_visitor::generate_math1_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0)
{
int op = brw_math_function(inst->opcode);
- if (intel->gen >= 6) {
- assert(inst->mlen == 0);
-
- if (inst->opcode == SHADER_OPCODE_POW) {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math2(p, dst, op, src[0], src[1]);
+ assert(inst->mlen == 0);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- } else {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- }
- } else /* gen <= 5 */{
- assert(inst->mlen >= 1);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, src0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
- inst->base_mrf, src[0],
+ 0, sechalf(src0),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+}
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf + 1, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+void
+fs_visitor::generate_math2_gen6(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ int op = brw_math_function(inst->opcode);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
+ assert(inst->mlen == 0);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math2(p, dst, op, src0, src1);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+}
+
+void
+fs_visitor::generate_math_gen4(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ int op = brw_math_function(inst->opcode);
+
+ assert(inst->mlen >= 1);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf, src,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
@@ -770,10 +786,20 @@ fs_visitor::generate_code()
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_POW:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- generate_math(inst, dst, src);
+ if (intel->gen >= 6) {
+ generate_math1_gen6(inst, dst, src[0]);
+ } else {
+ generate_math_gen4(inst, dst, src[0]);
+ }
+ break;
+ case SHADER_OPCODE_POW:
+ if (intel->gen >= 6) {
+ generate_math2_gen6(inst, dst, src[0], src[1]);
+ } else {
+ generate_math_gen4(inst, dst, src[0]);
+ }
break;
case FS_OPCODE_PIXEL_X:
generate_pixel_xy(dst, true);