summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2011-03-28 18:03:37 -0700
committerEric Anholt <[email protected]>2011-04-26 12:19:40 -0700
commit148a32e622c5b95a4dbd9a8776fddf85ef484147 (patch)
tree614ea25d275462a842a7588b16f1f81eb80dda4c /src
parent54990673a65b72fd222aeafc19f3a384ce597146 (diff)
i965/fs: Add support for math instructions in 16-wide mode.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp41
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c10
3 files changed, 45 insertions, 14 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index d535f51437a..4eb67d57a5a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -293,6 +293,14 @@ static INLINE struct brw_reg retype( struct brw_reg reg,
return reg;
}
+static inline struct brw_reg
+sechalf(struct brw_reg reg)
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
static INLINE struct brw_reg suboffset( struct brw_reg reg,
GLuint delta )
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index aa51d04df33..aa7b1beb33f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -214,9 +214,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_LOG2:
case FS_OPCODE_SIN:
case FS_OPCODE_COS:
- return 1;
+ return 1 * c->dispatch_width / 8;
case FS_OPCODE_POW:
- return 2;
+ return 2 * c->dispatch_width / 8;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
@@ -614,7 +614,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
if (intel->gen < 6) {
inst->base_mrf = 2;
- inst->mlen = 1;
+ inst->mlen = c->dispatch_width / 8;
}
return inst;
@@ -652,7 +652,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
inst = emit(opcode, dst, src0, reg_null_f);
inst->base_mrf = base_mrf;
- inst->mlen = 2;
+ inst->mlen = 2 * c->dispatch_width / 8;
}
return inst;
}
@@ -2247,8 +2247,16 @@ fs_visitor::generate_math(fs_inst *inst,
assert(inst->mlen == 0);
if (inst->opcode == FS_OPCODE_POW) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src[0], src[1]);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
} else {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
@@ -2256,10 +2264,23 @@ fs_visitor::generate_math(fs_inst *inst,
0, src[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
}
} else {
assert(inst->mlen >= 1);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
@@ -2267,6 +2288,18 @@ fs_visitor::generate_math(fs_inst *inst,
inst->base_mrf, src[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 0b136a81ab7..fd4cd892f41 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -51,16 +51,6 @@ static GLboolean can_do_pln(struct intel_context *intel,
return GL_TRUE;
}
-/* Not quite sure how correct this is - need to understand horiz
- * vs. vertical strides a little better.
- */
-static INLINE struct brw_reg sechalf( struct brw_reg reg )
-{
- if (reg.vstride)
- reg.nr++;
- return reg;
-}
-
/* Return the SrcReg index of the channels that can be immediate float operands
* instead of usage of PROGRAM_CONSTANT values through push/pull.
*/