summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2009-11-06 17:45:13 -0800
committerEric Anholt <[email protected]>2009-11-06 21:24:22 -0800
commit8baee3d25beb616f6d5ba575684e889d60e38740 (patch)
tree65ae13b2e5f3dbcfa47112077e405098e00d7b17
parentec66644ed0af976cacb069ca7c7f0d6731666359 (diff)
i965: Use Compr4 instruction compression mode on G4X and newer.
No statistically significant performance difference at n=3 with either openarena or my GL demo, but cutting program size seems like a good thing to be doing for the hypothetical app that has a working set near icache size.
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c33
3 files changed, 29 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 30603bdd0e6..39eb88d7c2b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file,
GLuint writemask )
{
struct brw_reg reg;
- if (type == BRW_GENERAL_REGISTER_FILE)
+ if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
- else if (type == BRW_MESSAGE_REGISTER_FILE)
- assert(nr < BRW_MAX_MRF);
- else if (type == BRW_ARCHITECTURE_REGISTER_FILE)
+ else if (file == BRW_MESSAGE_REGISTER_FILE)
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+ else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
reg.type = type;
@@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
static INLINE struct brw_reg brw_message_reg( GLuint nr )
{
- assert(nr < BRW_MAX_MRF);
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,
nr,
0);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 241cdc33f86..7ceabba288a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn,
static void brw_set_dest( struct brw_instruction *insn,
struct brw_reg dest )
{
- if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+ dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
insn->bits1.da1.dest_reg_file = dest.file;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9cd1fedacb7..eb37ea18640 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1105,6 +1105,7 @@ void emit_fb_write(struct brw_wm_compile *c,
GLuint eot)
{
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
GLuint nr = 2;
GLuint channel;
@@ -1119,18 +1120,28 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_push_insn_state(p);
for (channel = 0; channel < 4; channel++) {
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p,
- brw_message_reg(nr + channel),
- arg0[channel]);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
+ /* By setting the high bit of the MRF register number, we indicate
+ * that we want COMPR4 mode - instead of doing the usual destination
+ * + 1 for the second half we get destination + 4.
+ */
brw_MOV(p,
- brw_message_reg(nr + channel + 4),
- sechalf(arg0[channel]));
+ brw_message_reg(nr + channel + (1 << 7)),
+ arg0[channel]);
+ } else {
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(nr + channel),
+ arg0[channel]);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p,
+ brw_message_reg(nr + channel + 4),
+ sechalf(arg0[channel]));
+ }
}
}
/* skip over the regs populated above: