summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2011-05-10 15:30:11 -0700
committer Eric Anholt <[email protected]> 2011-05-18 13:57:16 -0700
commit 51761a1aefd31b7df12edd9467ac630b9cbbbbc9 (patch)
tree a269659229afec268d3b8ebb1915a2d71059839d
parent 5dd5be69f099211db027b6e39150cacefcfdf8b6 (diff)
i965/fs: Cut an instruction and a temporary from gen6 discard statements.
I thought I was thwarted initially when I couldn't do conditional mod on a MOV, and couldn't use two immediate constants in one instruction. But g0 != g0 is also a way to produce a failing comparison.

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 64
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 6
2 files changed, 30 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 00f39d4b534..bd5e8d2e843 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1606,12 +1606,9 @@ fs_visitor::visit(ir_swizzle *ir)
void
fs_visitor::visit(ir_discard *ir)
{
- fs_reg temp = fs_reg(this, glsl_type::uint_type);
-
assert(ir->condition == NULL); /* FINISHME */
- emit(FS_OPCODE_DISCARD_NOT, temp, reg_null_d);
- emit(FS_OPCODE_DISCARD_AND, reg_null_d, temp);
+ emit(FS_OPCODE_DISCARD);
kill_emitted = true;
}
@@ -2671,56 +2668,54 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
}
void
-fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
+fs_visitor::generate_discard(fs_inst *inst)
{
- if (intel->gen >= 6) {
- /* Gen6 no longer has the mask reg for us to just read the
- * active channels from. However, cmp updates just the channels
- * of the flag reg that are enabled, so we can get at the
- * channel enables that way. In this step, make a reg of ones
- * we'll compare to.
- */
- brw_MOV(p, mask, brw_imm_ud(1));
- } else {
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
- brw_pop_insn_state(p);
- }
-}
+ struct brw_reg f0 = brw_flag_reg();
-void
-fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
-{
if (intel->gen >= 6) {
- struct brw_reg f0 = brw_flag_reg();
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ struct brw_reg some_register;
+
+ /* As of gen6, we no longer have the mask register to look at,
+ * so life gets a bit more complicated.
+ */
+ /* Load the flag register with all ones. */
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */
+ brw_MOV(p, f0, brw_imm_uw(0xffff));
brw_pop_insn_state(p);
+ /* Do a comparison that should always fail, to produce 0s in the flag
+ * reg where we have active channels.
+ */
+ some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */
+ BRW_CONDITIONAL_NZ, some_register, some_register);
+
/* Undo CMP's whacking of predication*/
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_AND(p, g1, f0, g1);
brw_pop_insn_state(p);
} else {
struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- mask = brw_uw1_reg(mask.file, mask.nr, 0);
+ struct brw_reg mask = brw_uw1_reg(mask.file, mask.nr, 0);
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, g0, mask, g0);
+
+ /* Unlike the 965, we have the mask reg, so we just need
+ * somewhere to invert that (containing channels to be disabled)
+ * so it can be ANDed with the mask of pixels still to be
+ * written. Use the flag reg for consistency with gen6+.
+ */
+ brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
+ brw_AND(p, g0, f0, g0);
+
brw_pop_insn_state(p);
}
}
@@ -3968,11 +3963,8 @@ fs_visitor::generate_code()
case FS_OPCODE_TXL:
generate_tex(inst, dst, src[0]);
break;
- case FS_OPCODE_DISCARD_NOT:
- generate_discard_not(inst, dst);
- break;
- case FS_OPCODE_DISCARD_AND:
- generate_discard_and(inst, src[0]);
+ case FS_OPCODE_DISCARD:
+ generate_discard(inst);
break;
case FS_OPCODE_DDX:
generate_ddx(inst, dst, src[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 154ce7fdafb..1b37ef5d087 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -75,8 +75,7 @@ enum fs_opcodes {
FS_OPCODE_TXB,
FS_OPCODE_TXD,
FS_OPCODE_TXL,
- FS_OPCODE_DISCARD_NOT,
- FS_OPCODE_DISCARD_AND,
+ FS_OPCODE_DISCARD,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_PULL_CONSTANT_LOAD,
@@ -500,8 +499,7 @@ public:
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
- void generate_discard_not(fs_inst *inst, struct brw_reg temp);
- void generate_discard_and(fs_inst *inst, struct brw_reg temp);
+ void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_spill(fs_inst *inst, struct brw_reg src);