summary | refs | log | tree | commit | diff | stats
path: root/src/mesa
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2012-12-06 12:15:13 -0800
committer: Eric Anholt <[email protected]> 2012-12-11 10:13:08 -0800
commit: d5016495cc1b50b1673d0d3ab8e6af8249b071d5 (patch)
tree: a60b5f17f2d19ac01cf96c0f9a0ccb06474edfd7 /src/mesa
parent: b278f65e1c5295794dcf08d100356e6ded6c1f32 (diff)
i965/fs: Rewrite discards to use a flag subreg to track discarded pixels.
This makes much more sense on gen6+, and will also prove useful for early exit of shaders on discard.

v2: fix up a stale comment from before converting gen4-5.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 8
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp 76
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_fp.cpp 17
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 12
8 files changed, 46 insertions, 73 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 5e00b40291d..2b77ae62ff3 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -673,7 +673,6 @@ enum opcode {
FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
- FS_OPCODE_DISCARD,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index dbf48f89d7e..ac0bb56ad7d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2434,6 +2434,14 @@ fs_visitor::run()
else
emit_interpolation_setup_gen6();
+ /* We handle discards by keeping track of the still-live pixels in f0.1.
+ * Initialize it with the dispatched pixels.
+ */
+ if (fp->UsesKill) {
+ fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ discard_init->flag_subreg = 1;
+ }
+
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b60a37e4592..b00755f953d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -501,7 +501,6 @@ private:
void generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
- void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index b3d7f81f1a4..f185eb52104 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -74,6 +74,17 @@ fs_generator::generate_fb_write(fs_inst *inst)
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ if (fp->UsesKill) {
+ struct brw_reg pixel_mask;
+
+ if (intel->gen >= 6)
+ pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
+ }
+
if (inst->header_present) {
if (intel->gen >= 6) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
@@ -514,58 +525,6 @@ fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src
}
void
-fs_generator::generate_discard(fs_inst *inst)
-{
- struct brw_reg f0 = brw_flag_reg(0, 0);
-
- if (intel->gen >= 6) {
- struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
- struct brw_reg some_register;
-
- /* As of gen6, we no longer have the mask register to look at,
- * so life gets a bit more complicated.
- */
-
- /* Load the flag register with all ones. */
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, f0, brw_imm_uw(0xffff));
- brw_pop_insn_state(p);
-
- /* Do a comparison that should always fail, to produce 0s in the flag
- * reg where we have active channels.
- */
- some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
- brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- BRW_CONDITIONAL_NZ, some_register, some_register);
-
- /* Undo CMP's whacking of predication*/
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_AND(p, g1, f0, g1);
- brw_pop_insn_state(p);
- } else {
- struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
- /* Unlike the 965, we have the mask reg, so we just need
- * somewhere to invert that (containing channels to be disabled)
- * so it can be ANDed with the mask of pixels still to be
- * written. Use the flag reg for consistency with gen6+.
- */
- brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, g0, f0, g0);
-
- brw_pop_insn_state(p);
- }
-}
-
-void
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
{
assert(inst->mlen != 0);
@@ -745,12 +704,16 @@ void
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
{
struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
- struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ struct brw_reg dispatch_mask;
+
+ if (intel->gen >= 6)
+ dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- assert (intel->gen >= 6);
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, flags, g1);
+ brw_MOV(p, flags, dispatch_mask);
brw_pop_insn_state(p);
}
@@ -1083,9 +1046,6 @@ fs_generator::generate_code(exec_list *instructions)
case SHADER_OPCODE_TXS:
generate_tex(inst, dst, src[0]);
break;
- case FS_OPCODE_DISCARD:
- generate_discard(inst);
- break;
case FS_OPCODE_DDX:
generate_ddx(inst, dst, src[0]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index 4be7779edf9..bebf059773d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -252,14 +252,15 @@ fs_visitor::emit_fragment_program_code()
continue;
}
- emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
- BRW_CONDITIONAL_L));
-
- if (intel->gen < 6 && dispatch_width == 16)
- fail("Can't support (non-uniform) control flow on 16-wide");
- emit(IF(BRW_PREDICATE_NORMAL));
- emit(FS_OPCODE_DISCARD);
- emit(BRW_OPCODE_ENDIF);
+
+ /* Emit an instruction that's predicated on the current
+ * undiscarded pixels, and updates just those pixels to be
+ * turned off.
+ */
+ fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+ BRW_CONDITIONAL_GE));
+ cmp->predicate = BRW_PREDICATE_NORMAL;
+ cmp->flag_subreg = 1;
}
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index a4fc03218a0..d1147f544e4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -277,9 +277,6 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
payload_last_use_ip[0 / reg_width] = use_ip;
payload_last_use_ip[1 / reg_width] = use_ip;
break;
- case FS_OPCODE_DISCARD:
- payload_last_use_ip[1 / reg_width] = use_ip;
- break;
case FS_OPCODE_LINTERP:
/* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 96d1131f10c..e9c25b0216a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -299,7 +299,6 @@ instruction_scheduler::calculate_deps()
}
if (inst->predicate) {
- assert(last_conditional_mod[inst->flag_subreg]);
add_dep(last_conditional_mod[inst->flag_subreg], n);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f1c6860fe47..98cd064aceb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1435,7 +1435,17 @@ fs_visitor::visit(ir_discard *ir)
{
assert(ir->condition == NULL); /* FINISHME */
- emit(FS_OPCODE_DISCARD);
+ /* We track our discarded pixels in f0.1. By predicating on it, we can
+ * update just the flag bits that aren't yet discarded. By emitting a
+ * CMP of g0 != g0, all our currently executing channels will get turned
+ * off.
+ */
+ fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UW));
+ fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
+ BRW_CONDITIONAL_NZ));
+ cmp->predicate = BRW_PREDICATE_NORMAL;
+ cmp->flag_subreg = 1;
}
void