summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Forbes <[email protected]> 2013-03-31 21:33:51 +1300
committer Chris Forbes <[email protected]> 2013-10-03 07:55:56 +1300
commit 942a4ec18f7e713b962e7acc451fbd50ff9c4042 (patch)
tree 9e5f23c1ec40d4398633d8e047db699cd39e8e1e
parent fb455500bfb11cca0f45076a9eaccc0ddd764731 (diff)
i965/fs: Add support for ir_tg4
Lowers ir_tg4 (from textureGather and textureGatherOffset builtins) to
SHADER_OPCODE_TG4.

The usual post-sampling swizzle workaround can't work for ir_tg4, so
avoid doing that:

* For R/G/B/A swizzles, use the hardware channel select (it lives in the
  same dword in the header as the texel offset), and then don't do
  anything afterward in the shader.

* For 0/1 swizzles, blast the appropriate constant over all the output
  channels instead of sampling.

V2: Avoid duplicating header enabling block.
V3: Avoid sampling at all, for degenerate swizzles.

Signed-off-by: Chris Forbes <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 62
2 files changed, 60 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 6d93132cd05..cf6379c26a1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -213,6 +213,7 @@ public:
void visit(ir_emit_vertex *);
void visit(ir_end_primitive *);
+ uint32_t gather_channel(ir_texture *ir, int sampler);
void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
bool can_do_source_mods(fs_inst *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 89d410219af..262cead1cce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1167,6 +1167,12 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_lod:
inst = emit(SHADER_OPCODE_LOD, dst);
break;
+ case ir_tg4:
+ inst = emit(SHADER_OPCODE_TG4, dst);
+ break;
+ default:
+ fail("unrecognized texture opcode");
+ break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
@@ -1191,9 +1197,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
bool header_present = false;
int offsets[3];
- if (ir->offset && ir->op != ir_txf) {
- /* The offsets set up by the ir_texture visitor are in the
+ if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
+ /* * The offsets set up by the ir_texture visitor are in the
* m1 header, so we can't go headerless.
+ *
+ * * ir_tg4 needs to place its channel select in the header,
+ * for interaction with ARB_texture_swizzle
*/
header_present = true;
mlen++;
@@ -1209,6 +1218,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
switch (ir->op) {
case ir_tex:
case ir_lod:
+ case ir_tg4:
break;
case ir_txb:
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
@@ -1323,6 +1333,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
+ case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
@@ -1450,6 +1461,24 @@ fs_visitor::visit(ir_texture *ir)
*/
int texunit = fp->Base.SamplerUnits[sampler];
+ if (ir->op == ir_tg4) {
+ /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+ * emitting anything other than setting up the constant result.
+ */
+ int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0);
+ if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+
+ fs_reg res = fs_reg(this, glsl_type::vec4_type);
+ this->result = res;
+
+ for (int i=0; i<4; i++) {
+ emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
+ res.reg_offset++;
+ }
+ return;
+ }
+ }
+
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
@@ -1477,6 +1506,7 @@ fs_visitor::visit(ir_texture *ir)
switch (ir->op) {
case ir_tex:
case ir_lod:
+ case ir_tg4:
break;
case ir_txb:
ir->lod_info.bias->accept(this);
@@ -1499,6 +1529,8 @@ fs_visitor::visit(ir_texture *ir)
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
break;
+ default:
+ assert(!"Unrecognized texture opcode");
};
/* Writemasking doesn't eliminate channels on SIMD8 texture
@@ -1523,6 +1555,9 @@ fs_visitor::visit(ir_texture *ir)
if (ir->offset != NULL && ir->op != ir_txf)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
+ if (ir->op == ir_tg4)
+ inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
+
inst->sampler = sampler;
if (ir->shadow_comparitor)
@@ -1543,6 +1578,24 @@ fs_visitor::visit(ir_texture *ir)
}
/**
+ * Map the sampler's red-channel swizzle to the gather4 channel select (0-3).
+ */
+uint32_t
+fs_visitor::gather_channel(ir_texture *ir, int sampler)
+{
+ int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0 /* red */);
+ switch (swiz) {
+ case SWIZZLE_X: return 0;
+ case SWIZZLE_Y: return 1;
+ case SWIZZLE_Z: return 2;
+ case SWIZZLE_W: return 3;
+ default:
+ assert(!"Not reached"); /* ZERO/ONE swizzles return early in visit(ir_texture *) */
+ return 0;
+ }
+}
+
+/**
* Swizzle the result of a texture result. This is necessary for
* EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
*/
@@ -1551,7 +1604,10 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
{
this->result = orig_val;
- if (ir->op == ir_txs || ir->op == ir_lod)
+ /* txs and lod don't actually sample the texture, so swizzling the result
+ * makes no sense; tg4 handles its swizzle up front (channel select/constant).
+ */
+ if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4)
return;
if (ir->type == glsl_type::float_type) {