diff options
author | Chris Forbes <[email protected]> | 2013-03-31 21:33:51 +1300 |
---|---|---|
committer | Chris Forbes <[email protected]> | 2013-10-03 07:55:56 +1300 |
commit | 942a4ec18f7e713b962e7acc451fbd50ff9c4042 (patch) | |
tree | 9e5f23c1ec40d4398633d8e047db699cd39e8e1e | |
parent | fb455500bfb11cca0f45076a9eaccc0ddd764731 (diff) |
i965/fs: Add support for ir_tg4
Lowers ir_tg4 (from textureGather and textureGatherOffset builtins) to
SHADER_OPCODE_TG4.
The usual post-sampling swizzle workaround can't work for ir_tg4,
so avoid doing that:
* For R/G/B/A swizzles use the hardware channel select (lives in the
same dword in the header as the texel offset), and then don't do
anything afterward in the shader.
* For 0/1 swizzles blast the appropriate constant over all the output
channels instead of sampling.
V2: Avoid duplicating header enabling block
V3: Avoid sampling at all, for degenerate swizzles.
Signed-off-by: Chris Forbes <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 62 |
2 files changed, 60 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 6d93132cd05..cf6379c26a1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -213,6 +213,7 @@ public: void visit(ir_emit_vertex *); void visit(ir_end_primitive *); + uint32_t gather_channel(ir_texture *ir, int sampler); void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler); bool can_do_source_mods(fs_inst *inst); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 89d410219af..262cead1cce 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1167,6 +1167,12 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break; + case ir_tg4: + inst = emit(SHADER_OPCODE_TG4, dst); + break; + default: + fail("unrecognized texture opcode"); + break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -1191,9 +1197,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, bool header_present = false; int offsets[3]; - if (ir->offset && ir->op != ir_txf) { - /* The offsets set up by the ir_texture visitor are in the + if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) { + /* * The offsets set up by the ir_texture visitor are in the * m1 header, so we can't go headerless. + * + * * ir4_tg4 needs to place its channel select in the header, + * for interaction with ARB_texture_swizzle */ header_present = true; mlen++; @@ -1209,6 +1218,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, switch (ir->op) { case ir_tex: case ir_lod: + case ir_tg4: break; case ir_txb: emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); @@ -1323,6 +1333,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break; case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break; + case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -1450,6 +1461,24 @@ fs_visitor::visit(ir_texture *ir) */ int texunit = fp->Base.SamplerUnits[sampler]; + if (ir->op == ir_tg4) { + /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother + * emitting anything other than setting up the constant result. + */ + int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0); + if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { + + fs_reg res = fs_reg(this, glsl_type::vec4_type); + this->result = res; + + for (int i=0; i<4; i++) { + emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f))); + res.reg_offset++; + } + return; + } + } + /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); @@ -1477,6 +1506,7 @@ fs_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: case ir_lod: + case ir_tg4: break; case ir_txb: ir->lod_info.bias->accept(this); @@ -1499,6 +1529,8 @@ fs_visitor::visit(ir_texture *ir) ir->lod_info.sample_index->accept(this); sample_index = this->result; break; + default: + assert(!"Unrecognized texture opcode"); }; /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -1523,6 +1555,9 @@ fs_visitor::visit(ir_texture *ir) if (ir->offset != NULL && ir->op != ir_txf) inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); + if (ir->op == ir_tg4) + inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17 + inst->sampler = sampler; if (ir->shadow_comparitor) @@ -1543,6 +1578,24 @@ fs_visitor::visit(ir_texture *ir) } /** + * Set up the gather channel based on the swizzle, for gather4. + */ +uint32_t +fs_visitor::gather_channel(ir_texture *ir, int sampler) +{ + int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0 /* red */); + switch (swiz) { + case SWIZZLE_X: return 0; + case SWIZZLE_Y: return 1; + case SWIZZLE_Z: return 2; + case SWIZZLE_W: return 3; + default: + assert(!"Not reached"); /* zero, one swizzles handled already */ + return 0; + } +} + +/** * Swizzle the result of a texture result. This is necessary for * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. */ @@ -1551,7 +1604,10 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) { this->result = orig_val; - if (ir->op == ir_txs || ir->op == ir_lod) + /* txs,lod don't actually sample the texture, so swizzling the result + * makes no sense. + */ + if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4) return; if (ir->type == glsl_type::float_type) { |