diff options
author | Matt Turner <[email protected]> | 2014-03-10 13:27:46 -0700 |
---|---|---|
committer | Matt Turner <[email protected]> | 2014-11-25 17:29:02 -0800 |
commit | 3532be76805e79993bc6f684876586c189ec605b (patch) | |
tree | b5ffcdf93462b6dcce21503f4182ff9dac9c2f2c /src/mesa/drivers/dri/i965 | |
parent | e14c7c7faff3c204a5eefc1f2ea487d4730b8382 (diff) |
i965/vec4: Optimize packUnorm4x8().
Reduces the number of instructions needed to implement packUnorm4x8()
from 11 to 6.
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 24 |
3 files changed, 27 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 61ea2079140..d7a2a916387 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -83,12 +83,12 @@ brw_lower_packing_builtins(struct brw_context *brw,
            | LOWER_UNPACK_SNORM_2x16
            | LOWER_PACK_UNORM_2x16
            | LOWER_UNPACK_UNORM_2x16
-           | LOWER_PACK_SNORM_4x8
-           | LOWER_PACK_UNORM_4x8;
+           | LOWER_PACK_SNORM_4x8;
 
    if (shader_type == MESA_SHADER_FRAGMENT) {
       ops |= LOWER_UNPACK_UNORM_4x8
-          | LOWER_UNPACK_SNORM_4x8;
+          | LOWER_UNPACK_SNORM_4x8
+          | LOWER_PACK_UNORM_4x8;
    }
 
    if (brw->gen >= 7) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5b3ef8a2822..150e20a0387 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -510,6 +510,7 @@ public:
    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
    void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
+   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
 
    uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
    src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b6ace86cfdb..34f96070250 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -516,6 +516,26 @@ vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
 }
 
 void
+vec4_visitor::emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0)
+{
+   dst_reg saturated(this, glsl_type::vec4_type);
+   vec4_instruction *inst = emit(MOV(saturated, src0));
+   inst->saturate = true;
+
+   dst_reg scaled(this, glsl_type::vec4_type);
+   emit(MUL(scaled, src_reg(saturated), src_reg(255.0f)));
+
+   dst_reg rounded(this, glsl_type::vec4_type);
+   emit(RNDE(rounded, src_reg(scaled)));
+
+   dst_reg u(this, glsl_type::uvec4_type);
+   emit(MOV(u, src_reg(rounded)));
+
+   src_reg bytes(u);
+   emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
+}
+
+void
 vec4_visitor::visit_instructions(const exec_list *list)
 {
    foreach_in_list(ir_instruction, ir, list) {
@@ -1802,10 +1822,12 @@ vec4_visitor::visit(ir_expression *ir)
    case ir_unop_unpack_snorm_4x8:
       emit_unpack_snorm_4x8(result_dst, op[0]);
       break;
+   case ir_unop_pack_unorm_4x8:
+      emit_pack_unorm_4x8(result_dst, op[0]);
+      break;
    case ir_unop_pack_snorm_2x16:
    case ir_unop_pack_snorm_4x8:
    case ir_unop_pack_unorm_2x16:
-   case ir_unop_pack_unorm_4x8:
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
       unreachable("not reached: should be handled by lower_packing_builtins");