i965/vec4: Optimize unpackSnorm4x8().

Reduces the number of instructions needed to implement unpackSnorm4x8() from 16 to 6.

Reviewed-by: Kenneth Graunke <[email protected]>
author: Matt Turner <[email protected]> 2014-03-09 20:22:23 -0700
committer: Matt Turner <[email protected]> 2014-11-25 17:29:02 -0800
commit: 94a30bbd4fe5f3eda167819e307f736268fd33f6 (patch)
tree: d5d8fbfdc247fe2d9056a90a7744dd4221c3b3c4
parent: bf686b2785c63116ab4ab7e62eb77be51b97d346 (diff)
3 files changed, 33 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6cb2da8641f..b694b6d0d17 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -84,11 +84,11 @@ brw_lower_packing_builtins(struct brw_context *brw,
            | LOWER_PACK_UNORM_2x16
            | LOWER_UNPACK_UNORM_2x16
            | LOWER_PACK_SNORM_4x8
-           | LOWER_UNPACK_SNORM_4x8
            | LOWER_PACK_UNORM_4x8;
 
    if (shader_type == MESA_SHADER_FRAGMENT) {
-      ops |= LOWER_UNPACK_UNORM_4x8;
+      ops |= LOWER_UNPACK_UNORM_4x8
+           | LOWER_UNPACK_SNORM_4x8;
    }
 
    if (brw->gen >= 7) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 1c0717404e8..5b3ef8a2822 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -509,6 +509,7 @@ public:
    void emit_pack_half_2x16(dst_reg dst, src_reg src0);
    void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
    void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
+   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
 
    uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
    src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 45551911008..b6ace86cfdb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -489,6 +489,33 @@ vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
 }
 
 void
+vec4_visitor::emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0)
+{
+   /* Instead of splitting the 32-bit integer, shifting, and ORing it back
+    * together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
+    * is not suitable to generate the shift values, but we can use the packed
+    * vector float and a type-converting MOV.
+    */
+   dst_reg shift(this, glsl_type::uvec4_type);
+   emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78))); /* VF <0, 8, 16, 24> */
+
+   dst_reg shifted(this, glsl_type::uvec4_type);
+   src0.swizzle = BRW_SWIZZLE_XXXX; /* broadcast x to all channels */
+   emit(SHR(shifted, src0, src_reg(shift)));
+
+   shifted.type = BRW_REGISTER_TYPE_B; /* reinterpret as signed bytes */
+   dst_reg f(this, glsl_type::vec4_type);
+   emit(MOV(f, src_reg(shifted))); /* type-converting MOV to float */
+
+   dst_reg scaled(this, glsl_type::vec4_type);
+   emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
+
+   dst_reg max(this, glsl_type::vec4_type); /* clamp to [-1, 1] below */
+   emit_minmax(BRW_CONDITIONAL_G, max, src_reg(scaled), src_reg(-1.0f));
+   emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
+}
+
+void
 vec4_visitor::visit_instructions(const exec_list *list)
 {
    foreach_in_list(ir_instruction, ir, list) {
@@ -1772,12 +1799,14 @@ vec4_visitor::visit(ir_expression *ir)
    case ir_unop_unpack_unorm_4x8:
       emit_unpack_unorm_4x8(result_dst, op[0]);
       break;
+   case ir_unop_unpack_snorm_4x8:
+      emit_unpack_snorm_4x8(result_dst, op[0]);
+      break;
    case ir_unop_pack_snorm_2x16:
    case ir_unop_pack_snorm_4x8:
    case ir_unop_pack_unorm_2x16:
    case ir_unop_pack_unorm_4x8:
    case ir_unop_unpack_snorm_2x16:
-   case ir_unop_unpack_snorm_4x8:
    case ir_unop_unpack_unorm_2x16:
       unreachable("not reached: should be handled by lower_packing_builtins");
    case ir_unop_unpack_half_2x16_split_x:
author	Matt Turner <[email protected]>	2014-03-09 20:22:23 -0700
committer	Matt Turner <[email protected]>	2014-11-25 17:29:02 -0800
commit	94a30bbd4fe5f3eda167819e307f736268fd33f6 (patch)
tree	d5d8fbfdc247fe2d9056a90a7744dd4221c3b3c4
parent	bf686b2785c63116ab4ab7e62eb77be51b97d346 (diff)