diff options
author | Chad Versace <[email protected]> | 2013-01-09 11:46:42 -0800 |
---|---|---|
committer | Chad Versace <[email protected]> | 2013-01-24 21:31:06 -0800 |
commit | 20dfa501b3950c1d2f5da7126091792f5eb67038 (patch) | |
tree | e4dae62f0df6728cb0cdde1f53c84f0002e30f19 /src/mesa/drivers/dri/i965/brw_fs_emit.cpp | |
parent | 203c12b18feb596999d9512e108408e72dd4ffd3 (diff) |
i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v4)
v2: Remove lewd comment. [for idr]
v3: - Optimize away tmp register for packHalf2x16. [for anholt, paul]
- Improve comments. [for anholt, paul]
- Reduce near-duplicate code by removing vec4_visitor emit_pack/unpack
methods. [for chadv]
v4: Factor out UD/W register conversion into helper function. [for anholt]
Reviewed-by: Eric Anholt <[email protected]>
Reviewed-by: Ian Romanick <[email protected]> (v2)
Signed-off-by: Chad Versace <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_emit.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 105 |
1 files changed, 104 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 324e6656d1f..27c5302b9f5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -922,6 +922,95 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
    brw_pop_insn_state(p);
 }
 
+/**
+ * Change the register's data type from UD to W, doubling the strides in order
+ * to compensate for halving the data type width.
+ */
+static struct brw_reg
+ud_reg_to_w(struct brw_reg r)
+{
+   assert(r.type == BRW_REGISTER_TYPE_UD);
+   r.type = BRW_REGISTER_TYPE_W;
+
+   /* The BRW_*_STRIDE enums are defined so that incrementing the field
+    * doubles the real stride.
+    */
+   if (r.hstride != 0)
+      ++r.hstride;
+   if (r.vstride != 0)
+      ++r.vstride;
+
+   return r;
+}
+
+void
+fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+                                            struct brw_reg dst,
+                                            struct brw_reg x,
+                                            struct brw_reg y)
+{
+   assert(intel->gen >= 7);
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+   assert(x.type == BRW_REGISTER_TYPE_F);
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
+    *
+    *   Because this instruction does not have a 16-bit floating-point type,
+    *   the destination data type must be Word (W).
+    *
+    *   The destination must be DWord-aligned and specify a horizontal stride
+    *   (HorzStride) of 2. The 16-bit result is stored in the lower word of
+    *   each destination channel and the upper word is not modified.
+    */
+   struct brw_reg dst_w = ud_reg_to_w(dst);
+
+   /* Give each 32-bit channel of dst the form below, where "." means
+    * unchanged.
+    *   0x....hhhh
+    */
+   brw_F32TO16(p, dst_w, y);
+
+   /* Now the form:
+    *   0xhhhh0000
+    */
+   brw_SHL(p, dst, dst, brw_imm_ud(16u));
+
+   /* And, finally the form of packHalf2x16's output:
+    *   0xhhhhllll
+    */
+   brw_F32TO16(p, dst_w, x);
+}
+
+void
+fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+                                              struct brw_reg dst,
+                                              struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+
+   /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
+    *
+    *   Because this instruction does not have a 16-bit floating-point type,
+    *   the source data type must be Word (W). The destination type must be
+    *   F (Float).
+    */
+   struct brw_reg src_w = ud_reg_to_w(src);
+
+   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+    * For the Y case, we wish to access only the upper word; therefore
+    * a 16-bit subregister offset is needed.
+    */
+   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+      src_w.subnr += 2;
+
+   brw_F16TO32(p, dst, src_w);
+}
+
 void
 fs_generator::generate_code(exec_list *instructions)
 {
@@ -1082,7 +1171,12 @@ fs_generator::generate_code(exec_list *instructions)
       case BRW_OPCODE_SHL:
          brw_SHL(p, dst, src[0], src[1]);
          break;
-
+      case BRW_OPCODE_F32TO16:
+         brw_F32TO16(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_F16TO32:
+         brw_F16TO32(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CMP:
          brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
          break;
@@ -1229,6 +1323,15 @@ fs_generator::generate_code(exec_list *instructions)
          generate_set_global_offset(inst, dst, src[0], src[1]);
          break;
 
+      case FS_OPCODE_PACK_HALF_2x16_SPLIT:
+         generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
+         break;
+
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+         generate_unpack_half_2x16_split(inst, dst, src[0]);
+         break;
+
       default:
          if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
             _mesa_problem(ctx, "Unsupported opcode `%s' in FS",