diff options
author | Ian Romanick <[email protected]> | 2016-06-21 17:18:04 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2016-07-19 12:19:29 -0700 |
commit | c2019c6c261d5c46a4e5d3edc88836bcedf75f30 (patch) | |
tree | 8bc7edcf73d6ceecc51c14b23dd40282ebcaa941 /src/mesa/drivers | |
parent | de20086eed47e6bfe7c25835d72383114f99c7a9 (diff) |
i965: Use LZD to implement nir_op_ifind_msb on Gen < 7
v2: Retype LZD source as UD to avoid potential problems with 0x80000000.
Suggested by Matt. Also update comment about problem values with
LZD(abs(x)). Suggested by Curro.
Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 54 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 57 |
2 files changed, 90 insertions, 21 deletions
```diff
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 11215562738..5bf9aa4bbc7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -621,8 +621,36 @@ emit_find_msb_using_lzd(const fs_builder &bld,
                         bool is_signed)
 {
    fs_inst *inst;
+   fs_reg temp = src;
 
-   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
+   if (is_signed) {
+      /* LZD of an absolute value source almost always does the right
+       * thing. There are two problem values:
+       *
+       * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
+       *   0. However, findMSB(int(0x80000000)) == 30.
+       *
+       * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
+       *   31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+       *
+       *    For a value of zero or negative one, -1 will be returned.
+       *
+       * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
+       *   findMSB(-(1<<x)) should return x-1.
+       *
+       * For all negative number cases, including 0x80000000 and
+       * 0xffffffff, the correct value is obtained from LZD if instead of
+       * negating the (already negative) value the logical-not is used. A
+       * conditonal logical-not can be achieved in two instructions.
+       */
+      temp = bld.vgrf(BRW_REGISTER_TYPE_D);
+
+      bld.ASR(temp, src, brw_imm_d(31));
+      bld.XOR(temp, temp, src);
+   }
+
+   bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
+           retype(temp, BRW_REGISTER_TYPE_UD));
 
    /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
     * from the LSB side. Subtract the result from 31 to convert the MSB
@@ -1337,17 +1365,23 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
 
    case nir_op_ifind_msb: {
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
 
-      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
-       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
-       * subtract the result from 31 to convert the MSB count into an LSB count.
-       */
-      bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+      if (devinfo->gen < 7) {
+         emit_find_msb_using_lzd(bld, result, op[0], true);
+      } else {
+         bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
 
-      inst = bld.ADD(result, result, brw_imm_d(31));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->src[0].negate = true;
+         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
+          * count from the LSB side. If FBH didn't return an error
+          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
+          * count into an LSB count.
+          */
+         bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+
+         inst = bld.ADD(result, result, brw_imm_d(31));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->src[0].negate = true;
+      }
       break;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 352d88af479..85fa775dc6b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1000,8 +1000,36 @@ emit_find_msb_using_lzd(const vec4_builder &bld,
                         bool is_signed)
 {
    vec4_instruction *inst;
+   src_reg temp = src;
 
-   bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src);
+   if (is_signed) {
+      /* LZD of an absolute value source almost always does the right
+       * thing. There are two problem values:
+       *
+       * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
+       *   0. However, findMSB(int(0x80000000)) == 30.
+       *
+       * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
+       *   31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
+       *
+       *    For a value of zero or negative one, -1 will be returned.
+       *
+       * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
+       *   findMSB(-(1<<x)) should return x-1.
+       *
+       * For all negative number cases, including 0x80000000 and
+       * 0xffffffff, the correct value is obtained from LZD if instead of
+       * negating the (already negative) value the logical-not is used. A
+       * conditonal logical-not can be achieved in two instructions.
+       */
+      temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
+
+      bld.ASR(dst_reg(temp), src, brw_imm_d(31));
+      bld.XOR(dst_reg(temp), temp, src);
+   }
+
+   bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
+           retype(temp, BRW_REGISTER_TYPE_UD));
 
    /* LZD counts from the MSB side, while GLSL's findMSB() wants the count
     * from the LSB side. Subtract the result from 31 to convert the MSB count
@@ -1485,18 +1513,25 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
       break;
 
    case nir_op_ifind_msb: {
-      emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
-
-      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
-       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
-       * subtract the result from 31 to convert the MSB count into an LSB count.
-       */
+      vec4_builder bld = vec4_builder(this).at_end();
       src_reg src(dst);
-      emit(CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ));
 
-      inst = emit(ADD(dst, src, brw_imm_d(31)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
-      inst->src[0].negate = true;
+      if (devinfo->gen < 7) {
+         emit_find_msb_using_lzd(bld, dst, op[0], true);
+      } else {
+         emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
+
+         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
+          * count from the LSB side. If FBH didn't return an error
+          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
+          * count into an LSB count.
+          */
+         bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+
+         inst = bld.ADD(dst, src, brw_imm_d(31));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->src[0].negate = true;
+      }
       break;
    }
 
```