diff options
author | Brian Paul <[email protected]> | 2012-06-29 17:10:58 -0600 |
---|---|---|
committer | Brian Paul <[email protected]> | 2012-07-05 08:03:19 -0600 |
commit | d594f72e1615cda47b838046df4590316da3d1a9 (patch) | |
tree | 2b1e4f367d8ff81b7fb340fda49041e7b0041132 /src | |
parent | 30f8575fde673f2279aee1fbe89e7df07cb81081 (diff) |
svga: fix CMP translation for vertex shaders
Converting CMP to SLT+LRP didn't work when src1 or src2 was Inf/NaN.
That's the case for GLSL sqrt(0). sqrt(0) actually happens in many
piglit auto-generated tests that use the distance() function.
v2: remove debug/devel code, per Jose
Reviewed-by: José Fonseca <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/svga/svga_tgsi_insn.c | 73 |
1 file changed, 37 insertions, 36 deletions
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 40c743306c7..cd01bf576ed 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -896,42 +896,6 @@ static boolean emit_ceil(struct svga_shader_emitter *emit, } -/* Translate the following TGSI CMP instruction. - * CMP DST, SRC0, SRC1, SRC2 - * To the following SVGA3D instruction sequence. - * CMP DST, SRC0, SRC2, SRC1 - */ -static boolean emit_cmp(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn ) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - const struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - const struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - const struct src_register src2 = translate_src_register( - emit, &insn->Src[2] ); - - if (emit->unit == PIPE_SHADER_VERTEX) { - SVGA3dShaderDestToken temp = get_temp(emit); - struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X); - - /* Since vertex shaders don't support the CMP instruction, - * simulate it with SLT and LRP instructions. - * SLT TMP, SRC0, 0.0 - * LRP DST, TMP, SRC1, SRC2 - */ - if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero)) - return FALSE; - return submit_lrp(emit, dst, src(temp), src1, src2); - } - - /* CMP DST, SRC0, SRC2, SRC1 */ - return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1); -} - - - /* Translate the following TGSI DIV instruction. * DIV DST.xy, SRC0, SRC1 * To the following SVGA3D instruction sequence. @@ -1445,6 +1409,43 @@ static boolean emit_select_op(struct svga_shader_emitter *emit, } +/** + * Translate TGSI CMP instruction. 
+ */ +static boolean +emit_cmp(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); + const struct src_register src0 = + translate_src_register(emit, &insn->Src[0] ); + const struct src_register src1 = + translate_src_register(emit, &insn->Src[1] ); + const struct src_register src2 = + translate_src_register(emit, &insn->Src[2] ); + + if (emit->unit == PIPE_SHADER_VERTEX) { + struct src_register zero = + scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X); + /* We used to simulate CMP with SLT+LRP. But that didn't work when + * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed + * because it involves a CMP to handle the 0 case. + * Use a conditional expression instead. + */ + return emit_conditional(emit, PIPE_FUNC_LESS, dst, + src0, zero, src1, src2); + } + else { + assert(emit->unit == PIPE_SHADER_FRAGMENT); + + /* CMP DST, SRC0, SRC2, SRC1 */ + return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, + src0, src2, src1); + } + +} + + /* Translate texture instructions to SVGA3D representation. */ static boolean emit_tex2(struct svga_shader_emitter *emit, |