diff options
Diffstat (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.c')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_vs_aos.c | 55 |
1 files changed, 38 insertions, 17 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0c693a4a65c..9e37a26c1e2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -32,7 +32,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -143,7 +143,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float)); default: - ERROR(cp, "unknown reg file"); + AOS_ERROR(cp, "unknown reg file"); return x86_make_reg(0,0); } } @@ -177,7 +177,7 @@ static void spill( struct aos_compilation *cp, unsigned idx ) (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */ cp->xmm[idx].file != TGSI_FILE_OUTPUT && cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) { - ERROR(cp, "invalid spill"); + AOS_ERROR(cp, "invalid spill"); return; } else { @@ -284,6 +284,15 @@ void aos_release_xmm_reg( struct aos_compilation *cp, } +static void aos_soft_release_xmm( struct aos_compilation *cp, + struct x86_reg reg ) +{ + if (reg.file == file_XMM) { + assert(cp->xmm[reg.idx].last_used == cp->insn_counter); + cp->xmm[reg.idx].last_used = cp->insn_counter - 1; + } +} + /* Mark an xmm reg as holding the current copy of a shader reg. @@ -534,7 +543,7 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, switch (swizzle) { case TGSI_EXTSWIZZLE_ZERO: case TGSI_EXTSWIZZLE_ONE: - ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2"); + AOS_ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2"); break; default: @@ -555,7 +564,7 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, break; default: - ERROR(cp, "unsupported sign-mode"); + AOS_ERROR(cp, "unsupported sign-mode"); break; } } @@ -584,15 +593,17 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); + aos_soft_release_xmm(cp, imm_swz); } else if (negs) { struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS); sse_mulps(cp->func, dst, imm_negs); + aos_soft_release_xmm(cp, imm_negs); } if (abs && abs != 0xf) { - ERROR(cp, "unsupported partial abs"); + AOS_ERROR(cp, "unsupported partial abs"); } else if (abs) { struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); @@ -603,8 +614,10 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, sse_maxps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); + aos_soft_release_xmm(cp, neg); } + aos_soft_release_xmm(cp, arg0); return dst; } @@ -657,7 +670,7 @@ static void x87_fld_src( struct aos_compilation *cp, break; default: - ERROR(cp, "unsupported sign-mode"); + AOS_ERROR(cp, "unsupported sign-mode"); break; } } @@ -1559,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - if (0) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg r = aos_get_xmm_reg(cp); @@ -1568,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg r = aos_get_xmm_reg(cp); + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); struct x86_reg src = get_xmm_writable( cp, arg0 ); - - sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ - sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ - sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ - sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movaps(cp->func, tmp, src); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, src); + + sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */ + sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ store_scalar_dest(cp, &op->FullDstRegisters[0], r); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; } } |