From abbbe876ac98596b143da295abf6887e0a4e50d2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:19:12 -0600 Subject: gallium: new PPC built-in constants array It's hard to form PPC vector immediates so load them from an array. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8 +++-- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 61 ++++++++++++++++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_ppc.h | 3 ++ 3 files changed, 63 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index fcc9cbfec54..8eff6d4fda3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -55,7 +55,8 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], float (*immeds)[4][4], - float (*consts)[4]); + float (*consts)[4], + const float *builtins); #if 0 const struct tgsi_exec_vector *input, @@ -151,7 +152,8 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, #else shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4][4]) shader->base.immediates, - (float (*)[4]) constants); + (float (*)[4]) constants, + ppc_builtin_constants); /*output[0][0] = input[0][0] * 0.5;*/ #endif @@ -246,7 +248,9 @@ draw_create_vs_ppc(struct draw_context *draw, return &vs->base; fail: + /* debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ ppc_release_func( &vs->ppc_program ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 9d7de41fe7f..6b05fd16cf9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -36,6 +36,7 @@ #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_sse.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -44,6 +45,14 @@ #include "rtasm/rtasm_ppc.h" +/** + * Since it's pretty much impossible to form PPC vector immediates, load + * them from memory here: + */ +const float ppc_builtin_constants[] ALIGN16_ATTRIB = { + 1.0f, -128.0f, 128.0, 0.0 +}; + #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -81,12 +90,46 @@ struct gen_context int temps_reg; /**< GP register pointing to temporary "registers" */ int immed_reg; /**< GP register pointing to immediates buffer */ int const_reg; /**< GP register pointing to constants buffer */ + int builtins_reg; /**< GP register pointint to built-in constants */ int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ }; +/** + * Load the given vector register with {value, value, value, value}. + * The value must be in the ppu_builtin_constants[] array. + * We wouldn't need this if there was a simple way to load PPC vector + * registers with immediate values! + */ +static void +load_constant_vec(struct gen_context *gen, int dst_vec, float value) +{ + uint pos; + for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { + if (ppc_builtin_constants[pos] == value) { + int offset_reg = ppc_allocate_register(gen->f); + int offset = pos * 4; + + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our builtins start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); + /* splat word[pos % 4] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); + ppc_release_register(gen->f, offset_reg); + return; + } + } + assert(0 && "Need to add new constant to ppc_builtin_constants array"); +} + + /** * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. */ @@ -95,7 +138,7 @@ gen_one_vec(struct gen_context *gen) { if (gen->one_vec < 0) { gen->one_vec = ppc_allocate_vec_register(gen->f); - ppc_vload_float(gen->f, gen->one_vec, 1.0f); + load_constant_vec(gen, gen->one_vec, 1.0f); } return gen->one_vec; } @@ -115,7 +158,6 @@ gen_get_bit31_vec(struct gen_context *gen) } - /** * Register fetch, put result in 'dst_vec'. */ @@ -182,7 +224,7 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_EXTSWIZZLE_ZERO: - ppc_vload_float(gen->f, dst_vec, 0.0f); + ppc_vzero(gen->f, dst_vec); break; case TGSI_EXTSWIZZLE_ONE: { @@ -544,7 +586,7 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) int t_vec = ppc_allocate_vec_register(f); int zero_vec = ppc_allocate_vec_register(f); - ppc_vload_float(f, zero_vec, 0.0f); + ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ @@ -573,7 +615,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ - ppc_vload_float(gen->f, zero_vec, 0.0f); /* zero = {0,0,0,0} */ + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { @@ -586,7 +628,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) int w_vec = ppc_allocate_vec_register(gen->f); int pow_vec = ppc_allocate_vec_register(gen->f); int pos_vec = ppc_allocate_vec_register(gen->f); - int c128_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ @@ -594,7 +637,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ /* XXX clamp Y to [-128, 128] */ - ppc_vload_float(gen->f, c128_vec, 128.0f); + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); /* if temp.x > 0 * pow(tmp.y, tmp.w) @@ -613,6 +657,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_release_vec_register(gen->f, w_vec); ppc_release_vec_register(gen->f, pow_vec); ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); } ppc_release_vec_register(gen->f, x_vec); @@ -798,6 +844,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ gen.immed_reg = ppc_reserve_register(func, 6); gen.const_reg = ppc_reserve_register(func, 7); + gen.builtins_reg = ppc_reserve_register(func, 8); gen.one_vec = -1; gen.bit31_vec = -1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h index 7cd2bf9aff2..829ec075e7f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -35,6 +35,9 @@ extern "C" { struct tgsi_token; struct ppc_function; +extern const float ppc_builtin_constants[]; + + boolean tgsi_emit_ppc(const struct tgsi_token *tokens, struct ppc_function *function, -- cgit v1.2.3