diff options
Diffstat (limited to 'src/gallium/auxiliary/rtasm')
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.c | 365 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc.h | 181 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 245 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 35 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 66 | ||||
-rw-r--r-- | src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 11 |
6 files changed, 853 insertions, 50 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c new file mode 100644 index 00000000000..534a23568d5 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -0,0 +1,365 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * \author Brian Paul + */ + + +#include "util/u_memory.h" +#include "pipe/p_debug.h" +#include "rtasm_ppc.h" + + +void +ppc_init_func(struct ppc_function *p, unsigned max_inst) +{ + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->vec_used = ~0; +} + + +void +ppc_release_func(struct ppc_function *p) +{ + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +/** + * Alloate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p, int reg) +{ + unsigned i; + for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->vec_used & mask) != 0) { + p->vec_used &= ~mask; + return i; + } + } + + return -1; +} + + +/** + * Mark the given vector register as "unallocated". + */ +void +ppc_release_vec_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_VEC_REGS); + assert((p->vec_used & (1 << reg)) == 0); + + p->vec_used |= (1 << reg); +} + + + +union vx_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned op2:11; + } inst; +}; + +union vxr_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned rC:1; + unsigned op2:10; + } inst; +}; + +union va_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned vC:5; + unsigned op2:6; + } inst; +}; + + +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + +static inline void +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +{ + union va_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.vC = vC; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB) +{ + emit_vx(p, 10, vD, vA, vB); +} + +/** vector float substract */ +void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 74, vD, vA, vB); +} + +/** vector float min */ +void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1098, vD, vA, vB); +} + +/** vector float max */ +void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1034, vD, vA, vB); +} + +/** vector float mult add */ +void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 46, vD, vA, vB, vC); +} + +/** vector float compare greater than */ +void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 710, vD, vA, vB); +} + +/** vector float compare greater than or equal to */ +void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 454, vD, vA, vB); +} + +/** vector float compare equal */ +void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 198, vD, vA, vB); +} + +/** vector float 2^x */ +void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 394, vD, 0, vB); +} + +/** vector float log2(x) */ +void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 458, vD, 0, vB); +} + +/** vector float reciprocol */ +void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 266, vD, 0, vB); +} + +/** vector float reciprocol sqrt estimate */ +void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 330, vD, 0, vB); +} + +/** vector float round to negative infinity */ +void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 714, vD, 0, vB); +} + +/** vector float round to positive infinity */ +void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 650, vD, 0, vB); +} + +/** vector float round to nearest int */ +void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 522, vD, 0, vB); +} + +/** vector float round to int toward zero */ +void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 586, vD, 0, vB); +} + + + +/** + ** bitwise operations + **/ + + +/** vector and */ +void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1028, vD, vA, vB); +} + +/** vector and complement */ +void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1092, vD, vA, vB); +} + +/** vector or */ +void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1156, vD, vA, vB); +} + +/** vector nor */ +void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1284, vD, vA, vB); +} + +/** vector xor */ +void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1220, vD, vA, vB); +} + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 43, vD, vA, vB, vC); +} + +/** vector select */ +void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 42, vD, vA, vB, vC); +} + +/** vector splat byte */ +void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 42, vD, imm, vB); +} + +/** vector splat half word */ +void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 588, vD, imm, vB); +} + +/** vector splat word */ +void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 652, vD, imm, vB); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h new file mode 100644 index 00000000000..ed14e943df6 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * \author Brian Paul + */ + + +#ifndef RTASM_PPC_H +#define RTASM_PPC_H + + +#include "pipe/p_compiler.h" + + +#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ + +#define PPC_NUM_VEC_REGS 32 + + +struct ppc_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + uint32_t vec_used; /** used/free vector registers bitmask */ + uint32_t reg_used; /** used/free general-purpose registers bitmask */ +}; + + + +extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_release_func(struct ppc_function *p); + +extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void ppc_release_vec_register(struct ppc_function *p, int reg); + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +extern void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB); + +/** vector float substract */ +extern void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float min */ +extern void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float max */ +extern void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float mult add */ +extern void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float compare greater than */ +extern void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare greater than or equal to */ +extern void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare equal */ +extern void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float 2^x */ +extern void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float log2(x) */ +extern void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol */ +extern void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol sqrt estimate */ +extern void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to negative infinity */ +extern void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to positive infinity */ +extern void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to nearest int */ +extern void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to int toward zero */ +extern void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); + + + +/** + ** bitwise operations + **/ + + +/** vector and */ +extern void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector and complement */ +extern void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector or */ +extern void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector nor */ +extern void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector xor */ +extern void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +extern void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector select */ +extern void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector splat byte */ +extern void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat half word */ +extern void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat word */ +extern void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); + + +#endif /* RTASM_PPC_H */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a04cc6c4ff7..491141f1908 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -27,12 +27,16 @@ * Real-time assembly generation interface for Cell B.E. SPEs. * * \author Ian Romanick <[email protected]> + * \author Brian Paul */ + +#include <stdio.h> #include "pipe/p_compiler.h" #include "util/u_memory.h" #include "rtasm_ppc_spe.h" + #ifdef GALLIUM_CELL /** * SPE instruction types @@ -143,8 +147,25 @@ union spe_inst_RI18 { /*@}*/ +static void +indent(const struct spe_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +static const char * +rem_prefix(const char *longname) +{ + return longname + 4; +} + + static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) + unsigned rA, unsigned rB, const char *name) { union spe_inst_RR inst; inst.inst.op = op; @@ -153,11 +174,15 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB); + } } static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) + unsigned rA, unsigned rB, unsigned rC, const char *name) { union spe_inst_RRR inst; inst.inst.op = op; @@ -167,11 +192,15 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rC = rC; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, $%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB, rC); + } } static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI7 inst; inst.inst.op = op; @@ -180,12 +209,16 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI8 inst; inst.inst.op = op; @@ -194,12 +227,16 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI10 inst; inst.inst.op = op; @@ -208,11 +245,19 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + if (strcmp(name, "spe_lqd") == 0 || + strcmp(name, "spe_stqd") == 0) + printf("%s\t$%d, 0x%x($%d)\n", rem_prefix(name), rT, imm, rA); + else + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) + int imm, const char *name) { union spe_inst_RI16 inst; inst.inst.op = op; @@ -220,11 +265,15 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); + } } static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) + int imm, const char *name) { union spe_inst_RI18 inst; inst.inst.op = op; @@ -232,6 +281,10 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); + } } @@ -240,61 +293,61 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, #define EMIT_(_name, _op) \ void _name (struct spe_function *p, unsigned rT) \ { \ - emit_RR(p, _op, rT, 0, 0); \ + emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ } #define EMIT_R(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA) \ { \ - emit_RR(p, _op, rT, rA, 0); \ + emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ } #define EMIT_RR(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ { \ - emit_RR(p, _op, rT, rA, rB); \ + emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ } #define EMIT_RRR(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ { \ - emit_RRR(p, _op, rT, rA, rB, rC); \ + emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ } #define EMIT_RI7(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI7(p, _op, rT, rA, imm); \ + emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI8(_name, _op, bias) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI8(p, _op, rT, rA, bias - imm); \ + emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ } #define EMIT_RI10(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI10(p, _op, rT, rA, imm); \ + emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI16(_name, _op) \ void _name (struct spe_function *p, unsigned rT, int imm) \ { \ - emit_RI16(p, _op, rT, imm); \ + emit_RI16(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_RI18(_name, _op) \ void _name (struct spe_function *p, unsigned rT, int imm) \ { \ - emit_RI18(p, _op, rT, imm); \ + emit_RI18(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_I16(_name, _op) \ void _name (struct spe_function *p, int imm) \ { \ - emit_RI16(p, _op, 0, imm); \ + emit_RI16(p, _op, 0, imm, __FUNCTION__); \ } #include "rtasm_ppc_spe.h" @@ -314,6 +367,9 @@ void spe_init_func(struct spe_function *p, unsigned code_size) */ p->regs[0] = ~7; p->regs[1] = (1U << (80 - 64)) - 1; + + p->print = false; + p->indent = 0; } @@ -327,8 +383,15 @@ void spe_release_func(struct spe_function *p) } +/** Return current code size in bytes. */ +unsigned spe_code_size(const struct spe_function *p) +{ + return p->num_inst * SPE_INST_SIZE; +} + + /** - * Alloate a SPE register. + * Allocate a SPE register. * \return register index or -1 if none left. */ int spe_allocate_available_register(struct spe_function *p) @@ -382,6 +445,32 @@ void spe_release_register(struct spe_function *p, int reg) } +void +spe_print_code(struct spe_function *p, boolean enable) +{ + p->print = enable; +} + + +void +spe_indent(struct spe_function *p, int spaces) +{ + p->indent += spaces; +} + + +extern void +spe_comment(struct spe_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + /** * For branch instructions: * \param d if 1, disable interupts if branch is taken @@ -392,51 +481,51 @@ void spe_release_register(struct spe_function *p, int reg) /** Branch Indirect to address in rA */ void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Interupt Return */ void spe_iret(struct spe_function *p, unsigned rA, int d, int e) { - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link on external data */ void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link. Save PC in rT, jump to rA. */ void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } @@ -505,7 +594,28 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) } else { spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (i & 0xffff) + spe_iohl(p, rT, i & 0xffff); + } +} + +void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. Otherwise, we have + * to use ilhu followed by iohl. + */ + if ((ui & 0xfffc0000) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else { + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); } } @@ -513,22 +623,29 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) { - spe_ila(p, rT, 66051); + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, rT, 0x00010203); spe_shufb(p, rT, rA, rA, rT); } void -spe_complement(struct spe_function *p, unsigned rT) +spe_complement(struct spe_function *p, unsigned rT, unsigned rA) { - spe_nor(p, rT, rT, rT); + spe_nor(p, rT, rA, rA); } void spe_move(struct spe_function *p, unsigned rT, unsigned rA) { - spe_ori(p, rT, rA, 0); + /* Use different instructions depending on the instruction address + * to take advantage of the dual pipelines. + */ + if (p->num_inst & 1) + spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ + else + spe_ori(p, rT, rA, 0); /* even pipe */ } @@ -539,4 +656,70 @@ spe_zero(struct spe_function *p, unsigned rT) } +void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) +{ + assert(word >= 0); + assert(word <= 3); + + if (word == 0) { + int tmp1 = rT; + spe_ila(p, tmp1, 66051); + spe_shufb(p, rT, rA, rA, tmp1); + } + else { + /* XXX review this, we may not need the rotqbyi instruction */ + int tmp1 = rT; + int tmp2 = spe_allocate_available_register(p); + + spe_ila(p, tmp1, 66051); + spe_rotqbyi(p, tmp2, rA, 4 * word); + spe_shufb(p, rT, tmp2, tmp2, tmp1); + + spe_release_register(p, tmp2); + } +} + +/** + * For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. + * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rB > rA, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rt == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/** + * For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. + * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. + */ +void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d95e5aace34..61c7edeb604 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -28,6 +28,7 @@ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick <[email protected]> + * \author Brian Paul */ #ifndef RTASM_PPC_SPE_H @@ -39,10 +40,10 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 -/** Return Address register */ +/** Return Address register (aka $lr / Link Register) */ #define SPE_REG_RA 0 -/** Stack Pointer register */ +/** Stack Pointer register (aka $sp) */ #define SPE_REG_SP 1 @@ -63,15 +64,25 @@ struct spe_function * spe_release_register */ uint64_t regs[SPE_NUM_REGS / 64]; + + boolean print; /**< print/dump instructions as they're emitted? */ + int indent; /**< number of spaces to indent */ }; + extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern unsigned spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_print_code(struct spe_function *p, boolean enable); +extern void spe_indent(struct spe_function *p, int spaces); +extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); + + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ @@ -292,13 +303,17 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); extern void spe_load_int(struct spe_function *p, unsigned rT, int i); +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); -/** Complement/invert all bits in rT. */ +/** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT); +spe_complement(struct spe_function *p, unsigned rT, unsigned rA); /** rT = rA. */ extern void @@ -308,6 +323,18 @@ spe_move(struct spe_function *p, unsigned rT, unsigned rA); extern void spe_zero(struct spe_function *p, unsigned rT); +/** rT = splat(rA, word) */ +extern void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); + +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + /* Floating-point instructions */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 6d4c081e04e..3bba9dcc076 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! */ } @@ -435,25 +436,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) { - DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); - emit_1ub(p, 0x80); - emit_modrm_noreg(p, 0, dst); - emit_1ub(p, imm); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af94577aaba..510aa1b0dec 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): |