summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorBen Skeggs <[email protected]>2008-09-12 20:33:59 +1000
committerBen Skeggs <[email protected]>2008-09-12 20:33:59 +1000
commit81335d0f1760fe172a106f79e81281c1f0d7dedf (patch)
treef081924b0dad0fdfb1fa94580d63a88498469909 /src/gallium/auxiliary
parentf302fca5eb63e4bca8af5b35c585451486143e6a (diff)
parentaa66f08a21b791f338b519f0c2162cd8f7b3aeb0 (diff)
Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c151
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h56
2 files changed, 176 insertions, 31 deletions
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
index 285ddc0e3f3..a04cc6c4ff7 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.rB = rB;
inst.inst.rA = rA;
inst.inst.rC = rC;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i7 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i8 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.i10 = imm;
inst.inst.rA = rA;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i16 = imm;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT,
inst.inst.op = op;
inst.inst.i18 = imm;
inst.inst.rT = rT;
- *p->csr = inst.bits;
- p->csr++;
+ p->store[p->num_inst++] = inst.bits;
+ assert(p->num_inst <= p->max_inst);
}
@@ -300,13 +300,16 @@ void _name (struct spe_function *p, int imm) \
#include "rtasm_ppc_spe.h"
-/*
+/**
+ * Initialize an spe_function.
+ * \param code_size size of instruction buffer to allocate, in bytes.
*/
void spe_init_func(struct spe_function *p, unsigned code_size)
{
p->store = align_malloc(code_size, 16);
- p->csr = p->store;
-
+ p->num_inst = 0;
+ p->max_inst = code_size / SPE_INST_SIZE;
+
/* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
*/
p->regs[0] = ~7;
@@ -316,21 +319,26 @@ void spe_init_func(struct spe_function *p, unsigned code_size)
void spe_release_func(struct spe_function *p)
{
+ assert(p->num_inst <= p->max_inst);
if (p->store != NULL) {
align_free(p->store);
}
p->store = NULL;
- p->csr = NULL;
}
+/**
+ * Allocate an SPE register.
+ * \return register index or -1 if none left.
+ */
int spe_allocate_available_register(struct spe_function *p)
{
unsigned i;
- for (i = 0; i < 128; i++) {
+ for (i = 0; i < SPE_NUM_REGS; i++) {
const uint64_t mask = (1ULL << (i % 64));
const unsigned idx = i / 64;
+ assert(idx < 2);
if ((p->regs[idx] & mask) != 0) {
p->regs[idx] &= ~mask;
return i;
@@ -341,11 +349,15 @@ int spe_allocate_available_register(struct spe_function *p)
}
+/**
+ * Mark the given SPE register as "allocated".
+ */
int spe_allocate_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
+ assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) != 0);
p->regs[idx] &= ~(1ULL << bit);
@@ -353,57 +365,75 @@ int spe_allocate_register(struct spe_function *p, int reg)
}
+/**
+ * Mark the given SPE register as "unallocated".
+ */
void spe_release_register(struct spe_function *p, int reg)
{
const unsigned idx = reg / 64;
const unsigned bit = reg % 64;
+ assert(idx < 2);
+
+ assert(reg < SPE_NUM_REGS);
assert((p->regs[idx] & (1ULL << bit)) == 0);
p->regs[idx] |= (1ULL << bit);
}
+/**
+ * For branch instructions:
+ * \param d if 1, disable interrupts if branch is taken
+ * \param e if 1, enable interrupts if branch is taken
+ * If d and e are both zero, don't change interrupt status (right?)
+ */
-
+/** Branch Indirect to address in rA */
void spe_bi(struct spe_function *p, unsigned rA, int d, int e)
{
emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4));
}
+/** Interrupt Return */
void spe_iret(struct spe_function *p, unsigned rA, int d, int e)
{
emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4));
}
+/** Branch indirect and set link on external data */
void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d,
int e)
{
emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect and set link. Save PC in rT, jump to rA. */
void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d,
int e)
{
emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4));
}
-void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d,
- int e)
+/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
+void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4));
}
+/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e)
{
emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4));
@@ -432,4 +462,81 @@ EMIT_R (spe_mfspr, 0x00c);
EMIT_R (spe_mtspr, 0x10c);
#endif
+
+/**
+ ** Helper / "macro" instructions.
+ ** Use somewhat verbose names as a reminder that these aren't native
+ ** SPE instructions.
+ **/
+
+
+/**
+ * Emit code that loads the float constant \p x into register rT.
+ *
+ * The constant is assembled from its 32-bit pattern instead of by
+ * comparing float values: (x == 0.0f) is also true for -0.0f, which
+ * would silently drop the sign bit.
+ *
+ * Emits one instruction when the low halfword of the pattern is zero
+ * (this covers 0.0, 0.5, 1.0 and -1.0 among others), two otherwise.
+ *
+ * \param p   function being assembled
+ * \param rT  destination register
+ * \param x   value to load
+ */
+void
+spe_load_float(struct spe_function *p, unsigned rT, float x)
+{
+   /* type-pun through a union; assumes float and unsigned are 32 bits */
+   union {
+      float f;
+      unsigned u;
+   } bits;
+   unsigned hi, lo;
+
+   bits.f = x;
+   hi = (bits.u >> 16) & 0xffff;
+   lo = bits.u & 0xffff;
+
+   if (bits.u == 0) {
+      /* +0.0f only; -0.0f has the sign bit set and takes the path below */
+      spe_il(p, rT, 0x0);
+   }
+   else {
+      /* ilhu sets the upper halfword and leaves the lower halfword zero */
+      spe_ilhu(p, rT, hi);
+      if (lo != 0) {
+         /* OR in the lower halfword only when it contributes bits */
+         spe_iohl(p, rT, lo);
+      }
+   }
+}
+
+
+/**
+ * Emit code that loads the integer constant \p i into register rT.
+ *
+ * Values that fit a signed 16-bit immediate use a single il.  Anything
+ * else is split into halfwords: ilhu for the upper half, followed by
+ * iohl for the lower half when that half is non-zero.
+ *
+ * \param p   function being assembled
+ * \param rT  destination register
+ * \param i   value to load
+ */
+void
+spe_load_int(struct spe_function *p, unsigned rT, int i)
+{
+   if (-32768 <= i && i <= 32767) {
+      spe_il(p, rT, i);
+   }
+   else {
+      /* Split via unsigned: right-shifting a negative int is
+       * implementation-defined, an unsigned shift is not.
+       */
+      const unsigned bits = (unsigned) i;
+      const unsigned hi = (bits >> 16) & 0xffff;
+      const unsigned lo = bits & 0xffff;
+
+      spe_ilhu(p, rT, hi);
+      if (lo != 0) {
+         spe_iohl(p, rT, lo);
+      }
+   }
+}
+
+
+/**
+ * Emit code that replicates word 0 of rA into all four words of rT.
+ *
+ * A shuffle pattern selecting source bytes 0,1,2,3 for every word is
+ * loaded with ila and then applied with shufb.  The pattern normally
+ * lives in rT itself; when rT and rA are the same register that would
+ * overwrite the source before shufb reads it, so a scratch register is
+ * borrowed for the pattern in that case.
+ *
+ * \param p   function being assembled
+ * \param rT  destination register
+ * \param rA  source register (may equal rT)
+ */
+void
+spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
+{
+   unsigned pattern_reg = rT;
+   int scratch = -1;
+
+   if (rT == rA) {
+      scratch = spe_allocate_available_register(p);
+      assert(scratch >= 0);   /* -1 means no register was free */
+      pattern_reg = (unsigned) scratch;
+   }
+
+   /* 0x00010203 (== 66051): byte selectors 0,1,2,3 in every word */
+   spe_ila(p, pattern_reg, 0x00010203);
+   spe_shufb(p, rT, rA, rA, pattern_reg);
+
+   if (scratch >= 0) {
+      spe_release_register(p, scratch);
+   }
+}
+
+
+/**
+ * Emit code that inverts every bit of rT in place.
+ * Implemented as nor rT, rT, rT, i.e. ~(rT | rT).
+ */
+void
+spe_complement(struct spe_function *p, unsigned rT)
+{
+ spe_nor(p, rT, rT, rT);
+}
+
+
+/**
+ * Emit code that copies rA into rT.
+ * Implemented as ori rT, rA, 0 (OR with an immediate of zero).
+ */
+void
+spe_move(struct spe_function *p, unsigned rT, unsigned rA)
+{
+ spe_ori(p, rT, rA, 0);
+}
+
+
+/**
+ * Emit code that clears rT to all zero bits.
+ * Implemented as xor rT, rT, rT.
+ */
+void
+spe_zero(struct spe_function *p, unsigned rT)
+{
+ spe_xor(p, rT, rT, rT);
+}
+
+
#endif /* GALLIUM_CELL */
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
index 1cacc717b15..d95e5aace34 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -25,6 +25,7 @@
/**
* \file
* Real-time assembly generation interface for Cell B.E. SPEs.
+ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
*
* \author Ian Romanick <[email protected]>
*/
@@ -32,13 +33,24 @@
#ifndef RTASM_PPC_SPE_H
#define RTASM_PPC_SPE_H
-struct spe_function {
- /**
- *
- */
- uint32_t *store;
- uint32_t *csr;
- const char *fn;
+/** 4 bytes per instruction */
+#define SPE_INST_SIZE 4
+
+/** number of general-purpose SIMD registers */
+#define SPE_NUM_REGS 128
+
+/** Return Address register */
+#define SPE_REG_RA 0
+
+/** Stack Pointer register */
+#define SPE_REG_SP 1
+
+
+struct spe_function
+{
+ uint32_t *store; /**< instruction buffer */
+ uint num_inst;
+ uint max_inst;
/**
* Mask of used / unused registers
@@ -50,7 +62,7 @@ struct spe_function {
* spe_allocate_register, spe_allocate_available_register,
* spe_release_register
*/
- uint64_t regs[2];
+ uint64_t regs[SPE_NUM_REGS / 64];
};
extern void spe_init_func(struct spe_function *p, unsigned code_size);
@@ -119,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082);
EMIT_RI16(spe_il, 0x081);
EMIT_RI18(spe_ila, 0x021);
EMIT_RI16(spe_iohl, 0x0c1);
-EMIT_RI16(spe_fsmbi, 0x0c5);
+EMIT_RI16(spe_fsmbi, 0x065);
+
/* Integer and logical instructions
@@ -271,6 +284,31 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
int d, int e);
+/** Load/splat immediate float into rT. */
+extern void
+spe_load_float(struct spe_function *p, unsigned rT, float x);
+
+/** Load/splat immediate int into rT. */
+extern void
+spe_load_int(struct spe_function *p, unsigned rT, int i);
+
+/** Replicate word 0 of rA across rT. */
+extern void
+spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
+
+/** Complement/invert all bits in rT. */
+extern void
+spe_complement(struct spe_function *p, unsigned rT);
+
+/** rT = rA. */
+extern void
+spe_move(struct spe_function *p, unsigned rT, unsigned rA);
+
+/** rT = {0,0,0,0}. */
+extern void
+spe_zero(struct spe_function *p, unsigned rT);
+
+
/* Floating-point instructions
*/
EMIT_RR (spe_fa, 0x2c4);