diff options
author | Keith Whitwell <[email protected]> | 2005-06-07 10:59:37 +0000 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2005-06-07 10:59:37 +0000 |
commit | 757e0855adb1b1eb45b55e1fcf6acb47224b2853 (patch) | |
tree | 7b9fe3f1f8ab9bc4659bc50c409d6725925a909a /src/mesa/tnl | |
parent | 17d249d31b3b382af04610b2ca37d01b8f06abe4 (diff) |
Simplify interpreted language:
- Expand operand argument specifiers so that all registers and
parameters can be referenced directly. Remove old PAR/PRL
instructions.
- No 3 operand instructions, translate MAD -> MUL, ADD. (No room for
3 full operands in instruction struct).
- Translate SWZ instructions into 1 or 2 reduced swizzles at compile
time.
- Add hardwired code for moving input and output values to/from the
register file. Drop old INx, OUT instructions.
Diffstat (limited to 'src/mesa/tnl')
-rw-r--r-- | src/mesa/tnl/t_vb_arbprogram.c | 1206 |
1 files changed, 581 insertions, 625 deletions
diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c index 3be82c72985..5494eed0984 100644 --- a/src/mesa/tnl/t_vb_arbprogram.c +++ b/src/mesa/tnl/t_vb_arbprogram.c @@ -49,118 +49,87 @@ /* New, internal instructions: */ -#define IN1 (VP_OPCODE_XPD+1) -#define IN2 (IN1+1) /* intput-to-reg MOV */ -#define IN3 (IN1+2) -#define IN4 (IN1+3) -#define OUT (IN1+4) /* reg-to-output MOV */ -#define OUM (IN1+5) /* reg-to-output MOV with mask */ -#define RSW (IN1+6) -#define MSK (IN1+7) /* reg-to-reg MOV with mask */ -#define PAR (IN1+8) /* parameter-to-reg MOV */ -#define PRL (IN1+9) /* parameter-to-reg MOV */ +#define RSW (VP_MAX_OPCODE) +#define SEL (VP_MAX_OPCODE+1) +#define REL (VP_MAX_OPCODE+2) /* Layout of register file: 0 -- Scratch (Arg0) 1 -- Scratch (Arg1) - 2 -- Scratch (Arg2) - 3 -- Scratch (Result) + 2 -- Scratch (Result) 4 -- Program Temporary 0 + 16 -- Program Temporary 12 (max for NV_VERTEX_PROGRAM) + 17 -- Output 0 + 31 -- Output 15 (max for NV_VERTEX_PROGRAM) (Last writeable register) + 32 -- Parameter 0 .. - 31 -- Program Temporary 27 - 32 -- State/Input/Const shadow 0 - .. - 63 -- State/Input/Const shadow 31 + 127 -- Parameter 63 (max for NV_VERTEX_PROGRAM) */ - - -#define REG_ARG0 0 -#define REG_ARG1 1 -#define REG_ARG2 2 -#define REG_RES 3 -#define REG_TMP0 4 -#define REG_TMP_MAX 32 -#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0) -#define REG_PAR0 32 -#define REG_PAR_MAX 64 -#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0) - -#define REG_MAX 64 -#define REG_SWZDST_MAX 16 +#define FILE_REG 0 +#define FILE_LOCAL_PARAM 1 +#define FILE_ENV_PARAM 2 +#define FILE_STATE_PARAM 3 + + +#define REG_ARG0 0 +#define REG_ARG1 1 +#define REG_ARG2 2 +#define REG_RES 3 +#define REG_ADDR 4 +#define REG_TMP0 5 +#define REG_TMP11 16 +#define REG_OUT0 17 +#define REG_OUT14 31 +#define REG_IN0 32 +#define REG_IN15 47 +#define REG_ID 48 /* 0,0,0,1 */ +#define REG_MAX 128 +#define REG_INVALID ~0 /* ARB_vp instructions are broken down into one or more of the * following micro-instructions, each representable in a 32 bit packed * structure. */ - -union instruction { - struct { - GLuint opcode:6; - GLuint dst:5; - GLuint arg0:6; - GLuint arg1:6; - GLuint elt:2; /* x,y,z or w */ - GLuint pad:7; - } scl; +struct reg { + GLuint file:2; + GLuint idx:7; +}; +union instruction { struct { GLuint opcode:6; GLuint dst:5; - GLuint arg0:6; - GLuint arg1:6; - GLuint arg2:6; + GLuint file0:2; + GLuint idx0:7; + GLuint file1:2; + GLuint idx1:7; GLuint pad:3; - } vec; + } alu; struct { GLuint opcode:6; - GLuint dst:4; /* NOTE! REG 0..16 only! */ - GLuint arg0:6; - GLuint neg:4; - GLuint swz:12; - } swz; - - struct { - GLuint opcode:6; - GLuint dst:6; - GLuint arg0:6; - GLuint neg:1; /* 1 bit only */ + GLuint dst:5; + GLuint file0:2; + GLuint idx0:7; + GLuint neg:4; GLuint swz:8; /* xyzw only */ - GLuint pad:5; } rsw; struct { GLuint opcode:6; - GLuint reg:6; - GLuint file:5; - GLuint idx:8; /* plenty? */ - GLuint rel:1; - GLuint pad:6; - } inr; - - - struct { - GLuint opcode:6; - GLuint reg:6; - GLuint file:5; - GLuint idx:8; /* plenty? */ - GLuint mask:4; - GLuint pad:3; - } out; - - struct { - GLuint opcode:6; GLuint dst:5; - GLuint arg0:6; + GLuint idx0:7; /* note! */ + GLuint file1:2; + GLuint idx1:7; GLuint mask:4; - GLuint pad:11; - } msk; + GLuint pad:1; + } sel; GLuint dword; }; @@ -168,32 +137,39 @@ union instruction { struct compilation { - struct { - GLuint file:5; - GLuint idx:8; - } reg[REG_PAR_NR]; - - GLuint par_active; - GLuint par_protected; - GLuint tmp_active; - + GLuint reg_active; union instruction *csr; - struct vertex_buffer *VB; /* for input sizes! */ }; +struct input { + GLuint idx; + GLfloat *data; + GLuint stride; + GLuint size; +}; + +struct output { + GLuint idx; + GLfloat *data; +}; + /*--------------------------------------------------------------------------- */ /*! * Private storage for the vertex program pipeline stage. */ struct arb_vp_machine { - GLfloat reg[REG_MAX][4]; /* Program temporaries, shadowed parameters and inputs, - plus some internal values */ - - GLfloat (*File[8])[4]; /* Src/Dest for PAR/PRL instructions. */ + GLfloat reg[REG_MAX][4]; /* Program temporaries, inputs and outputs */ + GLfloat (*File[4])[4]; /* All values reference-able from the program. */ GLint AddressReg; + struct input input[16]; + GLuint nr_inputs; + + struct output output[15]; + GLuint nr_outputs; + union instruction store[1024]; union instruction *instructions; GLint nr_instructions; @@ -213,10 +189,8 @@ struct arb_vp_machine { /*--------------------------------------------------------------------------- */ struct opcode_info { - GLuint type; GLuint nr_args; const char *string; - void (*func)( struct arb_vp_machine *, union instruction ); void (*print)( union instruction , const struct opcode_info * ); }; @@ -272,11 +246,7 @@ static GLfloat RoughApproxPow2(GLfloat t) static GLfloat RoughApproxPower(GLfloat x, GLfloat y) { -#if 0 - return (GLfloat) exp(y * log(x)); -#else return (GLfloat) _mesa_pow(x, y); -#endif } @@ -284,156 +254,50 @@ static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; - -/** - * This is probably the least-optimal part of the process, have to - * multiply out the stride to access each incoming input value. - */ -static GLfloat *get_input( struct arb_vp_machine *m, GLuint index ) -{ - return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr); -} +#define GET_RSW(swz, idx) (((swz) >> ((idx)*2)) & 0x3) /** - * Fetch a 4-element float vector from the given source register. - * Deal with the possibility that not all elements are present. - */ -static void do_IN1( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - const GLfloat *src = get_input(m, op.inr.idx); - - result[0] = src[0]; - result[1] = 0; - result[2] = 0; - result[3] = 1; -} - -static void do_IN2( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - const GLfloat *src = get_input(m, op.inr.idx); - - result[0] = src[0]; - result[1] = src[1]; - result[2] = 0; - result[3] = 1; -} - -static void do_IN3( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - const GLfloat *src = get_input(m, op.inr.idx); - - result[0] = src[0]; - result[1] = src[1]; - result[2] = src[2]; - result[3] = 1; -} - -static void do_IN4( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - const GLfloat *src = get_input(m, op.inr.idx); - - result[0] = src[0]; - result[1] = src[1]; - result[2] = src[2]; - result[3] = src[3]; -} - -/** * Perform a reduced swizzle: */ static void do_RSW( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->reg[op.rsw.dst]; - const GLfloat *arg0 = m->reg[op.rsw.arg0]; + const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0]; GLuint swz = op.rsw.swz; GLuint neg = op.rsw.neg; - GLuint i; - - if (neg) - for (i = 0; i < 4; i++, swz >>= 2) - result[i] = -arg0[swz & 0x3]; - else - for (i = 0; i < 4; i++, swz >>= 2) - result[i] = arg0[swz & 0x3]; -} - - - -/** - * Store 4 floats into an external address. - */ -static void do_OUM( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr]; - const GLfloat *value = m->reg[op.out.reg]; - - if (op.out.mask & 0x1) dst[0] = value[0]; - if (op.out.mask & 0x2) dst[1] = value[1]; - if (op.out.mask & 0x4) dst[2] = value[2]; - if (op.out.mask & 0x8) dst[3] = value[3]; -} - -static void do_OUT( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr]; - const GLfloat *value = m->reg[op.out.reg]; - dst[0] = value[0]; - dst[1] = value[1]; - dst[2] = value[2]; - dst[3] = value[3]; + result[0] = arg0[GET_RSW(swz, 0)]; + result[1] = arg0[GET_RSW(swz, 1)]; + result[2] = arg0[GET_RSW(swz, 2)]; + result[3] = arg0[GET_RSW(swz, 3)]; + + if (neg) { + if (neg & 0x1) result[0] = -result[0]; + if (neg & 0x2) result[1] = -result[1]; + if (neg & 0x4) result[2] = -result[2]; + if (neg & 0x8) result[3] = -result[3]; + } } -/* Register-to-register MOV with writemask. +/* Used to implement write masking */ -static void do_MSK( struct arb_vp_machine *m, union instruction op ) +static void do_SEL( struct arb_vp_machine *m, union instruction op ) { - GLfloat *dst = m->reg[op.msk.dst]; - const GLfloat *arg0 = m->reg[op.msk.arg0]; + GLfloat *dst = m->reg[op.sel.dst]; + const GLfloat *arg0 = m->reg[op.sel.idx0]; + const GLfloat *arg1 = m->File[op.sel.file1][op.sel.idx1]; - if (op.msk.mask & 0x1) dst[0] = arg0[0]; - if (op.msk.mask & 0x2) dst[1] = arg0[1]; - if (op.msk.mask & 0x4) dst[2] = arg0[2]; - if (op.msk.mask & 0x8) dst[3] = arg0[3]; -} - - -/* Retreive parameters and other constant values: - */ -static void do_PAR( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - const GLfloat *src = m->File[op.inr.file][op.inr.idx]; - - result[0] = src[0]; - result[1] = src[1]; - result[2] = src[2]; - result[3] = src[3]; + dst[0] = (op.sel.mask & 0x1) ? arg0[0] : arg1[0]; + dst[1] = (op.sel.mask & 0x2) ? arg0[1] : arg1[1]; + dst[2] = (op.sel.mask & 0x4) ? arg0[2] : arg1[2]; + dst[3] = (op.sel.mask & 0x8) ? arg0[3] : arg1[3]; } -#define RELADDR_MASK (MAX_NV_VERTEX_PROGRAM_PARAMS-1) - -static void do_PRL( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.inr.reg]; - GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK; - const GLfloat *src = m->File[op.inr.file][index]; - - result[0] = src[0]; - result[1] = src[1]; - result[2] = src[2]; - result[3] = src[3]; -} - static void do_PRT( struct arb_vp_machine *m, union instruction op ) { - const GLfloat *arg0 = m->reg[op.vec.arg0]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr, arg0[0], arg0[1], arg0[2], arg0[3]); @@ -447,8 +311,8 @@ static void do_PRT( struct arb_vp_machine *m, union instruction op ) static void do_ABS( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0]; result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1]; @@ -458,9 +322,9 @@ static void do_ABS( struct arb_vp_machine *m, union instruction op ) static void do_ADD( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] + arg1[0]; result[1] = arg0[1] + arg1[1]; @@ -471,16 +335,16 @@ static void do_ADD( struct arb_vp_machine *m, union instruction op ) static void do_ARL( struct arb_vp_machine *m, union instruction op ) { - const GLfloat *arg0 = m->reg[op.out.reg]; - m->AddressReg = (GLint) floor(arg0[0]); + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + m->reg[REG_ADDR][0] = FLOORF(arg0[0]); } static void do_DP3( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; - const GLfloat *arg1 = m->reg[op.scl.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + @@ -489,26 +353,13 @@ static void do_DP3( struct arb_vp_machine *m, union instruction op ) PUFF(result); } -#if 0 -static void do_MAT4( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; - const GLfloat *mat[] = m->reg[op.scl.arg1]; - - result[0] = (arg0[0] * mat0[0] + arg0[1] * mat0[1] + arg0[2] * mat0[2] + arg0[3] * mat0[3]); - result[1] = (arg0[0] * mat1[0] + arg0[1] * mat1[1] + arg0[2] * mat1[2] + arg0[3] * mat1[3]); - result[2] = (arg0[0] * mat2[0] + arg0[1] * mat2[1] + arg0[2] * mat2[2] + arg0[3] * mat2[3]); - result[3] = (arg0[0] * mat3[0] + arg0[1] * mat3[1] + arg0[2] * mat3[2] + arg0[3] * mat3[3]); -} -#endif static void do_DP4( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; - const GLfloat *arg1 = m->reg[op.scl.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + @@ -520,9 +371,9 @@ static void do_DP4( struct arb_vp_machine *m, union instruction op ) static void do_DPH( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; - const GLfloat *arg1 = m->reg[op.scl.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + @@ -534,9 +385,9 @@ static void do_DPH( struct arb_vp_machine *m, union instruction op ) static void do_DST( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = 1.0F; result[1] = arg0[1] * arg1[1]; @@ -547,8 +398,8 @@ static void do_DST( struct arb_vp_machine *m, union instruction op ) static void do_EX2( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = (GLfloat)RoughApproxPow2(arg0[0]); PUFF(result); @@ -556,8 +407,8 @@ static void do_EX2( struct arb_vp_machine *m, union instruction op ) static void do_EXP( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp = arg0[0]; GLfloat flr_tmp = FLOORF(tmp); @@ -572,8 +423,8 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op ) static void do_FLR( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = FLOORF(arg0[0]); result[1] = FLOORF(arg0[1]); @@ -583,8 +434,8 @@ static void do_FLR( struct arb_vp_machine *m, union instruction op ) static void do_FRC( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = arg0[0] - FLOORF(arg0[0]); result[1] = arg0[1] - FLOORF(arg0[1]); @@ -594,8 +445,8 @@ static void do_FRC( struct arb_vp_machine *m, union instruction op ) static void do_LG2( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = RoughApproxLog2(arg0[0]); PUFF(result); @@ -605,8 +456,8 @@ static void do_LG2( struct arb_vp_machine *m, union instruction op ) static void do_LIT( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */ GLfloat tmp[4]; @@ -624,8 +475,8 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op ) static void do_LOG( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp = FABSF(arg0[0]); int exponent; GLfloat mantissa = FREXPF(tmp, &exponent); @@ -636,25 +487,11 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op ) result[3] = 1.0; } - -static void do_MAD( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; - const GLfloat *arg2 = m->reg[op.vec.arg2]; - - result[0] = arg0[0] * arg1[0] + arg2[0]; - result[1] = arg0[1] * arg1[1] + arg2[1]; - result[2] = arg0[2] * arg1[2] + arg2[2]; - result[3] = arg0[3] * arg1[3] + arg2[3]; -} - static void do_MAX( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0]; result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1]; @@ -665,9 +502,9 @@ static void do_MAX( struct arb_vp_machine *m, union instruction op ) static void do_MIN( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0]; result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1]; @@ -677,8 +514,8 @@ static void do_MIN( struct arb_vp_machine *m, union instruction op ) static void do_MOV( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = arg0[0]; result[1] = arg0[1]; @@ -688,9 +525,9 @@ static void do_MOV( struct arb_vp_machine *m, union instruction op ) static void do_MUL( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] * arg1[0]; result[1] = arg0[1] * arg1[1]; @@ -701,18 +538,30 @@ static void do_MUL( struct arb_vp_machine *m, union instruction op ) static void do_POW( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; - const GLfloat *arg1 = m->reg[op.scl.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]); PUFF(result); } +static void do_REL( struct arb_vp_machine *m, union instruction op ) +{ + GLfloat *result = m->reg[op.alu.dst]; + GLuint idx = (op.alu.idx0 + (GLint)m->reg[REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1); + const GLfloat *arg0 = m->File[op.alu.file0][idx]; + + result[0] = arg0[0]; + result[1] = arg0[1]; + result[2] = arg0[2]; + result[3] = arg0[3]; +} + static void do_RCP( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = 1.0F / arg0[0]; PUFF(result); @@ -720,8 +569,8 @@ static void do_RCP( struct arb_vp_machine *m, union instruction op ) static void do_RSQ( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.scl.dst]; - const GLfloat *arg0 = m->reg[op.scl.arg0]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = INV_SQRTF(FABSF(arg0[0])); PUFF(result); @@ -730,9 +579,9 @@ static void do_RSQ( struct arb_vp_machine *m, union instruction op ) static void do_SGE( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F; result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F; @@ -743,9 +592,9 @@ static void do_SGE( struct arb_vp_machine *m, union instruction op ) static void do_SLT( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F; result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F; @@ -753,29 +602,11 @@ static void do_SLT( struct arb_vp_machine *m, union instruction op ) result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F; } -static void do_SWZ( struct arb_vp_machine *m, union instruction op ) -{ - GLfloat *result = m->reg[op.swz.dst]; - const GLfloat *arg0 = m->reg[op.swz.arg0]; - GLuint swz = op.swz.swz; - GLuint neg = op.swz.neg; - GLuint i; - - for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) { - switch (swz & 0x7) { - case SWIZZLE_ZERO: result[i] = 0.0; break; - case SWIZZLE_ONE: result[i] = 1.0; break; - default: result[i] = arg0[swz & 0x7]; break; - } - if (neg & 0x1) result[i] = -result[i]; - } -} - static void do_SUB( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] - arg1[0]; result[1] = arg0[1] - arg1[1]; @@ -786,9 +617,9 @@ static void do_SUB( struct arb_vp_machine *m, union instruction op ) static void do_XPD( struct arb_vp_machine *m, union instruction op ) { - GLfloat *result = m->reg[op.vec.dst]; - const GLfloat *arg0 = m->reg[op.vec.arg0]; - const GLfloat *arg1 = m->reg[op.vec.arg1]; + GLfloat *result = m->reg[op.alu.dst]; + const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; + const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; @@ -801,20 +632,6 @@ static void do_NOP( struct arb_vp_machine *m, union instruction op ) /* Some useful debugging functions: */ -static void print_reg( GLuint reg ) -{ - if (reg == REG_RES) - _mesa_printf("RES"); - else if (reg >= REG_ARG0 && reg <= REG_ARG2) - _mesa_printf("ARG%d", reg - REG_ARG0); - else if (reg >= REG_TMP0 && reg < REG_TMP_MAX) - _mesa_printf("TMP%d", reg - REG_TMP0); - else if (reg >= REG_PAR0 && reg < REG_PAR_MAX) - _mesa_printf("PAR%d", reg - REG_PAR0); - else - _mesa_printf("???"); -} - static void print_mask( GLuint mask ) { _mesa_printf("."); @@ -824,45 +641,38 @@ static void print_mask( GLuint mask ) if (mask&0x8) _mesa_printf("w"); } -static void print_extern( GLuint file, GLuint idx ) +static void print_reg( GLuint file, GLuint reg ) { static const char *reg_file[] = { - "TEMPORARY", - "INPUT", - "OUTPUT", + "REG", "LOCAL_PARAM", "ENV_PARAM", - "NAMED_PARAM", "STATE_VAR", - "WRITE_ONLY", - "ADDRESS" }; - _mesa_printf("%s:%d", reg_file[file], idx); -} - - - -static void print_SWZ( union instruction op, const struct opcode_info *info ) -{ - GLuint swz = op.swz.swz; - GLuint neg = op.swz.neg; - GLuint i; - - _mesa_printf("%s ", info->string); - print_reg(op.swz.dst); - _mesa_printf(", "); - print_reg(op.swz.arg0); - _mesa_printf("."); - for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) { - const char *cswz = "xyzw01??"; - if (neg & 0x1) - _mesa_printf("-"); - _mesa_printf("%c", cswz[swz&0x7]); + if (file == 0) { + if (reg == REG_RES) + _mesa_printf("RES"); + else if (reg >= REG_ARG0 && reg <= REG_ARG1) + _mesa_printf("ARG%d", reg - REG_ARG0); + else if (reg >= REG_TMP0 && reg <= REG_TMP11) + _mesa_printf("TMP%d", reg - REG_TMP0); + else if (reg >= REG_IN0 && reg <= REG_IN15) + _mesa_printf("IN%d", reg - REG_IN0); + else if (reg >= REG_OUT0 && reg <= REG_OUT14) + _mesa_printf("OUT%d", reg - REG_OUT0); + else if (reg == REG_ADDR) + _mesa_printf("ADDR"); + else if (reg == REG_ID) + _mesa_printf("ID"); + else + _mesa_printf("REG%d", reg); } - _mesa_printf("\n"); + else + _mesa_printf("%s:%d", reg_file[file], reg); } + static void print_RSW( union instruction op, const struct opcode_info *info ) { GLuint swz = op.rsw.swz; @@ -870,13 +680,13 @@ static void print_RSW( union instruction op, const struct opcode_info *info ) GLuint i; _mesa_printf("%s ", info->string); - print_reg(op.rsw.dst); + print_reg(0, op.rsw.dst); _mesa_printf(", "); - print_reg(op.rsw.arg0); + print_reg(op.rsw.file0, op.rsw.idx0); _mesa_printf("."); for (i = 0; i < 4; i++, swz >>= 2) { const char *cswz = "xyzw"; - if (neg) + if (neg & (1<<i)) _mesa_printf("-"); _mesa_printf("%c", cswz[swz&0x3]); } @@ -884,193 +694,203 @@ static void print_RSW( union instruction op, const struct opcode_info *info ) } -static void print_SCL( union instruction op, const struct opcode_info *info ) -{ - _mesa_printf("%s ", info->string); - print_reg(op.scl.dst); - _mesa_printf(", "); - print_reg(op.scl.arg0); - if (info->nr_args > 1) { - _mesa_printf(", "); - print_reg(op.scl.arg1); - } - _mesa_printf("\n"); -} - - -static void print_VEC( union instruction op, const struct opcode_info *info ) +static void print_ALU( union instruction op, const struct opcode_info *info ) { _mesa_printf("%s ", info->string); - print_reg(op.vec.dst); + print_reg(0, op.alu.dst); _mesa_printf(", "); - print_reg(op.vec.arg0); + print_reg(op.alu.file0, op.alu.idx0); if (info->nr_args > 1) { _mesa_printf(", "); - print_reg(op.vec.arg1); - } - if (info->nr_args > 2) { - _mesa_printf(", "); - print_reg(op.vec.arg2); + print_reg(op.alu.file1, op.alu.idx1); } _mesa_printf("\n"); } -static void print_MSK( union instruction op, const struct opcode_info *info ) +static void print_SEL( union instruction op, const struct opcode_info *info ) { _mesa_printf("%s ", info->string); - print_reg(op.msk.dst); - print_mask(op.msk.mask); + print_reg(0, op.sel.dst); _mesa_printf(", "); - print_reg(op.msk.arg0); - _mesa_printf("\n"); -} - -static void print_IN( union instruction op, const struct opcode_info *info ) -{ - _mesa_printf("%s ", info->string); - print_reg(op.inr.reg); + print_reg(0, op.sel.idx0); + print_mask(op.sel.mask); _mesa_printf(", "); - print_extern(op.inr.file, op.inr.idx); + print_reg(op.sel.file1, op.sel.idx1); + print_mask(~op.sel.mask); _mesa_printf("\n"); } -static void print_OUT( union instruction op, const struct opcode_info *info ) -{ - _mesa_printf("%s ", info->string); - print_extern(op.out.file, op.out.idx); - if (op.out.opcode == OUM) - print_mask(op.out.mask); - _mesa_printf(", "); - print_reg(op.out.reg); - _mesa_printf("\n"); -} static void print_NOP( union instruction op, const struct opcode_info *info ) { } #define NOP 0 -#define VEC 1 -#define SCL 2 -#define SWZ 3 +#define ALU 1 +#define SWZ 2 static const struct opcode_info opcode_info[] = { - { VEC, 1, "ABS", do_ABS, print_VEC }, - { VEC, 2, "ADD", do_ADD, print_VEC }, - { OUT, 1, "ARL", do_ARL, print_OUT }, - { SCL, 2, "DP3", do_DP3, print_SCL }, - { SCL, 2, "DP4", do_DP4, print_SCL }, - { SCL, 2, "DPH", do_DPH, print_SCL }, - { VEC, 2, "DST", do_DST, print_VEC }, - { NOP, 0, "END", do_NOP, print_NOP }, - { SCL, 1, "EX2", do_EX2, print_VEC }, - { VEC, 1, "EXP", do_EXP, print_VEC }, - { VEC, 1, "FLR", do_FLR, print_VEC }, - { VEC, 1, "FRC", do_FRC, print_VEC }, - { SCL, 1, "LG2", do_LG2, print_VEC }, - { VEC, 1, "LIT", do_LIT, print_VEC }, - { VEC, 1, "LOG", do_LOG, print_VEC }, - { VEC, 3, "MAD", do_MAD, print_VEC }, - { VEC, 2, "MAX", do_MAX, print_VEC }, - { VEC, 2, "MIN", do_MIN, print_VEC }, - { VEC, 1, "MOV", do_MOV, print_VEC }, - { VEC, 2, "MUL", do_MUL, print_VEC }, - { SCL, 2, "POW", do_POW, print_VEC }, - { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */ - { NOP, 1, "RCC", do_NOP, print_NOP }, - { SCL, 1, "RCP", do_RCP, print_VEC }, - { SCL, 1, "RSQ", do_RSQ, print_VEC }, - { VEC, 2, "SGE", do_SGE, print_VEC }, - { VEC, 2, "SLT", do_SLT, print_VEC }, - { VEC, 2, "SUB", do_SUB, print_VEC }, - { SWZ, 1, "SWZ", do_SWZ, print_SWZ }, - { VEC, 2, "XPD", do_XPD, print_VEC }, - { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */ - { IN4, 1, "IN2", do_IN2, print_IN }, - { IN4, 1, "IN3", do_IN3, print_IN }, - { IN4, 1, "IN4", do_IN4, print_IN }, - { OUT, 1, "OUT", do_OUT, print_OUT }, - { OUT, 1, "OUM", do_OUM, print_OUT }, - { SWZ, 1, "RSW", do_RSW, print_RSW }, - { MSK, 1, "MSK", do_MSK, print_MSK }, - { IN4, 1, "PAR", do_PAR, print_IN }, - { IN4, 1, "PRL", do_PRL, print_IN }, + { 1, "ABS", print_ALU }, + { 2, "ADD", print_ALU }, + { 1, "ARL", print_ALU }, + { 2, "DP3", print_ALU }, + { 2, "DP4", print_ALU }, + { 2, "DPH", print_ALU }, + { 2, "DST", print_ALU }, + { 0, "END", print_NOP }, + { 1, "EX2", print_ALU }, + { 1, "EXP", print_ALU }, + { 1, "FLR", print_ALU }, + { 1, "FRC", print_ALU }, + { 1, "LG2", print_ALU }, + { 1, "LIT", print_ALU }, + { 1, "LOG", print_ALU }, + { 3, "MAD", print_NOP }, + { 2, "MAX", print_ALU }, + { 2, "MIN", print_ALU }, + { 1, "MOV", print_ALU }, + { 2, "MUL", print_ALU }, + { 2, "POW", print_ALU }, + { 1, "PRT", print_ALU }, /* PRINT */ + { 1, "RCC", print_NOP }, + { 1, "RCP", print_ALU }, + { 1, "RSQ", print_ALU }, + { 2, "SGE", print_ALU }, + { 2, "SLT", print_ALU }, + { 2, "SUB", print_ALU }, + { 1, "SWZ", print_NOP }, + { 2, "XPD", print_ALU }, + { 1, "RSW", print_RSW }, + { 2, "SEL", print_SEL }, + { 1, "REL", print_ALU }, }; -static GLuint cvp_load_reg( struct compilation *cp, - GLuint file, - GLuint index, - GLuint rel ) +static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) = +{ + do_ABS, + do_ADD, + do_ARL, + do_DP3, + do_DP4, + do_DPH, + do_DST, + do_NOP, + do_EX2, + do_EXP, + do_FLR, + do_FRC, + do_LG2, + do_LIT, + do_LOG, + do_NOP, + do_MAX, + do_MIN, + do_MOV, + do_MUL, + do_POW, + do_PRT, + do_NOP, + do_RCP, + do_RSQ, + do_SGE, + do_SLT, + do_SUB, + do_RSW, + do_XPD, + do_RSW, + do_SEL, + do_REL, +}; + +static union instruction *cvp_next_instruction( struct compilation *cp ) { - GLuint i, op; + union instruction *op = cp->csr++; + op->dword = 0; + return op; +} - if (file == PROGRAM_TEMPORARY) - return index + REG_TMP0; +static struct reg cvp_make_reg( GLuint file, GLuint idx ) +{ + struct reg reg; + reg.file = file; + reg.idx = idx; + return reg; +} - /* Don't try to cache relatively addressed values yet: - */ - if (!rel) { - for (i = 0; i < REG_PAR_NR; i++) { - if ((cp->par_active & (1<<i)) && - cp->reg[i].file == file && - cp->reg[i].idx == index) { - cp->par_protected |= (1<<i); - return i + REG_PAR0; - } - } - } +static struct reg cvp_emit_rel( struct compilation *cp, + struct reg reg, + struct reg tmpreg ) +{ + union instruction *op = cvp_next_instruction(cp); + op->alu.opcode = REL; + op->alu.file0 = reg.file; + op->alu.idx0 = reg.idx; + op->alu.dst = tmpreg.idx; + return tmpreg; +} - /* Not already loaded, so identify a slot and load it. - * TODO: preload these values once only! - * TODO: better eviction strategy! - */ - if (cp->par_active == ~0) { - assert(cp->par_protected != ~0); - cp->par_active = cp->par_protected; - } - i = ffs(~cp->par_active); - assert(i); - i--; +static struct reg cvp_load_reg( struct compilation *cp, + GLuint file, + GLuint index, + GLuint rel, + GLuint tmpidx ) +{ + struct reg tmpreg = cvp_make_reg(FILE_REG, tmpidx); + struct reg reg; + switch (file) { + case PROGRAM_TEMPORARY: + return cvp_make_reg(FILE_REG, REG_TMP0 + index); - if (file == PROGRAM_INPUT) - op = IN1 + cp->VB->AttribPtr[index]->size - 1; - else if (rel) - op = PRL; - else - op = PAR; - - cp->csr->dword = 0; - cp->csr->inr.opcode = op; - cp->csr->inr.reg = i + REG_PAR0; - cp->csr->inr.file = file; - cp->csr->inr.idx = index; - cp->csr++; - - cp->reg[i].file = file; - cp->reg[i].idx = index; - cp->par_protected |= (1<<i); - cp->par_active |= (1<<i); - return i + REG_PAR0; -} - -static void cvp_release_regs( struct compilation *cp ) -{ - cp->par_protected = 0; -} + case PROGRAM_INPUT: + return cvp_make_reg(FILE_REG, REG_IN0 + index); + case PROGRAM_OUTPUT: + return cvp_make_reg(FILE_REG, REG_OUT0 + index); + /* These two aren't populated by the parser? + */ + case PROGRAM_LOCAL_PARAM: + reg = cvp_make_reg(FILE_LOCAL_PARAM, index); + if (rel) + return cvp_emit_rel(cp, reg, tmpreg); + else + return reg; + + case PROGRAM_ENV_PARAM: + reg = cvp_make_reg(FILE_ENV_PARAM, index); + if (rel) + return cvp_emit_rel(cp, reg, tmpreg); + else + return reg; + + case PROGRAM_STATE_VAR: + reg = cvp_make_reg(FILE_STATE_PARAM, index); + if (rel) + return cvp_emit_rel(cp, reg, tmpreg); + else + return reg; + + /* Invalid values: + */ + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + default: + assert(0); + return tmpreg; /* can't happen */ + } +} -static GLuint cvp_emit_arg( struct compilation *cp, - const struct vp_src_register *src, - GLuint arg ) +static struct reg cvp_emit_arg( struct compilation *cp, + const struct vp_src_register *src, + GLuint arg ) { - GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr ); + struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg ); union instruction rsw, noop; - + /* Emit any necessary swizzling. */ rsw.dword = 0; @@ -1088,12 +908,13 @@ static GLuint cvp_emit_arg( struct compilation *cp, (3<<6)); if (rsw.dword != noop.dword) { - GLuint rsw_reg = arg; - cp->csr->dword = rsw.dword; - cp->csr->rsw.opcode = RSW; - cp->csr->rsw.arg0 = reg; - cp->csr->rsw.dst = rsw_reg; - cp->csr++; + union instruction *op = cvp_next_instruction(cp); + struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg); + op->dword = rsw.dword; + op->rsw.opcode = RSW; + op->rsw.file0 = reg.file; + op->rsw.idx0 = reg.idx; + op->rsw.dst = rsw_reg.idx; return rsw_reg; } else @@ -1102,48 +923,82 @@ static GLuint cvp_emit_arg( struct compilation *cp, static GLuint cvp_choose_result( struct compilation *cp, const struct vp_dst_register *dst, - union instruction *fixup, - GLuint maxreg) + union instruction *fixup ) { GLuint mask = dst->WriteMask; + GLuint idx; - if (dst->File == PROGRAM_TEMPORARY) { - - /* Optimization: When writing (with a writemask) to an undefined - * value for the first time, the writemask may be ignored. In - * practise this means that the MSK instruction to implement the - * writemask can be dropped. + switch (dst->File) { + case PROGRAM_TEMPORARY: + idx = REG_TMP0 + dst->Index; + break; + case PROGRAM_OUTPUT: + idx = REG_OUT0 + dst->Index; + break; + default: + assert(0); + return REG_RES; /* can't happen */ + } + + /* Optimization: When writing (with a writemask) to an undefined + * value for the first time, the writemask may be ignored. + */ + if (mask != WRITEMASK_XYZW && (cp->reg_active & (1 << idx))) { + fixup->sel.opcode = SEL; + fixup->sel.idx0 = REG_RES; + fixup->sel.file1 = FILE_REG; + fixup->sel.idx1 = idx; + fixup->sel.dst = idx; + fixup->sel.mask = mask; + cp->reg_active |= 1 << idx; + return REG_RES; + } + else { + fixup->dword = 0; + cp->reg_active |= 1 << idx; + return idx; + } +} + +#define RSW_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6)) + +static struct reg cvp_emit_rsw( struct compilation *cp, + GLuint dst, + struct reg src, + GLuint neg, + GLuint swz, + GLboolean force) +{ + struct reg retval; + + if (swz != RSW_NOOP || neg != 0) { + union instruction *op = cvp_next_instruction(cp); + op->rsw.opcode = RSW; + op->rsw.dst = dst; + op->rsw.file0 = src.file; + op->rsw.idx0 = src.idx; + op->rsw.neg = neg; + op->rsw.swz = swz; + + retval.file = FILE_REG; + retval.idx = dst; + return retval; + } + else if (force) { + /* Oops. Degenerate case: */ - if (dst->Index < maxreg && - (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) { - fixup->dword = 0; - cp->tmp_active |= (1<<dst->Index); - return REG_TMP0 + dst->Index; - } - else if (mask != 0xf) { - fixup->msk.opcode = MSK; - fixup->msk.arg0 = REG_RES; - fixup->msk.dst = REG_TMP0 + dst->Index; - fixup->msk.mask = mask; - cp->tmp_active |= (1<<dst->Index); - return REG_RES; - } - else { - fixup->vec.opcode = VP_OPCODE_MOV; - fixup->vec.arg0 = REG_RES; - fixup->vec.dst = REG_TMP0 + dst->Index; - cp->tmp_active |= (1<<dst->Index); - return REG_RES; - } + union instruction *op = cvp_next_instruction(cp); + op->alu.opcode = VP_OPCODE_MOV; + op->alu.dst = dst; + op->alu.file0 = src.file; + op->alu.idx0 = src.idx; + + retval.file = FILE_REG; + retval.idx = dst; + return retval; } else { - assert(dst->File == PROGRAM_OUTPUT); - fixup->out.opcode = (mask == 0xf) ? OUT : OUM; - fixup->out.reg = REG_RES; - fixup->out.file = dst->File; - fixup->out.idx = dst->Index; - fixup->out.mask = mask; - return REG_RES; + return src; } } @@ -1152,76 +1007,129 @@ static void cvp_emit_inst( struct compilation *cp, const struct vp_instruction *inst ) { const struct opcode_info *info = &opcode_info[inst->Opcode]; + union instruction *op; union instruction fixup; - GLuint reg[3]; + struct reg reg[3]; GLuint result, i; /* Need to handle SWZ, ARL specially. */ - switch (info->type) { - case OUT: - assert(inst->Opcode == VP_OPCODE_ARL); - reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); - - cp->csr->dword = 0; - cp->csr->out.opcode = inst->Opcode; - cp->csr->out.reg = reg[0]; - cp->csr->out.file = PROGRAM_ADDRESS; - cp->csr->out.idx = 0; + switch (inst->Opcode) { + /* Split into mul and add: + */ + case VP_OPCODE_MAD: + result = cvp_choose_result( cp, &inst->DstReg, &fixup ); + for (i = 0; i < 3; i++) + reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i ); + + op = cvp_next_instruction(cp); + op->alu.opcode = VP_OPCODE_MUL; + op->alu.file0 = reg[0].file; + op->alu.idx0 = reg[0].idx; + op->alu.file1 = reg[1].file; + op->alu.idx1 = reg[1].idx; + op->alu.dst = REG_ARG0; + + op = cvp_next_instruction(cp); + op->alu.opcode = VP_OPCODE_ADD; + op->alu.file0 = FILE_REG; + op->alu.idx0 = REG_ARG0; + op->alu.file1 = reg[2].file; + op->alu.idx1 = reg[2].idx; + op->alu.dst = result; break; - case SWZ: - assert(inst->Opcode == VP_OPCODE_SWZ); - result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX ); + case VP_OPCODE_ARL: reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); - cp->csr->dword = 0; - cp->csr->swz.opcode = VP_OPCODE_SWZ; - cp->csr->swz.arg0 = reg[0]; - cp->csr->swz.dst = result; - cp->csr->swz.neg = inst->SrcReg[0].Negate; - cp->csr->swz.swz = inst->SrcReg[0].Swizzle; - cp->csr++; - - if (result == REG_RES) { - cp->csr->dword = fixup.dword; - cp->csr++; - } + op = cvp_next_instruction(cp); + op->alu.opcode = inst->Opcode; + op->alu.dst = REG_ADDR; + op->alu.file0 = reg[0].file; + op->alu.idx0 = reg[0].idx; break; - case VEC: - case SCL: /* for now */ - result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX ); + case VP_OPCODE_SWZ: { + GLuint swz0, swz1; + GLuint neg0, neg1; + GLuint mask = 0; - reg[0] = reg[1] = reg[2] = 0; + /* Translate 3-bit-per-element swizzle into two 2-bit swizzles, + * one from the source register the other from a constant + * {0,0,0,1}. + */ + for (i = 0; i < 4; i++) { + GLuint swzelt = GET_SWZ(inst->SrcReg[0].Swizzle, i); + if (swzelt >= SWIZZLE_ZERO) { + neg0 |= inst->SrcReg[0].Negate & (1<<i); + if (swzelt == SWIZZLE_ONE) + swz0 |= SWIZZLE_W << (i*2); + else if (i < SWIZZLE_W) + swz0 |= i << (i*2); + } + else { + mask |= 1<<i; + neg1 |= inst->SrcReg[0].Negate & (1<<i); + swz1 |= swzelt << (i*2); + } + } - for (i = 0; i < info->nr_args; i++) - reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i ); + result = cvp_choose_result( cp, &inst->DstReg, &fixup ); + reg[0].file = FILE_REG; + reg[0].idx = REG_ID; + reg[1] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); - cp->csr->dword = 0; - cp->csr->vec.opcode = inst->Opcode; - cp->csr->vec.arg0 = reg[0]; - cp->csr->vec.arg1 = reg[1]; - cp->csr->vec.arg2 = reg[2]; - cp->csr->vec.dst = result; - cp->csr++; + if (mask == WRITEMASK_XYZW) { + cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE); + + } + else if (mask == 0) { + cvp_emit_rsw(cp, result, reg[1], neg1, swz1, GL_TRUE); + } + else { + reg[0] = cvp_emit_rsw(cp, REG_ARG0, reg[0], neg0, swz0, GL_FALSE); + reg[1] = cvp_emit_rsw(cp, REG_ARG1, reg[1], neg1, swz1, GL_FALSE); + + assert(reg[0].file == FILE_REG); + + op = cvp_next_instruction(cp); + op->sel.opcode = SEL; + op->sel.dst = result; + op->sel.idx0 = reg[0].idx; + op->sel.file1 = reg[1].file; + op->sel.idx1 = reg[1].idx; + op->sel.mask = mask; + } if (result == REG_RES) { - cp->csr->dword = fixup.dword; - cp->csr++; - } + op = cvp_next_instruction(cp); + op->dword = fixup.dword; + } break; - - - case NOP: + } + case VP_OPCODE_PRINT: + case VP_OPCODE_END: break; default: - assert(0); + result = cvp_choose_result( cp, &inst->DstReg, &fixup ); + for (i = 0; i < info->nr_args; i++) + reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i ); + + op = cvp_next_instruction(cp); + op->alu.opcode = inst->Opcode; + op->alu.file0 = reg[0].file; + op->alu.idx0 = reg[0].idx; + op->alu.file1 = reg[1].file; + op->alu.idx1 = reg[1].idx; + op->alu.dst = result; + + if (result == REG_RES) { + op = cvp_next_instruction(cp); + op->dword = fixup.dword; + } break; } - - cvp_release_regs( cp ); } @@ -1254,7 +1162,7 @@ static void compile_vertex_program( struct arb_vp_machine *m, if (DISASSEM) { for (i = 0; i < m->nr_instructions; i++) { union instruction insn = m->instructions[i]; - const struct opcode_info *info = &opcode_info[insn.vec.opcode]; + const struct opcode_info *info = &opcode_info[insn.alu.opcode]; info->print( insn, info ); } _mesa_printf("\n\n"); @@ -1390,15 +1298,62 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) if (program->Parameters) { _mesa_load_state_parameters(ctx, program->Parameters); - m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues; } + + + /* Initialize regs where necessary: + */ + ASSIGN_4V(m->reg[REG_ID], 0, 0, 0, 1); + + m->nr_inputs = m->nr_outputs = 0; + + for (i = 0; i < 16; i++) { + if (program->InputsRead & (1<<i)) { + GLuint j = m->nr_inputs++; + m->input[j].idx = i; + m->input[j].data = m->VB->AttribPtr[i]->data; + m->input[j].stride = m->VB->AttribPtr[i]->stride; + m->input[j].size = m->VB->AttribPtr[i]->size; + ASSIGN_4V(m->reg[REG_IN0 + i], 0, 0, 0, 1); + } + } + + for (i = 0; i < 15; i++) { + if (program->OutputsWritten & (1<<i)) { + GLuint j = m->nr_outputs++; + m->output[j].idx = i; + m->output[j].data = m->attribs[i].data; + } + } + /* Run the actual program: */ for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) { + for (j = 0; j < m->nr_inputs; j++) { + GLuint idx = REG_IN0 + m->input[j].idx; + switch (m->input[j].size) { + case 4: m->reg[idx][3] = m->input[j].data[3]; + case 3: m->reg[idx][2] = m->input[j].data[2]; + case 2: m->reg[idx][1] = m->input[j].data[1]; + case 1: m->reg[idx][0] = m->input[j].data[0]; + } + + STRIDE_F(m->input[j].data, m->input[j].stride); + } + for (j = 0; j < m->nr_instructions; j++) { union instruction inst = m->instructions[j]; - opcode_info[inst.vec.opcode].func( m, inst ); + opcode_func[inst.alu.opcode]( m, inst ); + } + + for (j = 0; j < m->nr_outputs; j++) { + GLuint idx = REG_OUT0 + m->output[j].idx; + m->output[j].data[0] = m->reg[idx][0]; + m->output[j].data[1] = m->reg[idx][1]; + m->output[j].data[2] = m->reg[idx][2]; + m->output[j].data[3] = m->reg[idx][3]; + m->output[j].data += 4; } } @@ -1488,9 +1443,10 @@ validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage ) /* Grab the state GL state and put into registers: */ - m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams; - m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters; - m->File[PROGRAM_STATE_VAR] = 0; + m->File[FILE_REG] = m->reg; + m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams; + m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters; + m->File[FILE_STATE_PARAM] = program->Parameters->ParameterValues; } } |