diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/swrast/s_fragprog_to_c.c | 687 |
1 files changed, 687 insertions, 0 deletions
diff --git a/src/mesa/swrast/s_fragprog_to_c.c b/src/mesa/swrast/s_fragprog_to_c.c new file mode 100644 index 00000000000..78526ca1466 --- /dev/null +++ b/src/mesa/swrast/s_fragprog_to_c.c @@ -0,0 +1,687 @@ +/* + * Mesa 3-D graphics library + * Version: 6.1 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* An amusing little utility to print ARB fragment programs out as a C + * function. Resulting code not tested except visually. + */ + + +#include "glheader.h" +#include "colormac.h" +#include "context.h" +#include "nvfragprog.h" +#include "macros.h" +#include "program.h" + +#include "s_nvfragprog.h" +#include "s_span.h" +#include "s_texture.h" + + +/* UREG - a way of representing an FP source register including + * swizzling and negation in a single GLuint. Major flaw is the + * limitation to source->Index < 32. 
Secondary flaw is the fact that + * it's overkill & we could probably just pass around the original + * datatypes instead. + * + * Bit layout of a ureg, as given by the shift and mask values below: + * bits 31..29 register type (UREG_TYPE_*) + * bits 28..24 register number (0..31) + * bits 23..20 X slot: negate flag (bit 23) and source selector + * bits 19..16 Y slot: negate flag (bit 19) and source selector + * bits 15..12 Z slot: negate flag (bit 15) and source selector + * bits 11..8 W slot: negate flag (bit 11) and source selector + * bits 7..4 slot that UREG() fills with _ZERO + * bits 3..0 slot that UREG() fills with _ONE + * + * NOTE(review): the two *_NEGATE_MBZ values are not referenced anywhere + * else in this file; their intended meaning should be confirmed. + */ + +#define UREG_TYPE_TEMP 0 +#define UREG_TYPE_INTERP 1 +#define UREG_TYPE_LOCAL_CONST 2 +#define UREG_TYPE_ENV_CONST 3 +#define UREG_TYPE_STATE_CONST 4 +#define UREG_TYPE_PARAM 5 +#define UREG_TYPE_OUTPUT 6 +#define UREG_TYPE_MASK 0x7 + +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_NR_MASK 0x1f /* 31 */ +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define _X 0 +#define _Y 1 +#define _Z 2 +#define _W 3 +#define _ZERO 4 /* NOTE! selects the constant-zero slot, not a register component */ +#define _ONE 5 /* NOTE! selects the constant-one slot, not a register component 
*/ + + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (_X << UREG_CHANNEL_X_SHIFT) | \ + (_Y << UREG_CHANNEL_Y_SHIFT) | \ + (_Z << UREG_CHANNEL_Z_SHIFT) | \ + (_W << UREG_CHANNEL_W_SHIFT) | \ + (_ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (_ONE << UREG_CHANNEL_ONE_SHIFT)) + +#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & \ + (0xf<<UREG_CHANNEL_X_SHIFT)) +#define CHANNEL_SRC( src, channel ) (src>>(channel*4)) + +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&UREG_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&UREG_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +#define deref(reg,pos) swizzle(reg, pos, pos, pos, pos) + + +static __inline int is_swizzled( int reg ) +{ + return ((reg & UREG_XYZW_CHANNEL_MASK) != + (UREG(0,0) & UREG_XYZW_CHANNEL_MASK)); +} + + +/* One neat thing about the UREG representation: + */ +static __inline int swizzle( int reg, int x, int y, int z, int w ) +{ + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | + CHANNEL_SRC( GET_CHANNEL_SRC( reg, x ), 0 ) | + CHANNEL_SRC( GET_CHANNEL_SRC( reg, y ), 1 ) | + CHANNEL_SRC( GET_CHANNEL_SRC( reg, z ), 2 ) | + CHANNEL_SRC( GET_CHANNEL_SRC( reg, w ), 3 )); +} + +/* Another neat thing about the UREG representation: + */ +static __inline int negate( int reg, int x, int y, int z, int w ) +{ + return reg ^ (((x&1)<<UREG_CHANNEL_X_NEGATE_SHIFT)| + ((y&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT)| + ((z&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT)| + ((w&1)<<UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + + +static GLuint src_reg_file( GLuint file ) +{ + switch (file) { + case PROGRAM_TEMPORARY: return UREG_TYPE_TEMP; + case PROGRAM_INPUT: return UREG_TYPE_INTERP; + case PROGRAM_LOCAL_PARAM: return UREG_TYPE_LOCAL_CONST; + case PROGRAM_ENV_PARAM: return UREG_TYPE_ENV_CONST; + + case PROGRAM_STATE_VAR: return UREG_TYPE_STATE_CONST; + case PROGRAM_NAMED_PARAM: return UREG_TYPE_PARAM; + default: return UREG_BAD; + } +} + + +/** + * Retrieve 
a ureg for the given source register. Will emit + * constants, apply swizzling and negation as needed. + */ +static GLuint src_vector( const struct fp_src_register *source ) +{ + GLuint src; + +/* fprintf(stderr, "%s File %d, Index %d\n", */ +/* __FUNCTION__, source->File, source->Index); */ + + assert(source->Index < 32); /* limitiation of UREG representation */ + + src = UREG( src_reg_file( source->File ), source->Index ); + + src = swizzle(src, + _X + source->Swizzle[0], + _X + source->Swizzle[1], + _X + source->Swizzle[2], + _X + source->Swizzle[3]); + + if (source->NegateBase) + src = negate( src, 1,1,1,1 ); + + return src; +} + +static void print_header( void ) +{ + printf("static void run_program( const GLfloat (*local_param)[4], \n" + " const GLfloat (*env_param)[4], \n" + " const GLfloat (*state_param)[4], \n" + " const GLfloat (*interp)[4], \n" + " GLfloat *outputs)\n" + "{\n" + " GLfloat temp[32][4];\n" + ); +} + +static void print_footer( void ) +{ + printf("}\n"); +} + +static void print_dest_reg( const struct fp_instruction *inst ) +{ + switch (inst->DstReg.File) { + case PROGRAM_OUTPUT: + printf("outputs[%d]", inst->DstReg.Index); + break; + case PROGRAM_TEMPORARY: + printf("temp[%d]", inst->DstReg.Index); + break; + default: + break; + } +} + +static void print_dest( const struct fp_instruction *inst, + GLuint idx ) +{ + print_dest_reg(inst); + printf("[%d]", idx); +} + + +#define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7) + +static void print_reg( GLuint arg ) +{ + switch (GET_UREG_TYPE(arg)) { + case UREG_TYPE_TEMP: printf("temp"); break; + case UREG_TYPE_INTERP: printf("interp"); break; + case UREG_TYPE_LOCAL_CONST: printf("local_const"); break; + case UREG_TYPE_ENV_CONST: printf("env_const"); break; + case UREG_TYPE_STATE_CONST: printf("state_const"); break; + case UREG_TYPE_PARAM: printf("param"); break; + }; + + printf("[%d]", GET_UREG_NR(arg)); +} + + +static void print_arg( const struct fragment_program *p, + GLuint arg ) +{ + 
GLuint src = UREG_SRC0(arg); + + if (src == _ZERO) { + printf("0"); + return; + } + + if (arg & (1<<UREG_CHANNEL_X_NEGATE_SHIFT)) + printf("-"); + + if (src == _ONE) { + printf("1"); + return; + } + + if (GET_UREG_TYPE(arg) == UREG_TYPE_STATE_CONST) { + printf("%g", p->Parameters->Parameters[GET_UREG_NR(arg)].Values[src]); + return; + } + + print_reg( arg ); + + switch (src){ + case _X: printf("[0]"); break; + case _Y: printf("[1]"); break; + case _Z: printf("[2]"); break; + case _W: printf("[3]"); break; + } +} + + +/* This is where the handling of expressions breaks down into string + * processing: + */ +static void print_expression( const struct fragment_program *p, + GLuint i, + const char *fmt, + va_list ap ) +{ + while (*fmt) { + if (*fmt == '%' && *(fmt+1) == 's') { + int reg = va_arg(ap, int); + + /* Use of deref() is a bit of a hack: + */ + print_arg( p, deref(reg, i) ); + fmt += 2; + } + else { + putchar(*fmt); + fmt++; + } + } + + printf(";\n"); +} + +static void do_tex_simple( const struct fragment_program *p, + const struct fp_instruction *inst, + const char *fn, GLuint texunit, GLuint arg ) +{ + printf(" %s( ctx, ", fn); + print_reg(arg); + printf(", %d, ", texunit ); + print_dest_reg(inst); + printf(");\n"); +} + + +static void do_tex( const struct fragment_program *p, + const struct fp_instruction *inst, + const char *fn, GLuint texunit, GLuint arg ) +{ + GLuint i; + GLboolean need_tex = GL_FALSE, need_result = GL_FALSE; + + for (i = 0; i < 4; i++) + if (!inst->DstReg.WriteMask[i]) + need_result = GL_TRUE; + + if (is_swizzled(arg)) + need_tex = GL_TRUE; + + if (!need_tex && !need_result) { + do_tex_simple( p, inst, fn, texunit, arg ); + return; + } + + printf(" {\n"); + printf(" GLfloat texcoord[4];\n"); + printf(" GLfloat result[4];\n"); + + for (i = 0; i < 4; i++) { + printf(" texcoord[%d] = ", i); + print_arg( p, deref(arg, i) ); + printf(";\n"); + } + + printf(" %s( ctx, texcoord, %d, result);\n", fn, texunit ); + + for (i = 0; i < 4; i++) { + 
if (inst->DstReg.WriteMask[i]) { + printf(" "); + print_dest(inst, i); + printf(" = result[%d];\n", i); + } + } + + printf(" }\n"); +} + +static void assign_single( GLuint i, + const struct fragment_program *p, + const struct fp_instruction *inst, + const char *fmt, + ... ) +{ + va_list ap; + va_start( ap, fmt ); + + if (inst->DstReg.WriteMask[i]) { + printf(" "); + print_dest(inst, i); + printf(" = "); + print_expression( p, i, fmt, ap); + } + + va_end( ap ); +} + +static void assign4( const struct fragment_program *p, + const struct fp_instruction *inst, + const char *fmt, + ... ) +{ + GLuint i; + va_list ap; + va_start( ap, fmt ); + + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask[i]) { + printf(" "); + print_dest(inst, i); + printf(" = "); + print_expression( p, i, fmt, ap); + } + + va_end( ap ); +} + +static void assign4_replicate( const struct fragment_program *p, + const struct fp_instruction *inst, + const char *fmt, + ... ) +{ + GLuint i; + GLboolean ok = 0; + va_list ap; + + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask[i]) + ok = 1; + + if (!ok) return; + + va_start( ap, fmt ); + + printf(" "); + + for (i = 0; i < 4; i++) + if (inst->DstReg.WriteMask[i]) { + print_dest(inst, i); + printf(" = "); + } + + print_expression( p, 0, fmt, ap); + + va_end( ap ); +} + + + + + + +static GLuint nr_args( GLuint opcode ) +{ + switch (opcode) { + case FP_OPCODE_ABS: return 1; + case FP_OPCODE_ADD: return 2; + case FP_OPCODE_CMP: return 3; + case FP_OPCODE_COS: return 1; + case FP_OPCODE_DP3: return 2; + case FP_OPCODE_DP4: return 2; + case FP_OPCODE_DPH: return 2; + case FP_OPCODE_DST: return 2; + case FP_OPCODE_EX2: return 1; + case FP_OPCODE_FLR: return 1; + case FP_OPCODE_FRC: return 1; + case FP_OPCODE_KIL: return 1; + case FP_OPCODE_LG2: return 1; + case FP_OPCODE_LIT: return 1; + case FP_OPCODE_LRP: return 3; + case FP_OPCODE_MAD: return 3; + case FP_OPCODE_MAX: return 2; + case FP_OPCODE_MIN: return 2; + case FP_OPCODE_MOV: return 1; + case 
FP_OPCODE_MUL: return 2; + case FP_OPCODE_POW: return 2; + case FP_OPCODE_RCP: return 1; + case FP_OPCODE_RSQ: return 1; + case FP_OPCODE_SCS: return 1; + case FP_OPCODE_SGE: return 2; + case FP_OPCODE_SIN: return 1; + case FP_OPCODE_SLT: return 2; + case FP_OPCODE_SUB: return 2; + case FP_OPCODE_SWZ: return 1; + case FP_OPCODE_TEX: return 1; + case FP_OPCODE_TXB: return 1; + case FP_OPCODE_TXP: return 1; + case FP_OPCODE_XPD: return 2; + default: return 0; + } +} + + + +static void upload_program( const struct fragment_program *p ) +{ + const struct fp_instruction *inst = p->Instructions; + + for (; inst->Opcode != FP_OPCODE_END; inst++) { + + GLuint src[3], i; + GLuint nr = nr_args( inst->Opcode ); + + for (i = 0; i < nr; i++) + src[i] = src_vector( &inst->SrcReg[i] ); + + switch (inst->Opcode) { + case FP_OPCODE_ABS: + assign4(p, inst, "FABSF(%s)", src[0]); + break; + + case FP_OPCODE_ADD: + assign4(p, inst, "%s + %s", src[0], src[1]); + break; + + case FP_OPCODE_CMP: + assign4(p, inst, "%s < 0.0F ? %s : %s", src[0], src[1], src[2]); + break; + + case FP_OPCODE_COS: + assign4_replicate(p, inst, "COS(%s)", src[0]); + break; + + case FP_OPCODE_DP3: + assign4_replicate(p, inst, + "%s*%s + %s*%s + %s*%s", + deref(src[0],_X), + deref(src[1],_X), + deref(src[0],_Y), + deref(src[1],_Y), + deref(src[0],_Z), + deref(src[1],_Z)); + break; + + case FP_OPCODE_DP4: + assign4_replicate(p, inst, + "%s*%s + %s*%s + %s*%s + %s*%s", + deref(src[0],_X), + deref(src[1],_X), + deref(src[0],_Y), + deref(src[1],_Y), + deref(src[0],_Z), + deref(src[1],_Z)); + break; + + case FP_OPCODE_DPH: + assign4_replicate(p, inst, + "%s*%s + %s*%s + %s*%s + %s", + deref(src[0],_X), + deref(src[1],_X), + deref(src[0],_Y), + deref(src[1],_Y), + deref(src[1],_Z)); + break; + + case FP_OPCODE_DST: + /* result[0] = 1 * 1; + * result[1] = a[1] * b[1]; + * result[2] = a[2] * 1; + * result[3] = 1 * b[3]; + * + * Here we hope that the compiler can optimize away "x*1" to "x". 
+ */ + assign4(p, inst, + "%s*%s", + swizzle(src[0], _ONE, _Y, _Z, _ONE), + swizzle(src[1], _ONE, _Y, _ONE, _W )); + break; + + case FP_OPCODE_EX2: + assign4_replicate(p, inst, "EX2(%s)", src[0]); + break; + + case FP_OPCODE_FLR: + assign4_replicate(p, inst, "FLR(%s)", src[0]); + break; + + case FP_OPCODE_FRC: + assign4_replicate(p, inst, "FRC(%s)", src[0]); + break; + + case FP_OPCODE_KIL: + /* TODO */ + break; + + case FP_OPCODE_LG2: + assign4_replicate(p, inst, "LOG(%s)", deref(src[0], _X)); + break; + + case FP_OPCODE_LIT: + assign_single(0, p, inst, "1.0"); + assign_single(1, p, inst, "MIN2(%s, 0)", deref(src[0], _X)); + assign_single(2, p, inst, "(%s > 0.0) ? EXP(%s * MIN2(%s, 0)) : 0.0", + deref(src[0], _X), + deref(src[0], _Z), + deref(src[0], _Y)); + assign_single(3, p, inst, "1.0"); + break; + + case FP_OPCODE_LRP: + assign4(p, inst, + "%s * %s + (1.0 - %s) * %s", + src[0], src[1], src[0], src[2]); + break; + + case FP_OPCODE_MAD: + assign4(p, inst, "%s * %s + %s", src[0], src[1], src[2]); + break; + + case FP_OPCODE_MAX: + assign4(p, inst, "MAX2(%s, %s)", src[0], src[1]); + break; + + case FP_OPCODE_MIN: + assign4(p, inst, "MIN2(%s, %s)", src[0], src[1]); + break; + + case FP_OPCODE_MOV: + assign4(p, inst, "%s", src[0]); + break; + + case FP_OPCODE_MUL: + assign4(p, inst, "%s * %s", src[0], src[1]); + break; + + case FP_OPCODE_POW: + assign4_replicate(p, inst, "POW(%s, %s)", + deref(src[0], _X), + deref(src[1], _X)); + break; + + case FP_OPCODE_RCP: + assign4_replicate(p, inst, "1.0/%s", deref(src[0], _X)); + break; + + case FP_OPCODE_RSQ: + assign4_replicate(p, inst, "INV_SQRTF(%s)", deref(src[0], _X)); + break; + + case FP_OPCODE_SCS: + if (inst->DstReg.WriteMask[0]) { + assign_single(0, p, inst, "COS(%s)", deref(src[0], _X)); + } + + if (inst->DstReg.WriteMask[1]) { + assign_single(1, p, inst, "SIN(%s)", deref(src[0], _X)); + } + break; + + case FP_OPCODE_SGE: + assign4(p, inst, "%s >= %s ? 
1.0 : 0.0", src[0], src[1]); + break; + + case FP_OPCODE_SIN: + assign4_replicate(p, inst, "SIN(%s)", deref(src[0], _X)); + break; + + case FP_OPCODE_SLT: + assign4(p, inst, "%s < %s ? 1.0 : 0.0", src[0], src[1]); + break; + + case FP_OPCODE_SUB: + assign4(p, inst, "%s - %s", src[0], src[1]); + break; + + case FP_OPCODE_SWZ: /* same implementation as MOV: */ + assign4(p, inst, "%s", src[0]); + break; + + case FP_OPCODE_TEX: + do_tex(p, inst, "TEX", inst->TexSrcUnit, src[0]); + break; + + case FP_OPCODE_TXB: + do_tex(p, inst, "TXB", inst->TexSrcUnit, src[0]); + break; + + case FP_OPCODE_TXP: + do_tex(p, inst, "TXP", inst->TexSrcUnit, src[0]); + break; + + case FP_OPCODE_X2D: + /* Cross product: + * result.x = src[0].y * src[1].z - src[0].z * src[1].y; + * result.y = src[0].z * src[1].x - src[0].x * src[1].z; + * result.z = src[0].x * src[1].y - src[0].y * src[1].x; + * result.w = undef; + */ + assign4(p, inst, + "%s * %s - %s * %s", + swizzle(src[0], _Y, _Z, _X, _ONE), + swizzle(src[1], _Z, _X, _Y, _ONE), + swizzle(src[0], _Z, _X, _Y, _ONE), + swizzle(src[1], _Y, _Z, _X, _ONE)); + break; + + default: + return; + } + } +} + + + + + +void _swrast_translate_program( GLcontext *ctx ) +{ + if (ctx->FragmentProgram.Current) { + print_header(); + upload_program( ctx->FragmentProgram.Current ); + print_footer(); + } +} + |