diff options
author | Luca Barbieri <[email protected]> | 2010-09-03 22:06:41 +0200 |
---|---|---|
committer | Luca Barbieri <[email protected]> | 2010-09-03 22:37:35 +0200 |
commit | befc66788bf30f0de39c21a41bfb4d81b257b4d8 (patch) | |
tree | 2191ec7fc3108f0336b2dca2357a9dfad3ff0bc8 /src/gallium/drivers/nvfx | |
parent | f061524f0737bf59dad6ab9bb2e0015df804e4b5 (diff) |
nvfx: implement LIT in fp
Diffstat (limited to 'src/gallium/drivers/nvfx')
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragprog.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/nvfx/nvfx_shader.h | 1 |
2 files changed, 24 insertions, 1 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 275672a31fa..6eb744e6546 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1,3 +1,4 @@ +#include <float.h> #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" @@ -629,7 +630,28 @@ nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, case TGSI_OPCODE_LG2: nvfx_fp_emit(fpc, arith(sat, LG2, dst, mask, src[0], none, none)); break; -// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LIT: + if(!nvfx->is_nv4x) + nvfx_fp_emit(fpc, arith(sat, LIT_NV30, dst, mask, src[0], src[1], src[2])); + else { + /* we use FLT_MIN, so that log2 never gives -infinity, and thus multiplication by + * specular 0 always gives 0, so that ex2 gives 1, to satisfy the 0^0 = 1 requirement + * + * NOTE: if we start using half precision, we might need an fp16 FLT_MIN here instead + */ + float maxv[4] = {0, FLT_MIN, 0, 0}; + struct nvfx_src maxs = nvfx_src(constant(fpc, -1, maxv)); + tmp = nvfx_src(temp(fpc)); + if (ci>= 0 || ii >= 0) { + nvfx_fp_emit(fpc, arith(0, MOV, tmp.reg, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, maxs, none, none)); + maxs = tmp; + } + nvfx_fp_emit(fpc, arith(0, MAX, tmp.reg, NVFX_FP_MASK_Y | NVFX_FP_MASK_W, swz(src[0], X, X, X, Y), swz(maxs, X, X, Y, Y), none)); + nvfx_fp_emit(fpc, arith(0, LG2, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), none, none)); + nvfx_fp_emit(fpc, arith(0, MUL, tmp.reg, NVFX_FP_MASK_W, swz(tmp, W, W, W, W), swz(src[0], W, W, W, W), none)); + nvfx_fp_emit(fpc, arith(sat, LITEX2_NV40, dst, mask, swz(tmp, Y, Y, W, W), none, none)); + } + break; case TGSI_OPCODE_LRP: if(!nvfx->is_nv4x) nvfx_fp_emit(fpc, arith(sat, LRP_NV30, dst, mask, src[0], src[1], src[2])); diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index e642a27af86..badc63d3b42 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -263,6 +263,7 @@ /* NV40 only fragment program opcodes */ #define NVFX_FP_OP_OPCODE_TXL_NV40 0x2F +#define NVFX_FP_OP_OPCODE_LITEX2_NV40 0x3C /* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ #define NV40_FP_OP_BRA_OPCODE_BRK 0x0 |