From 360ba5b74858b3854784e3d4678c242140088395 Mon Sep 17 00:00:00 2001 From: Tiziano Bacocco Date: Fri, 30 Jan 2015 20:10:38 +0100 Subject: st/nine: Implement TEXBEM,TEXBEML and BEM Signed-off-by: Tiziano Bacocco --- src/gallium/state_trackers/nine/device9.c | 32 ++++++- src/gallium/state_trackers/nine/nine_shader.c | 126 +++++++++++++++++++++++-- src/gallium/state_trackers/nine/nine_shader.h | 1 + src/gallium/state_trackers/nine/nine_state.c | 10 ++ src/gallium/state_trackers/nine/nine_state.h | 2 + src/gallium/state_trackers/nine/pixelshader9.c | 1 + src/gallium/state_trackers/nine/pixelshader9.h | 1 + 7 files changed, 165 insertions(+), 8 deletions(-) (limited to 'src/gallium/state_trackers') diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index e0f3e398ad7..1ca04a430ea 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -342,8 +342,9 @@ NineDevice9_ctor( struct NineDevice9 *This, This->state.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1); if (!This->state.vs_const_f || !This->state.ps_const_f || - !This->state.vs_lconstf_temp) + !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp) return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || @@ -466,6 +467,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->state.vs_const_f); FREE(This->state.ps_const_f); FREE(This->state.vs_lconstf_temp); + FREE(This->state.ps_lconstf_temp); if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) @@ -2636,6 +2638,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Value ) { struct nine_state *state = This->update; + int bumpmap_index = -1; DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value); nine_dump_D3DTSS_value(DBG_FF, Type, Value); @@ -2644,6 +2647,33 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL); state->ff.tex_stage[Stage][Type] = Value; + switch (Type) { + case D3DTSS_BUMPENVMAT00: + bumpmap_index = 4 * Stage; + break; + case D3DTSS_BUMPENVMAT10: + bumpmap_index = 4 * Stage + 1; + break; + case D3DTSS_BUMPENVMAT01: + bumpmap_index = 4 * Stage + 2; + break; + case D3DTSS_BUMPENVMAT11: + bumpmap_index = 4 * Stage + 3; + break; + case D3DTSS_BUMPENVLSCALE: + bumpmap_index = 4 * 8 + 2 * Stage; + break; + case D3DTSS_BUMPENVLOFFSET: + bumpmap_index = 4 * 8 + 2 * Stage + 1; + break; + default: + break; + } + + if (bumpmap_index >= 0) { + state->bumpmap_vars[bumpmap_index] = Value; + state->changed.group |= NINE_STATE_PS_CONST; + } state->changed.group |= NINE_STATE_FF_PSSTAGES; state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32); diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 754f5af6b8e..a11c4c7c35a 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -89,6 +89,15 @@ static inline const char *d3dsio_to_string(unsigned opcode); #define NINE_SWIZZLE4(x,y,z,w) \ TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w +#define NINE_CONSTANT_SRC(index) \ + ureg_src_register(TGSI_FILE_CONSTANT, index) + +#define NINE_APPLY_SWIZZLE(src, s) \ + ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) + +#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \ + NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s) + #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) @@ -2135,12 +2144,76 @@ DECL_SPECIAL(TEXKILL) DECL_SPECIAL(TEXBEM) { - STUB(D3DERR_INVALIDCALL); -} + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_dst tmp, tmp2; + struct ureg_src sample, m00, m01, m10, m11; + struct ureg_src bumpenvlscale, bumpenvloffset; + const int m = tx->insn.dst[0].idx; + const int n = tx->insn.src[0].idx; -DECL_SPECIAL(TEXBEML) -{ - STUB(D3DERR_INVALIDCALL); + assert(tx->version.major == 1); + + sample = ureg_DECL_sampler(ureg, m); + tx->info->sampler_mask |= 1 << m; + + tx_texcoord_alloc(tx, m); + + tmp = tx_scratch(tx); + tmp2 = tx_scratch(tx); + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + + /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ + if (m % 2 == 0) { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y); + } else { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W); + } + + /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]); + /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]); + /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + + /* Now the texture coordinates are in tmp.xy */ + + if (tx->insn.opcode == D3DSIO_TEXBEM) { + ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + } else if (tx->insn.opcode == D3DSIO_TEXBEML) { + /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ + ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z), + bumpenvlscale, bumpenvloffset); + ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); + } + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXREG2AR) @@ -2421,7 +2494,43 @@ DECL_SPECIAL(TEXDEPTH) DECL_SPECIAL(BEM) { - STUB(D3DERR_INVALIDCALL); + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); + struct ureg_src m00, m01, m10, m11; + const int m = tx->insn.dst[0].idx; + struct ureg_dst tmp; + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + m); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); + /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(src1, X), src0); + /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXLD) @@ -2616,7 +2725,7 @@ struct sm1_op_info inst_table[] = _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), - _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)), + _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), @@ -3023,6 +3132,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) info->lconstf.data = NULL; info->lconstf.ranges = NULL; + info->bumpenvmat_needed = 0; + for (i = 0; i < Elements(tx->regs.rL); ++i) { tx->regs.rL[i] = ureg_dst_undef(); } @@ -3233,6 +3344,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) info->const_int_slots > 0 ? max_const_f + info->const_int_slots : info->const_float_slots; + info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ for (s = 0; s < slot_max; s++) diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index ec256c153a9..f2b1e8bf6e2 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -68,6 +68,7 @@ struct nine_shader_info unsigned const_bool_slots; struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */ + uint8_t bumpenvmat_needed; }; static inline void diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 4bf5908ebe9..403cd23c0a5 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -586,6 +586,7 @@ update_ps_constants_userbuf(struct NineDevice9 *device) struct nine_state *state = &device->state; struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; + int i; cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = device->state.ps->const_used_size; @@ -606,6 +607,14 @@ update_ps_constants_userbuf(struct NineDevice9 *device) state->changed.ps_const_b = 0; } + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->bumpenvmat_needed) { + memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); + + cb.user_buffer = device->state.ps_lconstf_temp; + } + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); if (device->state.changed.ps_const_f) { @@ -1152,6 +1161,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, } state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE; state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1; + memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars)); for (s = 0; s < Elements(state->samp); ++s) { memcpy(&state->samp[s], nine_samp_state_defaults, diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index cac9af66532..89a5fd992c4 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -148,6 +148,8 @@ struct nine_state float *ps_const_f; int ps_const_i[NINE_MAX_CONST_I][4]; BOOL ps_const_b[NINE_MAX_CONST_B]; + float *ps_lconstf_temp; + uint32_t bumpmap_vars[48]; uint32_t ps_key; struct { diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 3f176a312bf..010c4f4bd28 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -72,6 +72,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, This->sampler_mask = info.sampler_mask; This->rt_mask = info.rt_mask; This->const_used_size = info.const_used_size; + This->bumpenvmat_needed = info.bumpenvmat_needed; /* no constant relative addressing for ps */ assert(info.lconstf.data == NULL); assert(info.lconstf.ranges == NULL); diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index 6dad1d1ee76..9715d9044ce 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ b/src/gallium/state_trackers/nine/pixelshader9.h @@ -41,6 +41,7 @@ struct NinePixelShader9 unsigned const_used_size; /* in bytes */ + uint8_t bumpenvmat_needed; uint16_t sampler_mask; uint16_t sampler_mask_shadow; uint8_t rt_mask; -- cgit v1.2.3