diff options
-rw-r--r-- | src/gallium/state_trackers/nine/device9.c | 85 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/device9.h | 1 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/nine_shader.c | 5 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/nine_state.c | 170 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/nine_state.h | 13 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/stateblock9.c | 85 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/vertexshader9.c | 15 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/vertexshader9.h | 8 |
8 files changed, 322 insertions, 60 deletions
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index dca75c53e6e..f1354904344 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -168,12 +168,31 @@ NineDevice9_ctor( struct NineDevice9 *This, if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) { DBG("Application asked full Software Vertex Processing.\n"); This->swvp = true; + This->may_swvp = true; } else This->swvp = false; - if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) + if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) { DBG("Application asked mixed Software Vertex Processing.\n"); + This->may_swvp = true; + } /* TODO: check if swvp is resetted by device Resets */ + if (This->may_swvp && + (This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX, + PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE) + < (NINE_MAX_CONST_F_SWVP/2) * sizeof(float[4]) || + This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX, + PIPE_SHADER_CAP_MAX_CONST_BUFFERS) < 5)) { + /* Note: We just go on, some apps never use the abilities of + * swvp, and just set more constants than allowed at init. + * Only cards we support that are affected are the r500 */ + WARN("Card unable to handle Software Vertex Processing. Game may fail\n"); + } + + /* When may_swvp, SetConstant* limits are different */ + if (This->may_swvp) + This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP; + This->pipe = This->screen->context_create(This->screen, NULL, 0); if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */ @@ -322,12 +341,22 @@ NineDevice9_ctor( struct NineDevice9 *This, This->vs_const_size = max_const_vs * sizeof(float[4]); This->ps_const_size = max_const_ps * sizeof(float[4]); /* Include space for I,B constants for user constbuf. */ + if (This->may_swvp) { + This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); + if (!This->state.vs_const_f_swvp) + return E_OUTOFMEMORY; + This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1); + This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1); + This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1); + } else { + This->state.vs_const_f_swvp = NULL; + This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1); + This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1); + } This->state.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); - This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1); - This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1); - This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1); if (!This->state.vs_const_f || !This->state.ps_const_f || !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp || !This->state.vs_const_i || !This->state.vs_const_b) @@ -464,6 +493,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->state.ps_lconstf_temp); FREE(This->state.vs_const_i); FREE(This->state.vs_const_b); + FREE(This->state.vs_const_f_swvp); if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) @@ -2490,11 +2520,11 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This, /* TODO: texture/sampler state */ memcpy(dst->changed.rs, nine_render_states_vertex, sizeof(dst->changed.rs)); - nine_ranges_insert(&dst->changed.vs_const_f, 0, This->max_vs_const_f, + nine_ranges_insert(&dst->changed.vs_const_f, 0, This->may_swvp ? NINE_MAX_CONST_F_SWVP : This->max_vs_const_f, &This->range_pool); - nine_ranges_insert(&dst->changed.vs_const_i, 0, NINE_MAX_CONST_I, + nine_ranges_insert(&dst->changed.vs_const_i, 0, This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I, &This->range_pool); - nine_ranges_insert(&dst->changed.vs_const_b, 0, NINE_MAX_CONST_B, + nine_ranges_insert(&dst->changed.vs_const_b, 0, This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B, &This->range_pool); for (s = 0; s < NINE_MAX_SAMPLERS; ++s) dst->changed.sampler[s] |= 1 << D3DSAMP_DMAPOFFSET; @@ -2890,6 +2920,7 @@ NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This, { if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) { This->swvp = bSoftware; + This->state.changed.group |= NINE_STATE_SWVP; return D3D_OK; } else return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */ @@ -3376,6 +3407,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, UINT Vector4fCount ) { struct nine_state *state = This->update; + float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f; DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n", This, StartRegister, pConstantData, Vector4fCount); @@ -3388,12 +3420,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, user_assert(pConstantData, D3DERR_INVALIDCALL); if (!This->is_recording) { - if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData, + if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0]))) return D3D_OK; } - memcpy(&state->vs_const_f[StartRegister * 4], + memcpy(&vs_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0])); @@ -3401,6 +3433,14 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, StartRegister, StartRegister + Vector4fCount, &This->range_pool); + if (This->may_swvp) { + Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister; + if (StartRegister < NINE_MAX_CONST_F) + memcpy(&state->vs_const_f[StartRegister * 4], + pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0])); + } + state->changed.group |= NINE_STATE_VS_CONST; return D3D_OK; @@ -3413,13 +3453,14 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This, UINT Vector4fCount ) { const struct nine_state *state = &This->state; + float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f; user_assert(StartRegister < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL); user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); memcpy(pConstantData, - &state->vs_const_f[StartRegister * 4], + &vs_const_f[StartRegister * 4], Vector4fCount * 4 * sizeof(state->vs_const_f[0])); return D3D_OK; @@ -3437,8 +3478,10 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n", This, StartRegister, pConstantData, Vector4iCount); - user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL); - user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL); + user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I), + D3DERR_INVALIDCALL); + user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I), + D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.vs_integer) { @@ -3476,8 +3519,10 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This, const struct nine_state *state = &This->state; int i; - user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL); - user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL); + user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I), + D3DERR_INVALIDCALL); + user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I), + D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.vs_integer) { @@ -3509,8 +3554,10 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n", This, StartRegister, pConstantData, BoolCount); - user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL); - user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); + user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B), + D3DERR_INVALIDCALL); + user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B), + D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); if (!This->is_recording) { @@ -3543,8 +3590,10 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This, const struct nine_state *state = &This->state; int i; - user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL); - user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); + user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B), + D3DERR_INVALIDCALL); + user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B), + D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); for (i = 0; i < BoolCount; i++) diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index f2fd164cc12..b6aa5e06531 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -48,6 +48,7 @@ struct NineDevice9 { struct NineUnknown base; boolean ex; + boolean may_swvp; /* G3D context */ struct pipe_screen *screen; diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 2d4e323a4ae..2b573e6879e 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -3501,7 +3501,10 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) tx->parse++; /* for byte_size */ if (tx->failure) { - ERR("Encountered buggy shader\n"); + /* For VS shaders, we print the warning later, + * we first try with swvp. */ + if (IS_PS) + ERR("Encountered buggy shader\n"); ureg_destroy(tx->ureg); hr = D3DERR_INVALIDCALL; goto out; diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 2faca121fe6..024e639f92f 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -79,6 +79,143 @@ prepare_rasterizer(struct NineDevice9 *device) } static void +prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 4096 * sizeof(float[4]); + cb.user_buffer = state->vs_const_f_swvp; + + if (state->vs->lconstf.ranges) { + const struct nine_lconstf *lconstf = &device->state.vs->lconstf; + const struct nine_range *r = lconstf->ranges; + unsigned n = 0; + float *dst = device->state.vs_lconstf_temp; + float *src = (float *)cb.user_buffer; + memcpy(dst, src, cb.buffer_size); + while (r) { + unsigned p = r->bgn; + unsigned c = r->end - r->bgn; + memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float)); + n += c; + r = r->next; + } + cb.user_buffer = dst; + } + + state->pipe.cb0_swvp = cb; + + cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]); + state->pipe.cb1_swvp = cb; + } + + if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 2048 * sizeof(float[4]); + cb.user_buffer = state->vs_const_i; + + state->pipe.cb2_swvp = cb; + state->changed.vs_const_i = 0; + } + + if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { + struct pipe_constant_buffer cb; + + cb.buffer = NULL; + cb.buffer_offset = 0; + cb.buffer_size = 512 * sizeof(float[4]); + cb.user_buffer = state->vs_const_b; + + state->pipe.cb3_swvp = cb; + state->changed.vs_const_b = 0; + } + + if (!device->driver_caps.user_cbufs) { + struct pipe_constant_buffer *cb = &(state->pipe.cb0_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb1_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb2_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + + cb = &(state->pipe.cb3_swvp); + u_upload_data(device->constbuf_uploader, + 0, + cb->buffer_size, + device->constbuf_alignment, + cb->user_buffer, + &(cb->buffer_offset), + &(cb->buffer)); + u_upload_unmap(device->constbuf_uploader); + cb->user_buffer = NULL; + } + + if (device->state.changed.vs_const_f) { + struct nine_range *r = device->state.changed.vs_const_f; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_f = NULL; + } + + if (device->state.changed.vs_const_i) { + struct nine_range *r = device->state.changed.vs_const_i; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_i = NULL; + } + + if (device->state.changed.vs_const_b) { + struct nine_range *r = device->state.changed.vs_const_b; + struct nine_range *p = r; + while (p->next) + p = p->next; + nine_range_pool_put_chain(&device->range_pool, r, p); + device->state.changed.vs_const_b = NULL; + } + + state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; +} + +static void prepare_vs_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; @@ -88,21 +225,27 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device) cb.buffer_size = device->state.vs->const_used_size; cb.user_buffer = device->state.vs_const_f; - if (!cb.buffer_size) + if (device->swvp) { + prepare_vs_constants_userbuf_swvp(device); return; + } - if (state->changed.vs_const_i) { + if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) { int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4])); state->changed.vs_const_i = 0; } - if (state->changed.vs_const_b) { + + if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) { int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f]; uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I]; memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL)); state->changed.vs_const_b = 0; } + if (!cb.buffer_size) + return; + if (device->state.vs->lconstf.ranges) { /* TODO: Can we make it so that we don't have to copy everything ? */ const struct nine_lconstf *lconstf = &device->state.vs->lconstf; @@ -251,7 +394,7 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) int has_key_changed = 0; if (likely(state->programmable_vs)) - has_key_changed = NineVertexShader9_UpdateKey(vs, state); + has_key_changed = NineVertexShader9_UpdateKey(vs, device); if (!shader_changed && !has_key_changed) return 0; @@ -740,8 +883,16 @@ commit_vs_constants(struct NineDevice9 *device) if (unlikely(!device->state.programmable_vs)) pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); - else - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); + else { + if (device->swvp) { + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb0_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &device->state.pipe.cb1_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &device->state.pipe.cb2_swvp); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &device->state.pipe.cb3_swvp); + } else { + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); + } + } } static inline void @@ -777,7 +928,8 @@ commit_ps(struct NineDevice9 *device) (NINE_STATE_VS | \ NINE_STATE_TEXTURE | \ NINE_STATE_FOG_SHADER | \ - NINE_STATE_POINTSIZE_SHADER) + NINE_STATE_POINTSIZE_SHADER | \ + NINE_STATE_SWVP) #define NINE_STATE_SHADER_CHANGE_PS \ (NINE_STATE_PS | \ @@ -886,14 +1038,14 @@ nine_update_state(struct NineDevice9 *device) commit_index_buffer(device); } - if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) { + if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) { if (group & NINE_STATE_MULTISAMPLE) group |= check_multisample(device); if (group & NINE_STATE_RASTERIZER) prepare_rasterizer(device); if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) update_textures_and_samplers(device); - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs) + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && state->programmable_vs) prepare_vs_constants_userbuf(device); if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) prepare_ps_constants_userbuf(device); diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 8c9483231e0..2aa424d46a7 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -84,8 +84,9 @@ #define NINE_STATE_PS1X_SHADER (1 << 26) #define NINE_STATE_POINTSIZE_SHADER (1 << 27) #define NINE_STATE_MULTISAMPLE (1 << 28) -#define NINE_STATE_ALL 0x1fffffff -#define NINE_STATE_UNHANDLED (1 << 29) +#define NINE_STATE_SWVP (1 << 29) +#define NINE_STATE_ALL 0x3fffffff +#define NINE_STATE_UNHANDLED (1 << 30) #define NINE_STATE_COMMIT_DSA (1 << 0) #define NINE_STATE_COMMIT_RASTERIZER (1 << 1) @@ -101,6 +102,9 @@ #define NINE_MAX_CONST_F 256 #define NINE_MAX_CONST_I 16 #define NINE_MAX_CONST_B 16 +#define NINE_MAX_CONST_F_SWVP 8192 +#define NINE_MAX_CONST_I_SWVP 2048 +#define NINE_MAX_CONST_B_SWVP 2048 #define NINE_MAX_CONST_ALL 276 /* B consts count only 1/4 th */ #define NINE_CONST_I_BASE(nconstf) \ @@ -157,6 +161,7 @@ struct nine_state */ struct NineVertexShader9 *vs; float *vs_const_f; + float *vs_const_f_swvp; int *vs_const_i; BOOL *vs_const_b; float *vs_lconstf_temp; @@ -229,6 +234,10 @@ struct nine_state struct pipe_rasterizer_state rast; struct pipe_blend_state blend; struct pipe_constant_buffer cb_vs; + struct pipe_constant_buffer cb0_swvp; + struct pipe_constant_buffer cb1_swvp; + struct pipe_constant_buffer cb2_swvp; + struct pipe_constant_buffer cb3_swvp; struct pipe_constant_buffer cb_ps; struct pipe_constant_buffer cb_vs_ff; struct pipe_constant_buffer cb_ps_ff; diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 19c3766b3c6..102213e417e 100644 --- a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -30,8 +30,9 @@ /* XXX TODO: handling of lights is broken */ -#define VS_CONST_I_SIZE (NINE_MAX_CONST_I * sizeof(int[4])) -#define VS_CONST_B_SIZE (NINE_MAX_CONST_B * sizeof(BOOL)) +#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4]))) +#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL))) +#define VS_CONST_F_SWVP_SIZE (NINE_MAX_CONST_F_SWVP * sizeof(float[4])) HRESULT NineStateBlock9_ctor( struct NineStateBlock9 *This, @@ -49,12 +50,19 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This, This->state.vs_const_f = MALLOC(This->base.device->vs_const_size); This->state.ps_const_f = MALLOC(This->base.device->ps_const_size); - This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE); - This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE); + This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device)); + This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device)); if (!This->state.vs_const_f || !This->state.ps_const_f || !This->state.vs_const_i || !This->state.vs_const_b) return E_OUTOFMEMORY; + if (This->base.device->may_swvp) { + This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE); + if (!This->state.vs_const_f_swvp) + return E_OUTOFMEMORY; + } else + This->state.vs_const_f_swvp = NULL; + return D3D_OK; } @@ -71,6 +79,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This ) FREE(state->ps_const_f); FREE(state->vs_const_i); FREE(state->vs_const_b); + FREE(state->vs_const_f_swvp); FREE(state->ff.light); @@ -101,7 +110,8 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This ) * TODO: compare ? */ static void -nine_state_copy_common(struct nine_state *dst, +nine_state_copy_common(struct NineDevice9 *device, + struct nine_state *dst, struct nine_state *src, struct nine_state *mask, /* aliases either src or dst */ const boolean apply, @@ -130,13 +140,32 @@ nine_state_copy_common(struct nine_state *dst, */ if (mask->changed.group & NINE_STATE_VS_CONST) { struct nine_range *r; - for (r = mask->changed.vs_const_f; r; r = r->next) { - memcpy(&dst->vs_const_f[r->bgn * 4], - &src->vs_const_f[r->bgn * 4], - (r->end - r->bgn) * 4 * sizeof(float)); - if (apply) - nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, - pool); + if (device->may_swvp) { + for (r = mask->changed.vs_const_f; r; r = r->next) { + int bgn = r->bgn; + int end = r->end; + memcpy(&dst->vs_const_f_swvp[bgn * 4], + &src->vs_const_f_swvp[bgn * 4], + (end - bgn) * 4 * sizeof(float)); + if (apply) + nine_ranges_insert(&dst->changed.vs_const_f, bgn, end, + pool); + if (bgn < device->max_vs_const_f) { + end = MIN2(end, device->max_vs_const_f); + memcpy(&dst->vs_const_f[bgn * 4], + &src->vs_const_f[bgn * 4], + (end - bgn) * 4 * sizeof(float)); + } + } + } else { + for (r = mask->changed.vs_const_f; r; r = r->next) { + memcpy(&dst->vs_const_f[r->bgn * 4], + &src->vs_const_f[r->bgn * 4], + (r->end - r->bgn) * 4 * sizeof(float)); + if (apply) + nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, + pool); + } } for (r = mask->changed.vs_const_i; r; r = r->next) { memcpy(&dst->vs_const_i[r->bgn * 4], @@ -342,7 +371,8 @@ nine_state_copy_common(struct nine_state *dst, } static void -nine_state_copy_common_all(struct nine_state *dst, +nine_state_copy_common_all(struct NineDevice9 *device, + struct nine_state *dst, const struct nine_state *src, struct nine_state *help, const boolean apply, @@ -369,12 +399,15 @@ nine_state_copy_common_all(struct nine_state *dst, if (1) { struct nine_range *r = help->changed.vs_const_f; memcpy(&dst->vs_const_f[0], - &src->vs_const_f[0], (r->end - r->bgn) * 4 * sizeof(float)); + &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float)); + if (device->may_swvp) + memcpy(dst->vs_const_f_swvp, + src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE); if (apply) nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool); - memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE); - memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE); + memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device)); + memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device)); if (apply) { r = help->changed.vs_const_i; nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool); @@ -491,17 +524,18 @@ nine_state_copy_common_all(struct nine_state *dst, HRESULT NINE_WINAPI NineStateBlock9_Capture( struct NineStateBlock9 *This ) { + struct NineDevice9 *device = This->base.device; struct nine_state *dst = &This->state; - struct nine_state *src = &This->base.device->state; - const int MaxStreams = This->base.device->caps.MaxStreams; + struct nine_state *src = &device->state; + const int MaxStreams = device->caps.MaxStreams; unsigned s; DBG("This=%p\n", This); if (This->type == NINESBT_ALL) - nine_state_copy_common_all(dst, src, dst, FALSE, NULL, MaxStreams); + nine_state_copy_common_all(device, dst, src, dst, FALSE, NULL, MaxStreams); else - nine_state_copy_common(dst, src, dst, FALSE, NULL); + nine_state_copy_common(device, dst, src, dst, FALSE, NULL); if (dst->changed.group & NINE_STATE_VDECL) nine_bind(&dst->vdecl, src->vdecl); @@ -521,18 +555,19 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This ) HRESULT NINE_WINAPI NineStateBlock9_Apply( struct NineStateBlock9 *This ) { - struct nine_state *dst = &This->base.device->state; + struct NineDevice9 *device = This->base.device; + struct nine_state *dst = &device->state; struct nine_state *src = &This->state; - struct nine_range_pool *pool = &This->base.device->range_pool; - const int MaxStreams = This->base.device->caps.MaxStreams; + struct nine_range_pool *pool = &device->range_pool; + const int MaxStreams = device->caps.MaxStreams; unsigned s; DBG("This=%p\n", This); if (This->type == NINESBT_ALL) - nine_state_copy_common_all(dst, src, src, TRUE, pool, MaxStreams); + nine_state_copy_common_all(device, dst, src, src, TRUE, pool, MaxStreams); else - nine_state_copy_common(dst, src, src, TRUE, pool); + nine_state_copy_common(device, dst, src, src, TRUE, pool); if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl) NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl); diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index bc09a413fab..92f8f6bb581 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -63,12 +63,21 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.fog_enable = 0; info.point_size_min = 0; info.point_size_max = 0; - info.swvp_on = false; + info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING); hr = nine_translate_shader(device, &info); + if (hr == D3DERR_INVALIDCALL && + (device->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)) { + /* Retry with a swvp shader. It will require swvp to be on. */ + info.swvp_on = true; + hr = nine_translate_shader(device, &info); + } + if (hr == D3DERR_INVALIDCALL) + ERR("Encountered buggy shader\n"); if (FAILED(hr)) return hr; This->byte_code.version = info.version; + This->swvp_only = info.swvp_on; This->byte_code.tokens = mem_dup(pFunction, info.byte_size); if (!This->byte_code.tokens) @@ -77,7 +86,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, This->variant.cso = info.cso; This->last_cso = info.cso; - This->last_key = 0; + This->last_key = (uint32_t) (info.swvp_on << 9); This->const_used_size = info.const_used_size; This->lconstf = info.lconstf; @@ -168,7 +177,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]); info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]); - info.swvp_on = false; + info.swvp_on = device->swvp; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h index 3c9db7990a0..823c71aa85e 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.h +++ b/src/gallium/state_trackers/nine/vertexshader9.h @@ -26,6 +26,7 @@ #include "util/u_half.h" #include "iunknown.h" +#include "device9.h" #include "nine_helpers.h" #include "nine_shader.h" #include "nine_state.h" @@ -50,6 +51,7 @@ struct NineVertexShader9 boolean position_t; /* if true, disable vport transform */ boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */ + boolean swvp_only; unsigned const_used_size; /* in bytes */ @@ -73,8 +75,9 @@ NineVertexShader9( void *data ) static inline BOOL NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs, - struct nine_state *state ) + struct NineDevice9 *device ) { + struct nine_state *state = &(device->state); uint8_t samplers_shadow; uint64_t key; BOOL res; @@ -84,7 +87,8 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs, key = samplers_shadow; if (vs->byte_code.version < 0x30) - key |= (uint32_t) (state->rs[D3DRS_FOGENABLE] << 8); + key |= (uint32_t) ((!!state->rs[D3DRS_FOGENABLE]) << 8); + key |= (uint32_t) (device->swvp << 9); /* We want to use a 64 bits key for performance. * Use compressed float16 values for the pointsize min/max in the key. |