summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Axel Davy <[email protected]> 2016-09-17 14:16:41 +0200
committer: Axel Davy <[email protected]> 2016-10-10 23:43:49 +0200
commit: 3bf02d383fe94a69dfec3ff54ede3e3b2e9dff6b (patch)
tree: fc45cd758c5ceffcbe95f053430a1a778ecf9c8f
parent: f8c8f4424405c4789a044470e64df810720358c8 (diff)
st/nine: Partial software vertex processing support
Software Vertex Processing (swvp) allows:
 - Fewer limitations for shaders (more loops, etc.)
 - Fewer limitations for ff (more enabled lights, 255 matrices for VertexBlend)

In particular, shaders can get more constants. This patch implements support for this (not using software rendering, but hardware rendering, as llvmpipe and dx10+ hw have the same limits...).

This is considered a second-class path. Even apps asking for "Mixed Vertex Processing" (i.e. the ability to switch to swvp on demand) do not use the feature much. Some just initialize more constants than the normal limit at the start of the application, but never use more than the normal limit. When apps do not need the software vertex processing features, they do not seem to turn it on. This means it is OK if that path is slow; thus no care has been taken to optimize the path.

Signed-off-by: Axel Davy <[email protected]>
-rw-r--r-- src/gallium/state_trackers/nine/device9.c 85
-rw-r--r-- src/gallium/state_trackers/nine/device9.h 1
-rw-r--r-- src/gallium/state_trackers/nine/nine_shader.c 5
-rw-r--r-- src/gallium/state_trackers/nine/nine_state.c 170
-rw-r--r-- src/gallium/state_trackers/nine/nine_state.h 13
-rw-r--r-- src/gallium/state_trackers/nine/stateblock9.c 85
-rw-r--r-- src/gallium/state_trackers/nine/vertexshader9.c 15
-rw-r--r-- src/gallium/state_trackers/nine/vertexshader9.h 8
8 files changed, 322 insertions, 60 deletions
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index dca75c53e6e..f1354904344 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -168,12 +168,31 @@ NineDevice9_ctor( struct NineDevice9 *This,
if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
DBG("Application asked full Software Vertex Processing.\n");
This->swvp = true;
+ This->may_swvp = true;
} else
This->swvp = false;
- if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
+ if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
DBG("Application asked mixed Software Vertex Processing.\n");
+ This->may_swvp = true;
+ }
/* TODO: check if swvp is resetted by device Resets */
+ if (This->may_swvp &&
+ (This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+ PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE)
+ < (NINE_MAX_CONST_F_SWVP/2) * sizeof(float[4]) ||
+ This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+ PIPE_SHADER_CAP_MAX_CONST_BUFFERS) < 5)) {
+ /* Note: We just go on, some apps never use the abilities of
+ * swvp, and just set more constants than allowed at init.
+ * Only cards we support that are affected are the r500 */
+ WARN("Card unable to handle Software Vertex Processing. Game may fail\n");
+ }
+
+ /* When may_swvp, SetConstant* limits are different */
+ if (This->may_swvp)
+ This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP;
+
This->pipe = This->screen->context_create(This->screen, NULL, 0);
if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
@@ -322,12 +341,22 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->vs_const_size = max_const_vs * sizeof(float[4]);
This->ps_const_size = max_const_ps * sizeof(float[4]);
/* Include space for I,B constants for user constbuf. */
+ if (This->may_swvp) {
+ This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ if (!This->state.vs_const_f_swvp)
+ return E_OUTOFMEMORY;
+ This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+ This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+ } else {
+ This->state.vs_const_f_swvp = NULL;
+ This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+ This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+ }
This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
- This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
- This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
- This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
if (!This->state.vs_const_f || !This->state.ps_const_f ||
!This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp ||
!This->state.vs_const_i || !This->state.vs_const_b)
@@ -464,6 +493,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->state.ps_lconstf_temp);
FREE(This->state.vs_const_i);
FREE(This->state.vs_const_b);
+ FREE(This->state.vs_const_f_swvp);
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
@@ -2490,11 +2520,11 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
/* TODO: texture/sampler state */
memcpy(dst->changed.rs,
nine_render_states_vertex, sizeof(dst->changed.rs));
- nine_ranges_insert(&dst->changed.vs_const_f, 0, This->max_vs_const_f,
+ nine_ranges_insert(&dst->changed.vs_const_f, 0, This->may_swvp ? NINE_MAX_CONST_F_SWVP : This->max_vs_const_f,
&This->range_pool);
- nine_ranges_insert(&dst->changed.vs_const_i, 0, NINE_MAX_CONST_I,
+ nine_ranges_insert(&dst->changed.vs_const_i, 0, This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I,
&This->range_pool);
- nine_ranges_insert(&dst->changed.vs_const_b, 0, NINE_MAX_CONST_B,
+ nine_ranges_insert(&dst->changed.vs_const_b, 0, This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B,
&This->range_pool);
for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
dst->changed.sampler[s] |= 1 << D3DSAMP_DMAPOFFSET;
@@ -2890,6 +2920,7 @@ NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
{
if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
This->swvp = bSoftware;
+ This->state.changed.group |= NINE_STATE_SWVP;
return D3D_OK;
} else
return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */
@@ -3376,6 +3407,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
struct nine_state *state = This->update;
+ float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
This, StartRegister, pConstantData, Vector4fCount);
@@ -3388,12 +3420,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (!This->is_recording) {
- if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+ if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
return D3D_OK;
}
- memcpy(&state->vs_const_f[StartRegister * 4],
+ memcpy(&vs_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
@@ -3401,6 +3433,14 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
StartRegister, StartRegister + Vector4fCount,
&This->range_pool);
+ if (This->may_swvp) {
+ Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
+ if (StartRegister < NINE_MAX_CONST_F)
+ memcpy(&state->vs_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
+ }
+
state->changed.group |= NINE_STATE_VS_CONST;
return D3D_OK;
@@ -3413,13 +3453,14 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
const struct nine_state *state = &This->state;
+ float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
user_assert(StartRegister < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
memcpy(pConstantData,
- &state->vs_const_f[StartRegister * 4],
+ &vs_const_f[StartRegister * 4],
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
return D3D_OK;
@@ -3437,8 +3478,10 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
This, StartRegister, pConstantData, Vector4iCount);
- user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
- user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.vs_integer) {
@@ -3476,8 +3519,10 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
const struct nine_state *state = &This->state;
int i;
- user_assert(StartRegister < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
- user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (This->driver_caps.vs_integer) {
@@ -3509,8 +3554,10 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
This, StartRegister, pConstantData, BoolCount);
- user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
- user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
if (!This->is_recording) {
@@ -3543,8 +3590,10 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
const struct nine_state *state = &This->state;
int i;
- user_assert(StartRegister < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
- user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+ user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
+ user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+ D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
for (i = 0; i < BoolCount; i++)
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index f2fd164cc12..b6aa5e06531 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -48,6 +48,7 @@ struct NineDevice9
{
struct NineUnknown base;
boolean ex;
+ boolean may_swvp;
/* G3D context */
struct pipe_screen *screen;
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 2d4e323a4ae..2b573e6879e 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -3501,7 +3501,10 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
tx->parse++; /* for byte_size */
if (tx->failure) {
- ERR("Encountered buggy shader\n");
+ /* For VS shaders, we print the warning later,
+ * we first try with swvp. */
+ if (IS_PS)
+ ERR("Encountered buggy shader\n");
ureg_destroy(tx->ureg);
hr = D3DERR_INVALIDCALL;
goto out;
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 2faca121fe6..024e639f92f 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -79,6 +79,143 @@ prepare_rasterizer(struct NineDevice9 *device)
}
static void
+prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
+{
+ struct nine_state *state = &device->state;
+
+ if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) {
+ struct pipe_constant_buffer cb;
+
+ cb.buffer = NULL;
+ cb.buffer_offset = 0;
+ cb.buffer_size = 4096 * sizeof(float[4]);
+ cb.user_buffer = state->vs_const_f_swvp;
+
+ if (state->vs->lconstf.ranges) {
+ const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
+ const struct nine_range *r = lconstf->ranges;
+ unsigned n = 0;
+ float *dst = device->state.vs_lconstf_temp;
+ float *src = (float *)cb.user_buffer;
+ memcpy(dst, src, cb.buffer_size);
+ while (r) {
+ unsigned p = r->bgn;
+ unsigned c = r->end - r->bgn;
+ memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
+ n += c;
+ r = r->next;
+ }
+ cb.user_buffer = dst;
+ }
+
+ state->pipe.cb0_swvp = cb;
+
+ cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]);
+ state->pipe.cb1_swvp = cb;
+ }
+
+ if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
+ struct pipe_constant_buffer cb;
+
+ cb.buffer = NULL;
+ cb.buffer_offset = 0;
+ cb.buffer_size = 2048 * sizeof(float[4]);
+ cb.user_buffer = state->vs_const_i;
+
+ state->pipe.cb2_swvp = cb;
+ state->changed.vs_const_i = 0;
+ }
+
+ if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
+ struct pipe_constant_buffer cb;
+
+ cb.buffer = NULL;
+ cb.buffer_offset = 0;
+ cb.buffer_size = 512 * sizeof(float[4]);
+ cb.user_buffer = state->vs_const_b;
+
+ state->pipe.cb3_swvp = cb;
+ state->changed.vs_const_b = 0;
+ }
+
+ if (!device->driver_caps.user_cbufs) {
+ struct pipe_constant_buffer *cb = &(state->pipe.cb0_swvp);
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb->buffer_size,
+ device->constbuf_alignment,
+ cb->user_buffer,
+ &(cb->buffer_offset),
+ &(cb->buffer));
+ u_upload_unmap(device->constbuf_uploader);
+ cb->user_buffer = NULL;
+
+ cb = &(state->pipe.cb1_swvp);
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb->buffer_size,
+ device->constbuf_alignment,
+ cb->user_buffer,
+ &(cb->buffer_offset),
+ &(cb->buffer));
+ u_upload_unmap(device->constbuf_uploader);
+ cb->user_buffer = NULL;
+
+ cb = &(state->pipe.cb2_swvp);
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb->buffer_size,
+ device->constbuf_alignment,
+ cb->user_buffer,
+ &(cb->buffer_offset),
+ &(cb->buffer));
+ u_upload_unmap(device->constbuf_uploader);
+ cb->user_buffer = NULL;
+
+ cb = &(state->pipe.cb3_swvp);
+ u_upload_data(device->constbuf_uploader,
+ 0,
+ cb->buffer_size,
+ device->constbuf_alignment,
+ cb->user_buffer,
+ &(cb->buffer_offset),
+ &(cb->buffer));
+ u_upload_unmap(device->constbuf_uploader);
+ cb->user_buffer = NULL;
+ }
+
+ if (device->state.changed.vs_const_f) {
+ struct nine_range *r = device->state.changed.vs_const_f;
+ struct nine_range *p = r;
+ while (p->next)
+ p = p->next;
+ nine_range_pool_put_chain(&device->range_pool, r, p);
+ device->state.changed.vs_const_f = NULL;
+ }
+
+ if (device->state.changed.vs_const_i) {
+ struct nine_range *r = device->state.changed.vs_const_i;
+ struct nine_range *p = r;
+ while (p->next)
+ p = p->next;
+ nine_range_pool_put_chain(&device->range_pool, r, p);
+ device->state.changed.vs_const_i = NULL;
+ }
+
+ if (device->state.changed.vs_const_b) {
+ struct nine_range *r = device->state.changed.vs_const_b;
+ struct nine_range *p = r;
+ while (p->next)
+ p = p->next;
+ nine_range_pool_put_chain(&device->range_pool, r, p);
+ device->state.changed.vs_const_b = NULL;
+ }
+
+ state->changed.group &= ~NINE_STATE_VS_CONST;
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+}
+
+static void
prepare_vs_constants_userbuf(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
@@ -88,21 +225,27 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
cb.buffer_size = device->state.vs->const_used_size;
cb.user_buffer = device->state.vs_const_f;
- if (!cb.buffer_size)
+ if (device->swvp) {
+ prepare_vs_constants_userbuf_swvp(device);
return;
+ }
- if (state->changed.vs_const_i) {
+ if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
state->changed.vs_const_i = 0;
}
- if (state->changed.vs_const_b) {
+
+ if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
state->changed.vs_const_b = 0;
}
+ if (!cb.buffer_size)
+ return;
+
if (device->state.vs->lconstf.ranges) {
/* TODO: Can we make it so that we don't have to copy everything ? */
const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
@@ -251,7 +394,7 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
int has_key_changed = 0;
if (likely(state->programmable_vs))
- has_key_changed = NineVertexShader9_UpdateKey(vs, state);
+ has_key_changed = NineVertexShader9_UpdateKey(vs, device);
if (!shader_changed && !has_key_changed)
return 0;
@@ -740,8 +883,16 @@ commit_vs_constants(struct NineDevice9 *device)
if (unlikely(!device->state.programmable_vs))
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
- else
- pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+ else {
+ if (device->swvp) {
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb0_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &device->state.pipe.cb1_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &device->state.pipe.cb2_swvp);
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &device->state.pipe.cb3_swvp);
+ } else {
+ pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+ }
+ }
}
static inline void
@@ -777,7 +928,8 @@ commit_ps(struct NineDevice9 *device)
(NINE_STATE_VS | \
NINE_STATE_TEXTURE | \
NINE_STATE_FOG_SHADER | \
- NINE_STATE_POINTSIZE_SHADER)
+ NINE_STATE_POINTSIZE_SHADER | \
+ NINE_STATE_SWVP)
#define NINE_STATE_SHADER_CHANGE_PS \
(NINE_STATE_PS | \
@@ -886,14 +1038,14 @@ nine_update_state(struct NineDevice9 *device)
commit_index_buffer(device);
}
- if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) {
+ if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
if (group & NINE_STATE_MULTISAMPLE)
group |= check_multisample(device);
if (group & NINE_STATE_RASTERIZER)
prepare_rasterizer(device);
if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
update_textures_and_samplers(device);
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && state->programmable_vs)
prepare_vs_constants_userbuf(device);
if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
prepare_ps_constants_userbuf(device);
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 8c9483231e0..2aa424d46a7 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -84,8 +84,9 @@
#define NINE_STATE_PS1X_SHADER (1 << 26)
#define NINE_STATE_POINTSIZE_SHADER (1 << 27)
#define NINE_STATE_MULTISAMPLE (1 << 28)
-#define NINE_STATE_ALL 0x1fffffff
-#define NINE_STATE_UNHANDLED (1 << 29)
+#define NINE_STATE_SWVP (1 << 29)
+#define NINE_STATE_ALL 0x3fffffff
+#define NINE_STATE_UNHANDLED (1 << 30)
#define NINE_STATE_COMMIT_DSA (1 << 0)
#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
@@ -101,6 +102,9 @@
#define NINE_MAX_CONST_F 256
#define NINE_MAX_CONST_I 16
#define NINE_MAX_CONST_B 16
+#define NINE_MAX_CONST_F_SWVP 8192
+#define NINE_MAX_CONST_I_SWVP 2048
+#define NINE_MAX_CONST_B_SWVP 2048
#define NINE_MAX_CONST_ALL 276 /* B consts count only 1/4 th */
#define NINE_CONST_I_BASE(nconstf) \
@@ -157,6 +161,7 @@ struct nine_state
*/
struct NineVertexShader9 *vs;
float *vs_const_f;
+ float *vs_const_f_swvp;
int *vs_const_i;
BOOL *vs_const_b;
float *vs_lconstf_temp;
@@ -229,6 +234,10 @@ struct nine_state
struct pipe_rasterizer_state rast;
struct pipe_blend_state blend;
struct pipe_constant_buffer cb_vs;
+ struct pipe_constant_buffer cb0_swvp;
+ struct pipe_constant_buffer cb1_swvp;
+ struct pipe_constant_buffer cb2_swvp;
+ struct pipe_constant_buffer cb3_swvp;
struct pipe_constant_buffer cb_ps;
struct pipe_constant_buffer cb_vs_ff;
struct pipe_constant_buffer cb_ps_ff;
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 19c3766b3c6..102213e417e 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -30,8 +30,9 @@
/* XXX TODO: handling of lights is broken */
-#define VS_CONST_I_SIZE (NINE_MAX_CONST_I * sizeof(int[4]))
-#define VS_CONST_B_SIZE (NINE_MAX_CONST_B * sizeof(BOOL))
+#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4])))
+#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL)))
+#define VS_CONST_F_SWVP_SIZE (NINE_MAX_CONST_F_SWVP * sizeof(float[4]))
HRESULT
NineStateBlock9_ctor( struct NineStateBlock9 *This,
@@ -49,12 +50,19 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This,
This->state.vs_const_f = MALLOC(This->base.device->vs_const_size);
This->state.ps_const_f = MALLOC(This->base.device->ps_const_size);
- This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE);
- This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE);
+ This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device));
+ This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device));
if (!This->state.vs_const_f || !This->state.ps_const_f ||
!This->state.vs_const_i || !This->state.vs_const_b)
return E_OUTOFMEMORY;
+ if (This->base.device->may_swvp) {
+ This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE);
+ if (!This->state.vs_const_f_swvp)
+ return E_OUTOFMEMORY;
+ } else
+ This->state.vs_const_f_swvp = NULL;
+
return D3D_OK;
}
@@ -71,6 +79,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
FREE(state->ps_const_f);
FREE(state->vs_const_i);
FREE(state->vs_const_b);
+ FREE(state->vs_const_f_swvp);
FREE(state->ff.light);
@@ -101,7 +110,8 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
* TODO: compare ?
*/
static void
-nine_state_copy_common(struct nine_state *dst,
+nine_state_copy_common(struct NineDevice9 *device,
+ struct nine_state *dst,
struct nine_state *src,
struct nine_state *mask, /* aliases either src or dst */
const boolean apply,
@@ -130,13 +140,32 @@ nine_state_copy_common(struct nine_state *dst,
*/
if (mask->changed.group & NINE_STATE_VS_CONST) {
struct nine_range *r;
- for (r = mask->changed.vs_const_f; r; r = r->next) {
- memcpy(&dst->vs_const_f[r->bgn * 4],
- &src->vs_const_f[r->bgn * 4],
- (r->end - r->bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
- pool);
+ if (device->may_swvp) {
+ for (r = mask->changed.vs_const_f; r; r = r->next) {
+ int bgn = r->bgn;
+ int end = r->end;
+ memcpy(&dst->vs_const_f_swvp[bgn * 4],
+ &src->vs_const_f_swvp[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ if (apply)
+ nine_ranges_insert(&dst->changed.vs_const_f, bgn, end,
+ pool);
+ if (bgn < device->max_vs_const_f) {
+ end = MIN2(end, device->max_vs_const_f);
+ memcpy(&dst->vs_const_f[bgn * 4],
+ &src->vs_const_f[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ }
+ }
+ } else {
+ for (r = mask->changed.vs_const_f; r; r = r->next) {
+ memcpy(&dst->vs_const_f[r->bgn * 4],
+ &src->vs_const_f[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(float));
+ if (apply)
+ nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
+ pool);
+ }
}
for (r = mask->changed.vs_const_i; r; r = r->next) {
memcpy(&dst->vs_const_i[r->bgn * 4],
@@ -342,7 +371,8 @@ nine_state_copy_common(struct nine_state *dst,
}
static void
-nine_state_copy_common_all(struct nine_state *dst,
+nine_state_copy_common_all(struct NineDevice9 *device,
+ struct nine_state *dst,
const struct nine_state *src,
struct nine_state *help,
const boolean apply,
@@ -369,12 +399,15 @@ nine_state_copy_common_all(struct nine_state *dst,
if (1) {
struct nine_range *r = help->changed.vs_const_f;
memcpy(&dst->vs_const_f[0],
- &src->vs_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
+ &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float));
+ if (device->may_swvp)
+ memcpy(dst->vs_const_f_swvp,
+ src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE);
if (apply)
nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool);
- memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE);
- memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE);
+ memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device));
+ memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device));
if (apply) {
r = help->changed.vs_const_i;
nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool);
@@ -491,17 +524,18 @@ nine_state_copy_common_all(struct nine_state *dst,
HRESULT NINE_WINAPI
NineStateBlock9_Capture( struct NineStateBlock9 *This )
{
+ struct NineDevice9 *device = This->base.device;
struct nine_state *dst = &This->state;
- struct nine_state *src = &This->base.device->state;
- const int MaxStreams = This->base.device->caps.MaxStreams;
+ struct nine_state *src = &device->state;
+ const int MaxStreams = device->caps.MaxStreams;
unsigned s;
DBG("This=%p\n", This);
if (This->type == NINESBT_ALL)
- nine_state_copy_common_all(dst, src, dst, FALSE, NULL, MaxStreams);
+ nine_state_copy_common_all(device, dst, src, dst, FALSE, NULL, MaxStreams);
else
- nine_state_copy_common(dst, src, dst, FALSE, NULL);
+ nine_state_copy_common(device, dst, src, dst, FALSE, NULL);
if (dst->changed.group & NINE_STATE_VDECL)
nine_bind(&dst->vdecl, src->vdecl);
@@ -521,18 +555,19 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This )
HRESULT NINE_WINAPI
NineStateBlock9_Apply( struct NineStateBlock9 *This )
{
- struct nine_state *dst = &This->base.device->state;
+ struct NineDevice9 *device = This->base.device;
+ struct nine_state *dst = &device->state;
struct nine_state *src = &This->state;
- struct nine_range_pool *pool = &This->base.device->range_pool;
- const int MaxStreams = This->base.device->caps.MaxStreams;
+ struct nine_range_pool *pool = &device->range_pool;
+ const int MaxStreams = device->caps.MaxStreams;
unsigned s;
DBG("This=%p\n", This);
if (This->type == NINESBT_ALL)
- nine_state_copy_common_all(dst, src, src, TRUE, pool, MaxStreams);
+ nine_state_copy_common_all(device, dst, src, src, TRUE, pool, MaxStreams);
else
- nine_state_copy_common(dst, src, src, TRUE, pool);
+ nine_state_copy_common(device, dst, src, src, TRUE, pool);
if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index bc09a413fab..92f8f6bb581 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -63,12 +63,21 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
info.fog_enable = 0;
info.point_size_min = 0;
info.point_size_max = 0;
- info.swvp_on = false;
+ info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
hr = nine_translate_shader(device, &info);
+ if (hr == D3DERR_INVALIDCALL &&
+ (device->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)) {
+ /* Retry with a swvp shader. It will require swvp to be on. */
+ info.swvp_on = true;
+ hr = nine_translate_shader(device, &info);
+ }
+ if (hr == D3DERR_INVALIDCALL)
+ ERR("Encountered buggy shader\n");
if (FAILED(hr))
return hr;
This->byte_code.version = info.version;
+ This->swvp_only = info.swvp_on;
This->byte_code.tokens = mem_dup(pFunction, info.byte_size);
if (!This->byte_code.tokens)
@@ -77,7 +86,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
This->variant.cso = info.cso;
This->last_cso = info.cso;
- This->last_key = 0;
+ This->last_key = (uint32_t) (info.swvp_on << 9);
This->const_used_size = info.const_used_size;
This->lconstf = info.lconstf;
@@ -168,7 +177,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
- info.swvp_on = false;
+ info.swvp_on = device->swvp;
hr = nine_translate_shader(This->base.device, &info);
if (FAILED(hr))
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 3c9db7990a0..823c71aa85e 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -26,6 +26,7 @@
#include "util/u_half.h"
#include "iunknown.h"
+#include "device9.h"
#include "nine_helpers.h"
#include "nine_shader.h"
#include "nine_state.h"
@@ -50,6 +51,7 @@ struct NineVertexShader9
boolean position_t; /* if true, disable vport transform */
boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */
+ boolean swvp_only;
unsigned const_used_size; /* in bytes */
@@ -73,8 +75,9 @@ NineVertexShader9( void *data )
static inline BOOL
NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
- struct nine_state *state )
+ struct NineDevice9 *device )
{
+ struct nine_state *state = &(device->state);
uint8_t samplers_shadow;
uint64_t key;
BOOL res;
@@ -84,7 +87,8 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
key = samplers_shadow;
if (vs->byte_code.version < 0x30)
- key |= (uint32_t) (state->rs[D3DRS_FOGENABLE] << 8);
+ key |= (uint32_t) ((!!state->rs[D3DRS_FOGENABLE]) << 8);
+ key |= (uint32_t) (device->swvp << 9);
/* We want to use a 64 bits key for performance.
* Use compressed float16 values for the pointsize min/max in the key.