diff options
author | Jason Ekstrand <[email protected]> | 2015-08-25 17:12:03 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2015-08-25 18:41:21 -0700 |
commit | 9b387b5d3f4103c51079ea5298d33086af6da433 (patch) | |
tree | 4127f2284b6b4a5746bbc01bbfc6a97305057cb4 /src/gallium/state_trackers/nine | |
parent | 5360edcb304e147341b934567f3bbf40e9d5a3b5 (diff) | |
parent | 1d2a844e7d55645ea3d24fb589bec03695b3d2b1 (diff) |
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/gallium/state_trackers/nine')
27 files changed, 2147 insertions, 1205 deletions
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c index c5ffcb15a18..69e0fa25961 100644 --- a/src/gallium/state_trackers/nine/adapter9.c +++ b/src/gallium/state_trackers/nine/adapter9.c @@ -545,7 +545,7 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, /*D3DDEVCAPS_RTPATCHES |*/ /*D3DDEVCAPS_RTPATCHHANDLEZERO |*/ /*D3DDEVCAPS_SEPARATETEXTUREMEMORIES |*/ - /*D3DDEVCAPS_TEXTURENONLOCALVIDMEM |*/ + D3DDEVCAPS_TEXTURENONLOCALVIDMEM | /* D3DDEVCAPS_TEXTURESYSTEMMEMORY |*/ D3DDEVCAPS_TEXTUREVIDEOMEMORY | D3DDEVCAPS_TLVERTEXSYSTEMMEMORY | @@ -561,32 +561,32 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, D3DPMISCCAPS_TSSARGTEMP | D3DPMISCCAPS_BLENDOP | D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) | - /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ + /*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */ /*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */ D3DPMISCCAPS_FOGANDSPECULARALPHA | D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) | D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) | D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING | - /*D3DPMISCCAPS_FOGVERTEXCLAMPED*/0; + D3DPMISCCAPS_FOGVERTEXCLAMPED; if (!screen->get_param(screen, PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION)) pCaps->PrimitiveMiscCaps |= D3DPMISCCAPS_CLIPTLVERTS; pCaps->RasterCaps = D3DPIPECAP(ANISOTROPIC_FILTER, D3DPRASTERCAPS_ANISOTROPY) | - /*D3DPRASTERCAPS_COLORPERSPECTIVE |*/ + D3DPRASTERCAPS_COLORPERSPECTIVE | D3DPRASTERCAPS_DITHER | D3DPRASTERCAPS_DEPTHBIAS | - /*D3DPRASTERCAPS_FOGRANGE |*/ - /*D3DPRASTERCAPS_FOGTABLE |*/ - /*D3DPRASTERCAPS_FOGVERTEX |*/ + D3DPRASTERCAPS_FOGRANGE | + D3DPRASTERCAPS_FOGTABLE | + D3DPRASTERCAPS_FOGVERTEX | D3DPRASTERCAPS_MIPMAPLODBIAS | D3DPRASTERCAPS_MULTISAMPLE_TOGGLE | D3DPRASTERCAPS_SCISSORTEST | D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS | /*D3DPRASTERCAPS_WBUFFER |*/ - /*D3DPRASTERCAPS_WFOG |*/ + D3DPRASTERCAPS_WFOG | /*D3DPRASTERCAPS_ZBUFFERLESSHSR |*/ - /*D3DPRASTERCAPS_ZFOG |*/ + D3DPRASTERCAPS_ZFOG | D3DPRASTERCAPS_ZTEST; pCaps->ZCmpCaps = D3DPCMPCAPS_NEVER | @@ -697,15 +697,12 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, pCaps->MaxAnisotropy = (DWORD)screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_ANISOTROPY); - pCaps->MaxVertexW = 1.0f; /* XXX */ - pCaps->GuardBandLeft = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_LEFT); - pCaps->GuardBandTop = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_TOP); - pCaps->GuardBandRight = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_RIGHT); - pCaps->GuardBandBottom = screen->get_paramf(screen, - PIPE_CAPF_GUARD_BAND_BOTTOM); + /* Values for GeForce 9600 GT */ + pCaps->MaxVertexW = 1e10f; + pCaps->GuardBandLeft = -1e9f; + pCaps->GuardBandTop = -1e9f; + pCaps->GuardBandRight = 1e9f; + pCaps->GuardBandBottom = 1e9f; pCaps->ExtentsAdjust = 0.0f; pCaps->StencilCaps = @@ -724,8 +721,6 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, /*D3DFVFCAPS_DONOTSTRIPELEMENTS |*/ D3DFVFCAPS_PSIZE; - /* XXX: Some of these are probably not in SM2.0 so cap them when I figure - * them out. For now leave them all enabled. */ pCaps->TextureOpCaps = D3DTEXOPCAPS_DISABLE | D3DTEXOPCAPS_SELECTARG1 | D3DTEXOPCAPS_SELECTARG2 | @@ -796,7 +791,8 @@ NineAdapter9_GetDeviceCaps( struct NineAdapter9 *This, pCaps->MaxVertexShaderConst = NINE_MAX_CONST_F; pCaps->PixelShaderVersion = D3DPS_VERSION(3,0); - pCaps->PixelShader1xMaxValue = 8.0f; /* XXX: wine */ + /* Value for GeForce 9600 GT */ + pCaps->PixelShader1xMaxValue = 65504.f; pCaps->DevCaps2 = D3DDEVCAPS2_STREAMOFFSET | D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET | diff --git a/src/gallium/state_trackers/nine/basetexture9.c b/src/gallium/state_trackers/nine/basetexture9.c index 17a8f448ee6..d13138b7d5c 100644 --- a/src/gallium/state_trackers/nine/basetexture9.c +++ b/src/gallium/state_trackers/nine/basetexture9.c @@ -57,7 +57,8 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, user_assert(!(Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL)) || Pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); user_assert(!(Usage & D3DUSAGE_DYNAMIC) || - Pool != D3DPOOL_MANAGED, D3DERR_INVALIDCALL); + !(Pool == D3DPOOL_MANAGED || + Pool == D3DPOOL_SCRATCH), D3DERR_INVALIDCALL); hr = NineResource9_ctor(&This->base, pParams, initResource, alloc, Type, Pool, Usage); if (FAILED(hr)) @@ -85,6 +86,9 @@ NineBaseTexture9_ctor( struct NineBaseTexture9 *This, util_format_has_depth(util_format_description(This->base.info.format)); list_inithead(&This->list); + list_inithead(&This->list2); + if (Pool == D3DPOOL_MANAGED) + list_add(&This->list2, &This->base.base.device->managed_textures); return D3D_OK; } @@ -98,7 +102,9 @@ NineBaseTexture9_dtor( struct NineBaseTexture9 *This ) pipe_sampler_view_reference(&This->view[1], NULL); if (This->list.prev != NULL && This->list.next != NULL) - list_del(&This->list), + list_del(&This->list); + if (This->list2.prev != NULL && This->list2.next != NULL) + list_del(&This->list2); NineResource9_dtor(&This->base); } @@ -153,6 +159,8 @@ NineBaseTexture9_SetAutoGenFilterType( struct NineBaseTexture9 *This, user_assert(FilterType != D3DTEXF_NONE, D3DERR_INVALIDCALL); This->mipfilter = FilterType; + This->dirty_mip = TRUE; + NineBaseTexture9_GenerateMipSubLevels(This); return D3D_OK; } @@ -310,14 +318,12 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) tex->dirty_box.width, tex->dirty_box.height, tex->dirty_box.depth); if (tex->dirty_box.width) { - for (l = 0; l <= last_level; ++l) { + for (l = min_level_dirty; l <= last_level; ++l) { u_box_minify_2d(&box, &tex->dirty_box, l); - NineVolume9_AddDirtyRegion(tex->volumes[l], &tex->dirty_box); + NineVolume9_UploadSelf(tex->volumes[l], &box); } memset(&tex->dirty_box, 0, sizeof(tex->dirty_box)); } - for (l = min_level_dirty; l <= last_level; ++l) - NineVolume9_UploadSelf(tex->volumes[l]); } else { assert(!"invalid texture type"); } @@ -361,8 +367,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) box.width = u_minify(This->base.info.width0, l); box.height = u_minify(This->base.info.height0, l); box.depth = u_minify(This->base.info.depth0, l); - NineVolume9_AddDirtyRegion(tex->volumes[l], &box); - NineVolume9_UploadSelf(tex->volumes[l]); + NineVolume9_UploadSelf(tex->volumes[l], &box); } } else { assert(!"invalid texture type"); @@ -381,8 +386,7 @@ NineBaseTexture9_UploadSelf( struct NineBaseTexture9 *This ) void WINAPI NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) { - struct pipe_resource *resource = This->base.resource; - + struct pipe_resource *resource; unsigned base_level = 0; unsigned last_level = This->base.info.last_level - This->managed.lod; unsigned first_layer = 0; @@ -405,6 +409,8 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ) last_layer = util_max_layer(This->view[0]->texture, base_level); + resource = This->base.resource; + util_gen_mipmap(This->pipe, resource, resource->format, base_level, last_level, first_layer, last_layer, filter); @@ -530,6 +536,11 @@ NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This, swizzle[2] = PIPE_SWIZZLE_RED; swizzle[3] = PIPE_SWIZZLE_RED; } + } else if (resource->format == PIPE_FORMAT_RGTC2_UNORM) { + swizzle[0] = PIPE_SWIZZLE_GREEN; + swizzle[1] = PIPE_SWIZZLE_RED; + swizzle[2] = PIPE_SWIZZLE_ONE; + swizzle[3] = PIPE_SWIZZLE_ONE; } else if (resource->format != PIPE_FORMAT_A8_UNORM && resource->format != PIPE_FORMAT_RGTC1_UNORM) { /* exceptions: @@ -578,6 +589,21 @@ NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ) NineBaseTexture9_UploadSelf(This); } +void +NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This ) +{ + if (This->base.pool != D3DPOOL_MANAGED || + This->managed.lod_resident == -1) + return; + + pipe_resource_reference(&This->base.resource, NULL); + This->managed.lod_resident = -1; + This->managed.dirty = TRUE; + + /* If the texture is bound, we have to re-upload it */ + BASETEX_REGISTER_UPDATE(This); +} + #ifdef DEBUG void NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h index 9d6fb0c002a..b19a62195fc 100644 --- a/src/gallium/state_trackers/nine/basetexture9.h +++ b/src/gallium/state_trackers/nine/basetexture9.h @@ -30,7 +30,8 @@ struct NineBaseTexture9 { struct NineResource9 base; - struct list_head list; + struct list_head list; /* for update_textures */ + struct list_head list2; /* for managed_textures */ /* g3d */ struct pipe_context *pipe; @@ -94,6 +95,9 @@ NineBaseTexture9_GenerateMipSubLevels( struct NineBaseTexture9 *This ); void WINAPI NineBaseTexture9_PreLoad( struct NineBaseTexture9 *This ); +void +NineBaseTexture9_UnLoad( struct NineBaseTexture9 *This ); + /* For D3DPOOL_MANAGED only (after SetLOD change): */ HRESULT NineBaseTexture9_CreatePipeResource( struct NineBaseTexture9 *This, diff --git a/src/gallium/state_trackers/nine/cubetexture9.c b/src/gallium/state_trackers/nine/cubetexture9.c index edea1f28a8d..abba2637946 100644 --- a/src/gallium/state_trackers/nine/cubetexture9.c +++ b/src/gallium/state_trackers/nine/cubetexture9.c @@ -43,7 +43,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, struct pipe_screen *screen = pParams->device->screen; enum pipe_format pf; unsigned i, l, f, offset, face_size = 0; - unsigned *level_offsets; + unsigned *level_offsets = NULL; D3DSURFACE_DESC sfdesc; void *p; HRESULT hr; @@ -70,6 +70,13 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + + user_assert(!(EdgeLength % w) && !(EdgeLength % h), D3DERR_INVALIDCALL); + } + info->screen = pParams->device->screen; info->target = PIPE_TEXTURE_CUBE; info->format = pf; @@ -106,7 +113,7 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, face_size = nine_format_get_size_and_offsets(pf, level_offsets, EdgeLength, EdgeLength, info->last_level); - This->managed_buffer = MALLOC(6 * face_size); + This->managed_buffer = align_malloc(6 * face_size, 32); if (!This->managed_buffer) return E_OUTOFMEMORY; } @@ -150,8 +157,12 @@ NineCubeTexture9_ctor( struct NineCubeTexture9 *This, } } - for (i = 0; i < 6; ++i) /* width = 0 means empty, depth stays 1 */ + for (i = 0; i < 6; ++i) { + /* Textures start initially dirty */ + This->dirty_rect[i].width = EdgeLength; + This->dirty_rect[i].height = EdgeLength; This->dirty_rect[i].depth = 1; + } return D3D_OK; } @@ -259,13 +270,17 @@ NineCubeTexture9_AddDirtyRect( struct NineCubeTexture9 *This, user_assert(FaceType < 6, D3DERR_INVALIDCALL); if (This->base.base.pool != D3DPOOL_MANAGED) { - if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) + if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) { This->base.dirty_mip = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyRect) { u_box_origin_2d(This->base.base.info.width0, diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 55948cbb67f..99197a4361b 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -119,48 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset ) This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf); } -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ) -{ - struct pipe_context *pipe = This->pipe; - - DBG("This=%p mask=%u\n", This, mask); - - if (mask & 0x1) { - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - - if (This->prefer_user_constbuf) { - cb.buffer = NULL; - cb.user_buffer = This->state.vs_const_f; - } else { - cb.buffer = This->constbuf_vs; - cb.user_buffer = NULL; - } - cb.buffer_size = This->vs_const_size; - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - - if (This->prefer_user_constbuf) { - cb.user_buffer = This->state.ps_const_f; - } else { - cb.buffer = This->constbuf_ps; - } - cb.buffer_size = This->ps_const_size; - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); - } - - if (mask & 0x2) { - struct pipe_poly_stipple stipple; - memset(&stipple, ~0, sizeof(stipple)); - pipe->set_polygon_stipple(pipe, &stipple); - } - - This->state.changed.group = NINE_STATE_ALL; - This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1; - This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; - This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; -} - #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n) HRESULT NineDevice9_ctor( struct NineDevice9 *This, @@ -186,6 +144,7 @@ NineDevice9_ctor( struct NineDevice9 *This, if (FAILED(hr)) { return hr; } list_inithead(&This->update_textures); + list_inithead(&This->managed_textures); This->screen = pScreen; This->caps = *pCaps; @@ -341,16 +300,19 @@ NineDevice9_ctor( struct NineDevice9 *This, This->state.vs_const_f = CALLOC(This->vs_const_size, 1); This->state.ps_const_f = CALLOC(This->ps_const_size, 1); This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1); + This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1); if (!This->state.vs_const_f || !This->state.ps_const_f || - !This->state.vs_lconstf_temp) + !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp) return E_OUTOFMEMORY; if (strstr(pScreen->get_name(pScreen), "AMD") || strstr(pScreen->get_name(pScreen), "ATI")) { - This->prefer_user_constbuf = TRUE; This->driver_bugs.buggy_barycentrics = TRUE; } + /* Disable NV path for now, needs some fixes */ + This->prefer_user_constbuf = TRUE; + tmpl.target = PIPE_BUFFER; tmpl.format = PIPE_FORMAT_R8_UNORM; tmpl.height0 = 1; @@ -376,6 +338,8 @@ NineDevice9_ctor( struct NineDevice9 *This, { struct pipe_resource tmplt; struct pipe_sampler_view templ; + struct pipe_sampler_state samp; + memset(&samp, 0, sizeof(samp)); tmplt.target = PIPE_TEXTURE_2D; tmplt.width0 = 1; @@ -404,22 +368,39 @@ NineDevice9_ctor( struct NineDevice9 *This, templ.swizzle_a = PIPE_SWIZZLE_ONE; templ.target = This->dummy_texture->target; - This->dummy_sampler = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ); - if (!This->dummy_sampler) + This->dummy_sampler_view = This->pipe->create_sampler_view(This->pipe, This->dummy_texture, &templ); + if (!This->dummy_sampler_view) return D3DERR_DRIVERINTERNALERROR; + + samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + samp.max_lod = 15.0f; + samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; + samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + samp.compare_mode = PIPE_TEX_COMPARE_NONE; + samp.compare_func = PIPE_FUNC_LEQUAL; + samp.normalized_coords = 1; + samp.seamless_cube_map = 1; + This->dummy_sampler_state = samp; } /* Allocate upload helper for drivers that suck (from st pov ;). */ - { - unsigned bind = 0; - This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); - This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); + This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS); + This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS); + This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS); + + if (!This->driver_caps.user_vbufs) + This->vertex_uploader = u_upload_create(This->pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER); + if (!This->driver_caps.user_ibufs) + This->index_uploader = u_upload_create(This->pipe, 128 * 1024, 4, PIPE_BIND_INDEX_BUFFER); + if (!This->driver_caps.user_cbufs) { + unsigned alignment = GET_PCAP(CONSTANT_BUFFER_OFFSET_ALIGNMENT); - if (!This->driver_caps.user_vbufs) bind |= PIPE_BIND_VERTEX_BUFFER; - if (!This->driver_caps.user_ibufs) bind |= PIPE_BIND_INDEX_BUFFER; - if (bind) - This->upload = u_upload_create(This->pipe, 1 << 20, 4, bind); + This->constbuf_uploader = u_upload_create(This->pipe, This->vs_const_size, + alignment, PIPE_BIND_CONSTANT_BUFFER); } This->driver_caps.window_space_position_support = GET_PCAP(TGSI_VS_WINDOW_SPACE_POSITION); @@ -429,10 +410,15 @@ NineDevice9_ctor( struct NineDevice9 *This, nine_ff_init(This); /* initialize fixed function code */ NineDevice9_SetDefaultState(This, FALSE); - NineDevice9_RestoreNonCSOState(This, ~0); + + { + struct pipe_poly_stipple stipple; + memset(&stipple, ~0, sizeof(stipple)); + This->pipe->set_polygon_stipple(This->pipe, &stipple); + } This->update = &This->state; - nine_update_state(This, ~0); + nine_update_state(This); ID3DPresentGroup_Release(This->present); @@ -452,12 +438,16 @@ NineDevice9_dtor( struct NineDevice9 *This ) nine_ff_fini(This); nine_state_clear(&This->state, TRUE); - if (This->upload) - u_upload_destroy(This->upload); + if (This->vertex_uploader) + u_upload_destroy(This->vertex_uploader); + if (This->index_uploader) + u_upload_destroy(This->index_uploader); + if (This->constbuf_uploader) + u_upload_destroy(This->constbuf_uploader); nine_bind(&This->record, NULL); - pipe_sampler_view_reference(&This->dummy_sampler, NULL); + pipe_sampler_view_reference(&This->dummy_sampler_view, NULL); pipe_resource_reference(&This->dummy_texture, NULL); pipe_resource_reference(&This->constbuf_vs, NULL); pipe_resource_reference(&This->constbuf_ps, NULL); @@ -465,6 +455,7 @@ NineDevice9_dtor( struct NineDevice9 *This ) FREE(This->state.vs_const_f); FREE(This->state.ps_const_f); FREE(This->state.vs_lconstf_temp); + FREE(This->state.ps_lconstf_temp); if (This->swapchains) { for (i = 0; i < This->nswapchains; ++i) @@ -547,10 +538,14 @@ NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This ) HRESULT WINAPI NineDevice9_EvictManagedResources( struct NineDevice9 *This ) { - /* We don't really need to do anything here, but might want to free up - * the GPU virtual address space by killing pipe_resources. - */ - STUB(D3D_OK); + struct NineBaseTexture9 *tex; + + DBG("This=%p\n", This); + LIST_FOR_EACH_ENTRY(tex, &This->managed_textures, list2) { + NineBaseTexture9_UnLoad(tex); + } + + return D3D_OK; } HRESULT WINAPI @@ -599,11 +594,11 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap ) { - /* TODO: hardware cursor */ struct NineSurface9 *surf = NineSurface9(pCursorBitmap); struct pipe_context *pipe = This->pipe; struct pipe_box box; struct pipe_transfer *transfer; + BOOL hw_cursor; void *ptr; DBG_FLAG(DBG_SWAPCHAIN, "This=%p XHotSpot=%u YHotSpot=%u " @@ -611,8 +606,15 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, user_assert(pCursorBitmap, D3DERR_INVALIDCALL); - This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0); - This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0); + if (This->swapchains[0]->params.Windowed) { + This->cursor.w = MIN2(surf->desc.Width, 32); + This->cursor.h = MIN2(surf->desc.Height, 32); + hw_cursor = 1; /* always use hw cursor for windowed mode */ + } else { + This->cursor.w = MIN2(surf->desc.Width, This->cursor.image->width0); + This->cursor.h = MIN2(surf->desc.Height, This->cursor.image->height0); + hw_cursor = This->cursor.w == 32 && This->cursor.h == 32; + } u_box_origin_2d(This->cursor.w, This->cursor.h, &box); @@ -643,16 +645,21 @@ NineDevice9_SetCursorProperties( struct NineDevice9 *This, lock.pBits, lock.Pitch, This->cursor.w, This->cursor.h); - if (!This->cursor.software && - This->cursor.w == 32 && This->cursor.h == 32) - ID3DPresent_SetCursor(This->swapchains[0]->present, - lock.pBits, &This->cursor.hotspot, - This->cursor.visible); + if (hw_cursor) + hw_cursor = ID3DPresent_SetCursor(This->swapchains[0]->present, + lock.pBits, + &This->cursor.hotspot, + This->cursor.visible) == D3D_OK; NineSurface9_UnlockRect(surf); } pipe->transfer_unmap(pipe, transfer); + /* hide cursor if we emulate it */ + if (!hw_cursor) + ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, FALSE); + This->cursor.software = !hw_cursor; + return D3D_OK; } @@ -670,7 +677,7 @@ NineDevice9_SetCursorPosition( struct NineDevice9 *This, This->cursor.pos.y = Y; if (!This->cursor.software) - ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos); + This->cursor.software = ID3DPresent_SetCursorPos(swap->present, &This->cursor.pos) != D3D_OK; } BOOL WINAPI @@ -683,7 +690,7 @@ NineDevice9_ShowCursor( struct NineDevice9 *This, This->cursor.visible = bShow && (This->cursor.hotspot.x != -1); if (!This->cursor.software) - ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow); + This->cursor.software = ID3DPresent_SetCursor(This->swapchains[0]->present, NULL, NULL, bShow) != D3D_OK; return old; } @@ -752,8 +759,8 @@ NineDevice9_Reset( struct NineDevice9 *This, for (i = 0; i < This->nswapchains; ++i) { D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i]; hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL); - if (FAILED(hr)) - return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST; + if (hr != D3D_OK) + return hr; } nine_pipe_context_clear(This); @@ -1108,6 +1115,13 @@ create_zs_or_rt_surface(struct NineDevice9 *This, default: break; } + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(templ.format); + const unsigned h = util_format_get_blockheight(templ.format); + + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + if (Pool == D3DPOOL_DEFAULT && Format != D3DFMT_NULL) { /* resource_create doesn't return an error code, so check format here */ user_assert(templ.format != PIPE_FORMAT_NONE, D3DERR_INVALIDCALL); @@ -1173,6 +1187,8 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This, { struct NineSurface9 *dst = NineSurface9(pDestinationSurface); struct NineSurface9 *src = NineSurface9(pSourceSurface); + int copy_width, copy_height; + RECT destRect; DBG("This=%p pSourceSurface=%p pDestinationSurface=%p " "pSourceRect=%p pDestPoint=%p\n", This, @@ -1184,13 +1200,75 @@ NineDevice9_UpdateSurface( struct NineDevice9 *This, if (pDestPoint) DBG("pDestPoint = (%u,%u)\n", pDestPoint->x, pDestPoint->y); + user_assert(dst && src, D3DERR_INVALIDCALL); + user_assert(dst->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL); user_assert(src->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL); user_assert(dst->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); user_assert(src->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); - return NineSurface9_CopySurface(dst, src, pDestPoint, pSourceRect); + user_assert(!src->lock_count, D3DERR_INVALIDCALL); + user_assert(!dst->lock_count, D3DERR_INVALIDCALL); + + user_assert(dst->desc.Format == src->desc.Format, D3DERR_INVALIDCALL); + user_assert(!depth_stencil_format(dst->desc.Format), D3DERR_INVALIDCALL); + + if (pSourceRect) { + copy_width = pSourceRect->right - pSourceRect->left; + copy_height = pSourceRect->bottom - pSourceRect->top; + + user_assert(pSourceRect->left >= 0 && + copy_width > 0 && + pSourceRect->right <= src->desc.Width && + pSourceRect->top >= 0 && + copy_height > 0 && + pSourceRect->bottom <= src->desc.Height, + D3DERR_INVALIDCALL); + } else { + copy_width = src->desc.Width; + copy_height = src->desc.Height; + } + + destRect.right = copy_width; + destRect.bottom = copy_height; + + if (pDestPoint) { + user_assert(pDestPoint->x >= 0 && pDestPoint->y >= 0, + D3DERR_INVALIDCALL); + destRect.right += pDestPoint->x; + destRect.bottom += pDestPoint->y; + } + + user_assert(destRect.right <= dst->desc.Width && + destRect.bottom <= dst->desc.Height, + D3DERR_INVALIDCALL); + + if (compressed_format(dst->desc.Format)) { + const unsigned w = util_format_get_blockwidth(dst->base.info.format); + const unsigned h = util_format_get_blockheight(dst->base.info.format); + + if (pDestPoint) { + user_assert(!(pDestPoint->x % w) && !(pDestPoint->y % h), + D3DERR_INVALIDCALL); + } + + if (pSourceRect) { + user_assert(!(pSourceRect->left % w) && !(pSourceRect->top % h), + D3DERR_INVALIDCALL); + } + if (!(copy_width == src->desc.Width && + copy_width == dst->desc.Width && + copy_height == src->desc.Height && + copy_height == dst->desc.Height)) { + user_assert(!(copy_width % w) && !(copy_height % h), + D3DERR_INVALIDCALL); + } + } + + NineSurface9_CopyMemToDefault(dst, src, pDestPoint, pSourceRect); + + return D3D_OK; } HRESULT WINAPI @@ -1202,6 +1280,7 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineBaseTexture9 *srcb = NineBaseTexture9(pSourceTexture); unsigned l, m; unsigned last_level = dstb->base.info.last_level; + RECT rect; DBG("This=%p pSourceTexture=%p pDestinationTexture=%p\n", This, pSourceTexture, pDestinationTexture); @@ -1227,10 +1306,6 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, user_assert(dstb->base.type == srcb->base.type, D3DERR_INVALIDCALL); - /* TODO: We can restrict the update to the dirty portions of the source. - * Yes, this seems silly, but it's what MSDN says ... - */ - /* Find src level that matches dst level 0: */ user_assert(srcb->base.info.width0 >= dstb->base.info.width0 && srcb->base.info.height0 >= dstb->base.info.height0 && @@ -1254,9 +1329,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineTexture9 *dst = NineTexture9(dstb); struct NineTexture9 *src = NineTexture9(srcb); - for (l = 0; l <= last_level; ++l, ++m) - NineSurface9_CopySurface(dst->surfaces[l], - src->surfaces[m], NULL, NULL); + if (src->dirty_rect.width == 0) + return D3D_OK; + + pipe_box_to_rect(&rect, &src->dirty_rect); + for (l = 0; l < m; ++l) + rect_minify_inclusive(&rect); + + for (l = 0; l <= last_level; ++l, ++m) { + fit_rect_format_inclusive(dst->base.base.info.format, + &rect, + dst->surfaces[l]->desc.Width, + dst->surfaces[l]->desc.Height); + NineSurface9_CopyMemToDefault(dst->surfaces[l], + src->surfaces[m], + (POINT *)&rect, + &rect); + rect_minify_inclusive(&rect); + } + u_box_origin_2d(0, 0, &src->dirty_rect); } else if (dstb->base.type == D3DRTYPE_CUBETEXTURE) { struct NineCubeTexture9 *dst = NineCubeTexture9(dstb); @@ -1265,10 +1356,25 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, /* GPUs usually have them stored as arrays of mip-mapped 2D textures. */ for (z = 0; z < 6; ++z) { + if (src->dirty_rect[z].width == 0) + continue; + + pipe_box_to_rect(&rect, &src->dirty_rect[z]); + for (l = 0; l < m; ++l) + rect_minify_inclusive(&rect); + for (l = 0; l <= last_level; ++l, ++m) { - NineSurface9_CopySurface(dst->surfaces[l * 6 + z], - src->surfaces[m * 6 + z], NULL, NULL); + fit_rect_format_inclusive(dst->base.base.info.format, + &rect, + dst->surfaces[l * 6 + z]->desc.Width, + dst->surfaces[l * 6 + z]->desc.Height); + NineSurface9_CopyMemToDefault(dst->surfaces[l * 6 + z], + src->surfaces[m * 6 + z], + (POINT *)&rect, + &rect); + rect_minify_inclusive(&rect); } + u_box_origin_2d(0, 0, &src->dirty_rect[z]); m -= l; } } else @@ -1276,9 +1382,12 @@ NineDevice9_UpdateTexture( struct NineDevice9 *This, struct NineVolumeTexture9 *dst = NineVolumeTexture9(dstb); struct NineVolumeTexture9 *src = NineVolumeTexture9(srcb); + if (src->dirty_box.width == 0) + return D3D_OK; for (l = 0; l <= last_level; ++l, ++m) - NineVolume9_CopyVolume(dst->volumes[l], - src->volumes[m], 0, 0, 0, NULL); + NineVolume9_CopyMemToDefault(dst->volumes[l], + src->volumes[m], 0, 0, 0, NULL); + u_box_3d(0, 0, 0, 0, 0, 0, &src->dirty_box); } else{ assert(!"invalid texture type"); } @@ -1308,7 +1417,12 @@ NineDevice9_GetRenderTargetData( struct NineDevice9 *This, user_assert(dst->desc.MultiSampleType < 2, D3DERR_INVALIDCALL); user_assert(src->desc.MultiSampleType < 2, D3DERR_INVALIDCALL); - return NineSurface9_CopySurface(dst, src, NULL, NULL); + user_assert(src->desc.Width == dst->desc.Width, D3DERR_INVALIDCALL); + user_assert(src->desc.Height == dst->desc.Height, D3DERR_INVALIDCALL); + + NineSurface9_CopyDefaultToMem(dst, src); + + return D3D_OK; } HRESULT WINAPI @@ -1448,6 +1562,7 @@ NineDevice9_StretchRect( struct NineDevice9 *This, blit.filter = Filter == D3DTEXF_LINEAR ? PIPE_TEX_FILTER_LINEAR : PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; + blit.alpha_blend = FALSE; /* If both of a src and dst dimension are negative, flip them. */ if (blit.dst.box.width < 0 && blit.src.box.width < 0) { @@ -1464,8 +1579,12 @@ NineDevice9_StretchRect( struct NineDevice9 *This, user_assert(!scaled || dst != src, D3DERR_INVALIDCALL); user_assert(!scaled || - !NineSurface9_IsOffscreenPlain(dst) || + !NineSurface9_IsOffscreenPlain(dst), D3DERR_INVALIDCALL); + user_assert(!NineSurface9_IsOffscreenPlain(dst) || NineSurface9_IsOffscreenPlain(src), D3DERR_INVALIDCALL); + user_assert(NineSurface9_IsOffscreenPlain(dst) || + dst->desc.Usage & (D3DUSAGE_RENDERTARGET | D3DUSAGE_DEPTHSTENCIL), + D3DERR_INVALIDCALL); user_assert(!scaled || (!util_format_is_compressed(dst->base.info.format) && !util_format_is_compressed(src->base.info.format)), @@ -1561,11 +1680,8 @@ NineDevice9_ColorFill( struct NineDevice9 *This, } d3dcolor_to_pipe_color_union(&rgba, color); - fallback = - !This->screen->is_format_supported(This->screen, surf->base.info.format, - surf->base.info.target, - surf->base.info.nr_samples, - PIPE_BIND_RENDER_TARGET); + fallback = !(surf->base.info.bind & PIPE_BIND_RENDER_TARGET); + if (!fallback) { psurf = NineSurface9_GetSurface(surf, 0); if (!psurf) @@ -1774,7 +1890,7 @@ NineDevice9_Clear( struct NineDevice9 *This, return D3D_OK; d3dcolor_to_pipe_color_union(&rgba, Color); - nine_update_state(This, NINE_STATE_FB); + nine_update_state_framebuffer(This); rect.x1 = This->state.viewport.X; rect.y1 = This->state.viewport.Y; @@ -2012,8 +2128,10 @@ NineDevice9_SetLight( struct NineDevice9 *This, return E_OUTOFMEMORY; state->ff.num_lights = N; - for (; n < Index; ++n) + for (; n < Index; ++n) { + memset(&state->ff.light[n], 0, sizeof(D3DLIGHT9)); state->ff.light[n].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID; + } } state->ff.light[Index] = *pLight; @@ -2508,6 +2626,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, DWORD Value ) { struct nine_state *state = This->update; + int bumpmap_index = -1; DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value); nine_dump_D3DTSS_value(DBG_FF, Type, Value); @@ -2516,6 +2635,36 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This, user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL); state->ff.tex_stage[Stage][Type] = Value; + switch (Type) { + case D3DTSS_BUMPENVMAT00: + bumpmap_index = 4 * Stage; + break; + case D3DTSS_BUMPENVMAT10: + bumpmap_index = 4 * Stage + 1; + break; + case D3DTSS_BUMPENVMAT01: + bumpmap_index = 4 * Stage + 2; + break; + case D3DTSS_BUMPENVMAT11: + bumpmap_index = 4 * Stage + 3; + break; + case D3DTSS_BUMPENVLSCALE: + bumpmap_index = 4 * 8 + 2 * Stage; + break; + case D3DTSS_BUMPENVLOFFSET: + bumpmap_index = 4 * 8 + 2 * Stage + 1; + break; + case D3DTSS_TEXTURETRANSFORMFLAGS: + state->changed.group |= NINE_STATE_PS1X_SHADER; + break; + default: + break; + } + + if (bumpmap_index >= 0) { + state->bumpmap_vars[bumpmap_index] = Value; + state->changed.group |= NINE_STATE_PS_CONST; + } state->changed.group |= NINE_STATE_FF_PSSTAGES; state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32); @@ -2560,12 +2709,11 @@ NineDevice9_SetSamplerState( struct NineDevice9 *This, if (Sampler >= D3DDMAPSAMPLER) Sampler = Sampler - D3DDMAPSAMPLER + NINE_MAX_SAMPLERS_PS; - state->samp[Sampler][Type] = Value; - state->changed.group |= NINE_STATE_SAMPLER; - state->changed.sampler[Sampler] |= 1 << Type; - - if (Type == D3DSAMP_SRGBTEXTURE) - state->changed.srgb = TRUE; + if (state->samp[Sampler][Type] != Value || unlikely(This->is_recording)) { + state->samp[Sampler][Type] = Value; + state->changed.group |= NINE_STATE_SAMPLER; + state->changed.sampler[Sampler] |= 1 << Type; + } return D3D_OK; } @@ -2724,7 +2872,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This, DBG("iface %p, PrimitiveType %u, StartVertex %u, PrimitiveCount %u\n", This, PrimitiveType, StartVertex, PrimitiveCount); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = FALSE; @@ -2757,7 +2905,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, user_assert(This->state.idxbuf, D3DERR_INVALIDCALL); user_assert(This->state.vdecl, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = TRUE; @@ -2789,7 +2937,7 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, user_assert(pVertexStreamZeroData && VertexStreamZeroStride, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = FALSE; @@ -2803,13 +2951,16 @@ NineDevice9_DrawPrimitiveUP( struct NineDevice9 *This, vtxbuf.buffer = NULL; vtxbuf.user_buffer = pVertexStreamZeroData; - if (!This->driver_caps.user_vbufs) - u_upload_data(This->upload, + if (!This->driver_caps.user_vbufs) { + u_upload_data(This->vertex_uploader, 0, (info.max_index + 1) * VertexStreamZeroStride, /* XXX */ vtxbuf.user_buffer, &vtxbuf.buffer_offset, &vtxbuf.buffer); + u_upload_unmap(This->vertex_uploader); + vtxbuf.user_buffer = NULL; + } This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vtxbuf); @@ -2851,7 +3002,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, user_assert(IndexDataFormat == D3DFMT_INDEX16 || IndexDataFormat == D3DFMT_INDEX32, D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); init_draw_info(&info, This, PrimitiveType, PrimitiveCount); info.indexed = TRUE; @@ -2872,23 +3023,28 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, if (!This->driver_caps.user_vbufs) { const unsigned base = info.min_index * VertexStreamZeroStride; - u_upload_data(This->upload, + u_upload_data(This->vertex_uploader, base, (info.max_index - info.min_index + 1) * VertexStreamZeroStride, /* XXX */ (const uint8_t *)vbuf.user_buffer + base, &vbuf.buffer_offset, &vbuf.buffer); + u_upload_unmap(This->vertex_uploader); /* Won't be used: */ vbuf.buffer_offset -= base; + vbuf.user_buffer = NULL; } - if (!This->driver_caps.user_ibufs) - u_upload_data(This->upload, + if (!This->driver_caps.user_ibufs) { + u_upload_data(This->index_uploader, 0, info.count * ibuf.index_size, ibuf.user_buffer, &ibuf.offset, &ibuf.buffer); + u_upload_unmap(This->index_uploader); + ibuf.user_buffer = NULL; + } This->pipe->set_vertex_buffers(This->pipe, 0, 1, &vbuf); This->pipe->set_index_buffer(This->pipe, &ibuf); @@ -2935,7 +3091,7 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, if (!screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS)) STUB(D3DERR_INVALIDCALL); - nine_update_state(This, ~0); + nine_update_state(This); /* TODO: Create shader with stream output. */ STUB(D3DERR_INVALIDCALL); @@ -3105,6 +3261,13 @@ NineDevice9_SetVertexShader( struct NineDevice9 *This, DBG("This=%p pShader=%p\n", This, pShader); + if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader) + return D3D_OK; + + /* ff -> non-ff: commit back non-ff constants */ + if (!state->vs && pShader) + state->commit |= NINE_STATE_COMMIT_CONST_VS; + nine_bind(&state->vs, pShader); state->changed.group |= NINE_STATE_VS; @@ -3139,6 +3302,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->vs_const_f[0]))) + return D3D_OK; + } + memcpy(&state->vs_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->vs_const_f[0])); @@ -3188,6 +3357,11 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This, user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.vs_integer) { + if (!This->is_recording) { + if (!memcmp(&state->vs_const_i[StartRegister][0], pConstantData, + Vector4iCount * sizeof(state->vs_const_i[0]))) + return D3D_OK; + } memcpy(&state->vs_const_i[StartRegister][0], pConstantData, Vector4iCount * sizeof(state->vs_const_i[0])); @@ -3252,6 +3426,16 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This, user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + bool noChange = true; + for (i = 0; i < BoolCount; i++) { + if (!!state->vs_const_b[StartRegister + i] != !!pConstantData[i]) + noChange = false; + } + if (noChange) + return D3D_OK; + } + for (i = 0; i < BoolCount; i++) state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; @@ -3433,6 +3617,13 @@ NineDevice9_SetPixelShader( struct NineDevice9 *This, DBG("This=%p pShader=%p\n", This, pShader); + if (!This->is_recording && state->ps == (struct NinePixelShader9*)pShader) + return D3D_OK; + + /* ff -> non-ff: commit back non-ff constants */ + if (!state->ps && pShader) + state->commit |= NINE_STATE_COMMIT_CONST_PS; + nine_bind(&state->ps, pShader); state->changed.group |= NINE_STATE_PS; @@ -3473,6 +3664,12 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This, return D3D_OK; user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData, + Vector4fCount * 4 * sizeof(state->ps_const_f[0]))) + return D3D_OK; + } + memcpy(&state->ps_const_f[StartRegister * 4], pConstantData, Vector4fCount * 4 * sizeof(state->ps_const_f[0])); @@ -3522,6 +3719,11 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This, user_assert(pConstantData, D3DERR_INVALIDCALL); if (This->driver_caps.ps_integer) { + if (!This->is_recording) { + if (!memcmp(&state->ps_const_i[StartRegister][0], pConstantData, + Vector4iCount * sizeof(state->ps_const_i[0]))) + return D3D_OK; + } memcpy(&state->ps_const_i[StartRegister][0], pConstantData, Vector4iCount * sizeof(state->ps_const_i[0])); @@ -3585,6 +3787,16 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This, user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL); user_assert(pConstantData, D3DERR_INVALIDCALL); + if (!This->is_recording) { + bool noChange = true; + for (i = 0; i < BoolCount; i++) { + if (!!state->ps_const_b[StartRegister + i] != !!pConstantData[i]) + noChange = false; + } + if (noChange) + return D3D_OK; + } + for (i = 0; i < BoolCount; i++) state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0; diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h index 74607451c5f..98d9c4df06a 100644 --- a/src/gallium/state_trackers/nine/device9.h +++ b/src/gallium/state_trackers/nine/device9.h @@ -69,6 +69,7 @@ struct NineDevice9 struct nine_state state; /* device state */ struct list_head update_textures; + struct list_head managed_textures; boolean is_recording; boolean in_scene; @@ -83,7 +84,8 @@ struct NineDevice9 uint16_t max_ps_const_f; struct pipe_resource *dummy_texture; - struct pipe_sampler_view *dummy_sampler; + struct pipe_sampler_view *dummy_sampler_view; + struct pipe_sampler_state dummy_sampler_state; struct gen_mipmap_state *gen_mipmap; @@ -113,6 +115,7 @@ struct NineDevice9 struct { boolean user_vbufs; boolean user_ibufs; + boolean user_cbufs; boolean window_space_position_support; boolean vs_integer; boolean ps_integer; @@ -122,7 +125,9 @@ struct NineDevice9 boolean buggy_barycentrics; } driver_bugs; - struct u_upload_mgr *upload; + struct u_upload_mgr *vertex_uploader; + struct u_upload_mgr *index_uploader; + struct u_upload_mgr *constbuf_uploader; struct nine_range_pool range_pool; @@ -180,10 +185,6 @@ NineDevice9_GetCSO( struct NineDevice9 *This ); const D3DCAPS9 * NineDevice9_GetCaps( struct NineDevice9 *This ); -/* Mask: 0x1 = constant buffers, 0x2 = stipple */ -void -NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask ); - /*** Direct3D public ***/ HRESULT WINAPI diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c index 8a53f0d9038..fe8933be69a 100644 --- a/src/gallium/state_trackers/nine/nine_ff.c +++ b/src/gallium/state_trackers/nine/nine_ff.c @@ -22,6 +22,7 @@ #include "tgsi/tgsi_dump.h" #include "util/u_box.h" #include "util/u_hash_table.h" +#include "util/u_upload_mgr.h" #define NINE_TGSI_LAZY_DEVS 1 @@ -30,13 +31,6 @@ #define NINE_FF_NUM_VS_CONST 256 #define NINE_FF_NUM_PS_CONST 24 -#define NINED3DTSS_TCI_DISABLE 0 -#define NINED3DTSS_TCI_PASSTHRU 1 -#define NINED3DTSS_TCI_CAMERASPACENORMAL 2 -#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3 -#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4 -#define NINED3DTSS_TCI_SPHEREMAP 5 - struct fvec4 { float x, y, z, w; @@ -63,16 +57,20 @@ struct nine_ff_vs_key uint32_t fog_range : 1; uint32_t color0in_one : 1; uint32_t color1in_one : 1; - uint32_t pad1 : 8; - uint32_t tc_gen : 24; /* 8 * 3 bits */ - uint32_t pad2 : 8; - uint32_t tc_idx : 24; + uint32_t fog : 1; + uint32_t pad1 : 7; + uint32_t tc_dim_input: 16; /* 8 * 2 bits */ + uint32_t pad2 : 16; + uint32_t tc_dim_output: 24; /* 8 * 3 bits */ uint32_t pad3 : 8; - uint32_t tc_dim : 24; /* 8 * 3 bits */ + uint32_t tc_gen : 24; /* 8 * 3 bits */ uint32_t pad4 : 8; + uint32_t tc_idx : 24; + uint32_t pad5 : 8; + uint32_t passthrough; }; - uint64_t value64[2]; /* don't forget to resize VertexShader9.ff_key */ - uint32_t value32[4]; + uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ + uint32_t value32[6]; }; }; @@ -106,15 +104,18 @@ struct nine_ff_ps_key uint32_t alphaarg2 : 3; uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ - uint32_t projected : 1; + uint32_t pad : 1; /* that's 32 bit exactly */ } ts[8]; - uint32_t fog : 1; /* for vFog with programmable VS */ + uint32_t projected : 16; + uint32_t fog : 1; /* for vFog coming from VS */ uint32_t fog_mode : 2; - uint32_t specular : 1; /* 9 32-bit words with this */ + uint32_t specular : 1; + uint32_t pad1 : 12; /* 9 32-bit words with this */ uint8_t colorarg_b4[3]; uint8_t colorarg_b5[3]; uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ + uint8_t pad2[3]; }; uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ uint32_t value32[12]; @@ -222,7 +223,6 @@ static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) * CONST[28].x___ RS.FogEnd * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) * CONST[28].__z_ RS.FogDensity - * CONST[29] RS.FogColor * CONST[30].x___ TWEENFACTOR * @@ -334,16 +334,15 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) { const struct nine_ff_vs_key *key = vs->key; struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX); - struct ureg_dst oPos, oCol[2], oTex[8], oPsz, oFog; - struct ureg_dst rCol[2]; /* oCol if no fog, TEMP otherwise */ + struct ureg_dst oPos, oCol[2], oPsz, oFog; struct ureg_dst rVtx, rNrm; struct ureg_dst r[8]; struct ureg_dst AR; - struct ureg_dst tmp, tmp_x, tmp_z; + struct ureg_dst tmp, tmp_x, tmp_y, tmp_z; unsigned i, c; unsigned label[32], l = 0; unsigned num_r = 8; - boolean need_rNrm = key->lighting || key->pointscale; + boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); boolean need_rVtx = key->lighting || key->fog_mode; const unsigned texcoord_sn = get_texcoord_sn(device->screen); @@ -406,9 +405,9 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) if (key->vertexpointsize) vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); - if (key->vertexblend_indexed) + if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); - if (key->vertexblend) + if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); if (key->vertextween) { vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); @@ -420,19 +419,16 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); + if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { + oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0); + oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); + } if (key->vertexpointsize || key->pointscale) { oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, TGSI_WRITEMASK_X, 0, 1); oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); } - if (key->fog_mode) { - /* We apply fog to the vertex colors, oFog is for programmable shaders only ? - */ - oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0, - TGSI_WRITEMASK_X, 0, 1); - oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); - } /* Declare TEMPs: */ @@ -440,18 +436,11 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) r[i] = ureg_DECL_local_temporary(ureg); tmp = r[0]; tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); + tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); if (key->lighting || key->vertexblend) AR = ureg_DECL_address(ureg); - if (key->fog_mode) { - rCol[0] = r[2]; - rCol[1] = r[3]; - } else { - rCol[0] = oCol[0]; - rCol[1] = oCol[1]; - } - rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ); rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ); @@ -560,8 +549,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1)); #endif } else if (key->pointscale) { - struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); - struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); @@ -582,72 +569,85 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) #endif } - /* Texture coordinate generation: - * XXX: D3DTTFF_PROJECTED, transform matrix - */ for (i = 0; i < 8; ++i) { - struct ureg_dst dst[5]; - struct ureg_src src; - unsigned c; + struct ureg_dst oTex, input_coord, transformed, t; + unsigned c, writemask; const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; - const unsigned dim = (key->tc_dim >> (i * 3)) & 0x7; + unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); + const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; + /* No texture output of index s */ if (tci == NINED3DTSS_TCI_DISABLE) continue; - oTex[i] = ureg_DECL_output(ureg, texcoord_sn, i); - - if (tci == NINED3DTSS_TCI_PASSTHRU) - vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); - - if (!dim) { - dst[c = 4] = oTex[i]; - } else { - dst[4] = r[5]; - src = ureg_src(dst[4]); - for (c = 0; c < (dim - 1); ++c) - dst[c] = ureg_writemask(tmp, (1 << dim) - 1); - dst[c] = ureg_writemask(oTex[i], (1 << dim) - 1); - } + oTex = ureg_DECL_output(ureg, texcoord_sn, i); + input_coord = r[5]; + transformed = r[6]; + /* Get the coordinate */ switch (tci) { case NINED3DTSS_TCI_PASSTHRU: - ureg_MOV(ureg, dst[4], vs->aTex[idx]); + /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * + * Else the idx is used only to determine wrapping mode. */ + vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); + ureg_MOV(ureg, input_coord, vs->aTex[idx]); break; case NINED3DTSS_TCI_CAMERASPACENORMAL: - assert(dim <= 3); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rNrm)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEPOSITION: - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; break; case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: tmp.WriteMask = TGSI_WRITEMASK_XYZ; ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm)); ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp)); ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); - ureg_SUB(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp)); - ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp)); + ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); + dim_input = 4; tmp.WriteMask = TGSI_WRITEMASK_XYZW; break; case NINED3DTSS_TCI_SPHEREMAP: assert(!"TODO"); break; default: + assert(0); break; } - if (!dim) - continue; - dst[c].WriteMask = ~dst[c].WriteMask; - if (dst[c].WriteMask) - ureg_MOV(ureg, dst[c], src); /* store untransformed components */ - dst[c].WriteMask = ~dst[c].WriteMask; - if (dim > 0) ureg_MUL(ureg, dst[0], _XXXX(src), _CONST(128 + i * 4)); - if (dim > 1) ureg_MAD(ureg, dst[1], _YYYY(src), _CONST(129 + i * 4), ureg_src(tmp)); - if (dim > 2) ureg_MAD(ureg, dst[2], _ZZZZ(src), _CONST(130 + i * 4), ureg_src(tmp)); - if (dim > 3) ureg_MAD(ureg, dst[3], _WWWW(src), _CONST(131 + i * 4), ureg_src(tmp)); + + /* Apply the transformation */ + /* dim_output == 0 => do not transform the components. + * XYZRHW also disables transformation */ + if (!dim_output || key->position_t) { + transformed = input_coord; + writemask = TGSI_WRITEMASK_XYZW; + } else { + for (c = 0; c < dim_output; c++) { + t = ureg_writemask(transformed, 1 << c); + switch (dim_input) { + /* dim_input = 1 2 3: -> we add trailing 1 to input*/ + case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); + break; + case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); + ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); + break; + case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); + ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); + break; + case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; + default: + assert(0); + } + } + writemask = (1 << dim_output) - 1; + } + + ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); } /* === Lighting: @@ -692,8 +692,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * specular += light.specular * atten * powFact; */ if (key->lighting) { - struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); - struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W); struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ); struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ); @@ -851,22 +849,22 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE); } - ureg_MAD(ureg, rCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); - ureg_MUL(ureg, rCol[1], ureg_src(rS), vs->mtlS); + ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp)); + ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); } else /* COLOR */ if (key->darkness) { if (key->mtl_emissive == 0 && key->mtl_ambient == 0) { - ureg_MAD(ureg, rCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19)); + ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19)); } else { - ureg_MAD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); + ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE); - ureg_ADD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp)); + ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp)); } - ureg_MUL(ureg, rCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS); + ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS); } else { - ureg_MOV(ureg, rCol[0], vs->aCol[0]); - ureg_MOV(ureg, rCol[1], vs->aCol[1]); + ureg_MOV(ureg, oCol[0], vs->aCol[0]); + ureg_MOV(ureg, oCol[1], vs->aCol[1]); } /* === Process fog. @@ -874,10 +872,6 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) * exp(x) = ex2(log2(e) * x) */ if (key->fog_mode) { - /* Fog doesn't affect alpha, TODO: combine with light code output */ - ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), _W(rCol[0])); - ureg_MOV(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_W), _W(rCol[1])); - if (key->position_t) { ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } else @@ -905,10 +899,58 @@ nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); } ureg_MOV(ureg, oFog, _X(tmp)); - ureg_LRP(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[0]), _CONST(29)); - ureg_LRP(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[1]), _CONST(29)); + } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { + ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); } + if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aWgt; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aInd; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { + struct ureg_src input; + struct ureg_dst output; + input = vs->aNrm; + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); + output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { + struct ureg_src input; + struct ureg_dst output; + input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); + input = ureg_scalar(input, TGSI_SWIZZLE_X); + output = oFog; + ureg_MOV(ureg, output, input); + } + if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { + (void) 0; /* TODO: replace z of position output ? */ + } + + if (key->position_t && device->driver_caps.window_space_position_support) ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); @@ -1270,10 +1312,18 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { } - if (key->ts[s].projected) - ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); - else + if (key->projected & (3 << (s *2))) { + unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); + if (dim == 4) + ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); + else { + ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1)); + ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]); + ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); + } + } else { ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]); + } } if (s == 0 && @@ -1316,6 +1366,10 @@ nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) colorarg[2] != alphaarg[2]) dst.WriteMask = TGSI_WRITEMASK_XYZ; + /* Special DOTPRODUCT behaviour (see wine tests) */ + if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) + dst.WriteMask = TGSI_WRITEMASK_XYZW; + if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); @@ -1406,12 +1460,18 @@ nine_ff_get_vs(struct NineDevice9 *device) else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { s = usage / NINE_DECLUSAGE_COUNT; if (s < 8) - input_texture_coord[s] = 1; + input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type); else DBG("FF given texture coordinate >= 8. Ignoring\n"); - } + } else if (usage < NINE_DECLUSAGE_NONE) + key.passthrough |= 1 << usage; } } + /* ff vs + ps 3.0: some elements are passed to the ps (wine test). + * We do restrict to indices 0 */ + key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | + (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | + (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); if (!key.vertexpointsize) key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE]; @@ -1427,6 +1487,7 @@ nine_ff_get_vs(struct NineDevice9 *device) key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE]; key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE]; } + key.fog = !!state->rs[D3DRS_FOGENABLE]; key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0; if (key.fog_mode) key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE]; @@ -1448,7 +1509,7 @@ nine_ff_get_vs(struct NineDevice9 *device) for (s = 0; s < 8; ++s) { unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; - unsigned dim = MIN2(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7, 4); + unsigned dim; if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) gen = NINED3DTSS_TCI_PASSTHRU; @@ -1458,7 +1519,14 @@ nine_ff_get_vs(struct NineDevice9 *device) key.tc_gen |= gen << (s * 3); key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3); - key.tc_dim |= dim << (s * 3); + key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2); + + dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; + if (dim > 4) + dim = input_texture_coord[s]; + if (dim == 1) /* NV behaviour */ + dim = 0; + key.tc_dim_output |= dim << (s * 3); } vs = util_hash_table_get(device->ff.ht_vs, &key); @@ -1473,6 +1541,7 @@ nine_ff_get_vs(struct NineDevice9 *device) memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs); + (void)err; assert(err == PIPE_OK); device->ff.num_vs++; NineUnknown_ConvertRefToBind(NineUnknown(vs)); @@ -1543,8 +1612,6 @@ nine_ff_get_ps(struct NineDevice9 *device) } key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; - key.ts[s].projected = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED); - if (state->texture[s]) { switch (state->texture[s]->base.type) { case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; @@ -1558,10 +1625,14 @@ nine_ff_get_ps(struct NineDevice9 *device) key.ts[s].textarget = 1; } } + + key.projected = nine_ff_get_projected_key(state); + for (; s < 8; ++s) key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; if (state->rs[D3DRS_FOGENABLE]) key.fog_mode = state->rs[D3DRS_FOGTABLEMODE]; + key.fog = !!state->rs[D3DRS_FOGENABLE]; ps = util_hash_table_get(device->ff.ht_ps, &key); if (ps) @@ -1573,6 +1644,7 @@ nine_ff_get_ps(struct NineDevice9 *device) memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps); + (void)err; assert(err == PIPE_OK); device->ff.num_ps++; NineUnknown_ConvertRefToBind(NineUnknown(ps)); @@ -1689,7 +1761,6 @@ nine_ff_load_point_and_fog_params(struct NineDevice9 *device) if (isinf(dst[28].y)) dst[28].y = 0.0f; dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]); - d3dcolor_to_rgba(&dst[29].x, state->rs[D3DRS_FOGCOLOR]); } static void @@ -1703,7 +1774,7 @@ nine_ff_load_tex_matrices(struct NineDevice9 *device) return; for (s = 0; s < 8; ++s) { if (IS_D3DTS_DIRTY(state, TEXTURE0 + s)) - M[32 + s] = *nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE); + nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE)); } } @@ -1762,28 +1833,22 @@ nine_ff_load_viewport_info(struct NineDevice9 *device) void nine_ff_update(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; + struct pipe_constant_buffer cb; DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps); /* NOTE: the only reference belongs to the hash table */ - if (!device->state.vs) + if (!device->state.vs) { device->ff.vs = nine_ff_get_vs(device); - if (!device->state.ps) + device->state.changed.group |= NINE_STATE_VS; + } + if (!device->state.ps) { device->ff.ps = nine_ff_get_ps(device); + device->state.changed.group |= NINE_STATE_PS; + } if (!device->state.vs) { - if (device->state.ff.clobber.vs_const) { - device->state.ff.clobber.vs_const = FALSE; - device->state.changed.group |= - NINE_STATE_FF_VSTRANSF | - NINE_STATE_FF_MATERIAL | - NINE_STATE_FF_LIGHTING | - NINE_STATE_FF_OTHER; - device->state.ff.changed.transform[0] |= 0xff000c; - device->state.ff.changed.transform[8] |= 0xff; - } nine_ff_load_vs_transforms(device); nine_ff_load_tex_matrices(device); nine_ff_load_lights(device); @@ -1792,57 +1857,45 @@ nine_ff_update(struct NineDevice9 *device) memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform)); - device->state.changed.group |= NINE_STATE_VS; - device->state.changed.group |= NINE_STATE_VS_CONST; - - if (device->prefer_user_constbuf) { - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - cb.buffer = NULL; - cb.user_buffer = device->ff.vs_const; - cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); - } else { - struct pipe_box box; - u_box_1d(0, NINE_FF_NUM_VS_CONST * 4 * sizeof(float), &box); - pipe->transfer_inline_write(pipe, device->constbuf_vs, 0, - 0, &box, - device->ff.vs_const, 0, 0); - nine_ranges_insert(&device->state.changed.vs_const_f, 0, NINE_FF_NUM_VS_CONST, - &device->range_pool); + cb.buffer_offset = 0; + cb.buffer = NULL; + cb.user_buffer = device->ff.vs_const; + cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; } + state->pipe.cb_vs_ff = cb; + state->commit |= NINE_STATE_COMMIT_CONST_VS; } if (!device->state.ps) { - if (device->state.ff.clobber.ps_const) { - device->state.ff.clobber.ps_const = FALSE; - device->state.changed.group |= - NINE_STATE_FF_PSSTAGES | - NINE_STATE_FF_OTHER; - } nine_ff_load_ps_params(device); - device->state.changed.group |= NINE_STATE_PS; - device->state.changed.group |= NINE_STATE_PS_CONST; - - if (device->prefer_user_constbuf) { - struct pipe_context *pipe = device->pipe; - struct pipe_constant_buffer cb; - cb.buffer_offset = 0; - cb.buffer = NULL; - cb.user_buffer = device->ff.ps_const; - cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); - } else { - struct pipe_box box; - u_box_1d(0, NINE_FF_NUM_PS_CONST * 4 * sizeof(float), &box); - pipe->transfer_inline_write(pipe, device->constbuf_ps, 0, - 0, &box, - device->ff.ps_const, 0, 0); - nine_ranges_insert(&device->state.changed.ps_const_f, 0, NINE_FF_NUM_PS_CONST, - &device->range_pool); + cb.buffer_offset = 0; + cb.buffer = NULL; + cb.user_buffer = device->ff.ps_const; + cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; } + state->pipe.cb_ps_ff = cb; + state->commit |= NINE_STATE_COMMIT_CONST_PS; } device->state.changed.group &= ~NINE_STATE_FF; diff --git a/src/gallium/state_trackers/nine/nine_ff.h b/src/gallium/state_trackers/nine/nine_ff.h index 7cefa65b1c4..9c33c76370d 100644 --- a/src/gallium/state_trackers/nine/nine_ff.h +++ b/src/gallium/state_trackers/nine/nine_ff.h @@ -3,6 +3,7 @@ #define _NINE_FF_H_ #include "device9.h" +#include "vertexdeclaration9.h" boolean nine_ff_init(struct NineDevice9 *); void nine_ff_fini(struct NineDevice9 *); @@ -29,4 +30,84 @@ nine_d3d_matrix_inverse_3x3(D3DMATRIX *, const D3DMATRIX *); void nine_d3d_matrix_transpose(D3DMATRIX *, const D3DMATRIX *); +#define NINED3DTSS_TCI_DISABLE 0 +#define NINED3DTSS_TCI_PASSTHRU 1 +#define NINED3DTSS_TCI_CAMERASPACENORMAL 2 +#define NINED3DTSS_TCI_CAMERASPACEPOSITION 3 +#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 4 +#define NINED3DTSS_TCI_SPHEREMAP 5 + +static inline unsigned +nine_decltype_get_dim(BYTE type) +{ + switch (type) { + case D3DDECLTYPE_FLOAT1: return 1; + case D3DDECLTYPE_FLOAT2: return 2; + case D3DDECLTYPE_FLOAT3: return 3; + case D3DDECLTYPE_FLOAT4: return 4; + case D3DDECLTYPE_D3DCOLOR: return 1; + case D3DDECLTYPE_UBYTE4: return 4; + case D3DDECLTYPE_SHORT2: return 2; + case D3DDECLTYPE_SHORT4: return 4; + case D3DDECLTYPE_UBYTE4N: return 4; + case D3DDECLTYPE_SHORT2N: return 2; + case D3DDECLTYPE_SHORT4N: return 4; + case D3DDECLTYPE_USHORT2N: return 2; + case D3DDECLTYPE_USHORT4N: return 4; + case D3DDECLTYPE_UDEC3: return 3; + case D3DDECLTYPE_DEC3N: return 3; + case D3DDECLTYPE_FLOAT16_2: return 2; + case D3DDECLTYPE_FLOAT16_4: return 4; + default: + assert(!"Implementation error !"); + } + return 0; +} + +static inline uint16_t +nine_ff_get_projected_key(struct nine_state *state) +{ + unsigned s, i; + uint16_t projected = 0; + char input_texture_coord[8]; + memset(&input_texture_coord, 0, sizeof(input_texture_coord)); + + if (state->vdecl) { + for (i = 0; i < state->vdecl->nelems; i++) { + uint16_t usage = state->vdecl->usage_map[i]; + if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { + s = usage / NINE_DECLUSAGE_COUNT; + if (s < 8) + input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type); + } + } + } + + for (s = 0; s < 8; ++s) { + unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; + unsigned dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; + unsigned proj = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED); + + if (!state->vs) { + if (dim > 4) + dim = input_texture_coord[s]; + + if (!dim && gen == NINED3DTSS_TCI_PASSTHRU) + dim = input_texture_coord[s]; + else if (!dim) + dim = 4; + + if (dim == 1) /* NV behaviour */ + proj = 0; + if (dim > input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU) + proj = 0; + } else { + dim = 4; + } + if (proj) + projected |= (dim-1) << (2 * s); + } + return projected; +} + #endif /* _NINE_FF_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c index 4cf37b9f59c..2be30f7e097 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.c +++ b/src/gallium/state_trackers/nine/nine_pipe.c @@ -27,7 +27,8 @@ #include "cso_cache/cso_context.h" void -nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *dsa_state, + const DWORD *rs) { struct pipe_depth_stencil_alpha_state dsa; @@ -65,16 +66,15 @@ nine_convert_dsa_state(struct cso_context *ctx, const DWORD *rs) dsa.alpha.ref_value = (float)rs[D3DRS_ALPHAREF] / 255.0f; } - cso_set_depth_stencil_alpha(ctx, &dsa); + *dsa_state = dsa; } -/* TODO: Keep a static copy in device so we don't have to memset every time ? */ void -nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_rasterizer_state(struct pipe_rasterizer_state *rast_state, const DWORD *rs) { struct pipe_rasterizer_state rast; - memset(&rast, 0, sizeof(rast)); /* memcmp safety */ + memset(&rast, 0, sizeof(rast)); rast.flatshade = rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT; /* rast.light_twoside = 0; */ @@ -92,7 +92,7 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) /* rast.poly_stipple_enable = 0; */ /* rast.point_smooth = 0; */ rast.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT; - rast.point_quad_rasterization = !!rs[D3DRS_POINTSPRITEENABLE]; + rast.point_quad_rasterization = 1; rast.point_size_per_vertex = rs[NINED3DRS_VSPOINTSIZE]; rast.multisample = !!rs[D3DRS_MULTISAMPLEANTIALIAS]; rast.line_smooth = !!rs[D3DRS_ANTIALIASEDLINEENABLE]; @@ -110,12 +110,28 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs) /* rast.line_stipple_pattern = 0; */ rast.sprite_coord_enable = rs[D3DRS_POINTSPRITEENABLE] ? 0xff : 0x00; rast.line_width = 1.0f; - rast.point_size = rs[NINED3DRS_VSPOINTSIZE] ? 1.0f : asfloat(rs[D3DRS_POINTSIZE]); /* XXX: D3DRS_POINTSIZE_MIN/MAX */ - rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * asfloat(rs[NINED3DRS_ZBIASSCALE]); + if (rs[NINED3DRS_VSPOINTSIZE]) { + rast.point_size = 1.0f; + } else { + rast.point_size = CLAMP(asfloat(rs[D3DRS_POINTSIZE]), + asfloat(rs[D3DRS_POINTSIZE_MIN]), + asfloat(rs[D3DRS_POINTSIZE_MAX])); + } + /* offset_units has the ogl/d3d11 meaning. + * d3d9: offset = scale * dz + bias + * ogl/d3d11: offset = scale * dz + r * bias + * with r implementation dependant and is supposed to be + * the smallest value the depth buffer format can hold. + * In practice on current and past hw it seems to be 2^-23 + * for all formats except float formats where it varies depending + * on the content. + * For now use 1 << 23, but in the future perhaps add a way in gallium + * to get r for the format or get the gallium behaviour */ + rast.offset_units = asfloat(rs[D3DRS_DEPTHBIAS]) * (float)(1 << 23); rast.offset_scale = asfloat(rs[D3DRS_SLOPESCALEDEPTHBIAS]); /* rast.offset_clamp = 0.0f; */ - cso_set_rasterizer(ctx, &rast); + *rast_state = rast; } static inline void @@ -137,7 +153,7 @@ nine_convert_blend_state_fixup(struct pipe_blend_state *blend, const DWORD *rs) } void -nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs) +nine_convert_blend_state(struct pipe_blend_state *blend_state, const DWORD *rs) { struct pipe_blend_state blend; @@ -181,7 +197,7 @@ nine_convert_blend_state(struct cso_context *ctx, const DWORD *rs) /* blend.force_srgb = !!rs[D3DRS_SRGBWRITEENABLE]; */ - cso_set_blend(ctx, &blend); + *blend_state = blend; } void @@ -239,8 +255,8 @@ nine_pipe_context_clear(struct NineDevice9 *This) cso_set_samplers(cso, PIPE_SHADER_VERTEX, 0, NULL); cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 0, NULL); - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, 0, NULL); - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, 0, NULL); + cso_set_sampler_views(cso, PIPE_SHADER_VERTEX, 0, NULL); + cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 0, NULL); pipe->set_vertex_buffers(pipe, 0, This->caps.MaxStreams, NULL); pipe->set_index_buffer(pipe, NULL); diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h index 43a7737cdf9..86117866ed5 100644 --- a/src/gallium/state_trackers/nine/nine_pipe.h +++ b/src/gallium/state_trackers/nine/nine_pipe.h @@ -27,6 +27,7 @@ #include "pipe/p_format.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" /* pipe_box */ +#include "util/macros.h" #include "util/u_rect.h" #include "util/u_format.h" #include "nine_helpers.h" @@ -36,9 +37,9 @@ struct cso_context; extern const enum pipe_format nine_d3d9_to_pipe_format_map[120]; extern const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT]; -void nine_convert_dsa_state(struct cso_context *, const DWORD *); -void nine_convert_rasterizer_state(struct cso_context *, const DWORD *); -void nine_convert_blend_state(struct cso_context *, const DWORD *); +void nine_convert_dsa_state(struct pipe_depth_stencil_alpha_state *, const DWORD *); +void nine_convert_rasterizer_state(struct pipe_rasterizer_state *, const DWORD *); +void nine_convert_blend_state(struct pipe_blend_state *, const DWORD *); void nine_convert_sampler_state(struct cso_context *, int idx, const DWORD *); void nine_pipe_context_clear(struct NineDevice9 *); @@ -81,6 +82,49 @@ rect_to_pipe_box(struct pipe_box *dst, const RECT *src) dst->depth = 1; } +static inline void +pipe_box_to_rect(RECT *dst, const struct pipe_box *src) +{ + dst->left = src->x; + dst->right = src->x + src->width; + dst->top = src->y; + dst->bottom = src->y + src->height; +} + +static inline void +rect_minify_inclusive(RECT *rect) +{ + rect->left = rect->left >> 2; + rect->top = rect->top >> 2; + rect->right = DIV_ROUND_UP(rect->right, 2); + rect->bottom = DIV_ROUND_UP(rect->bottom, 2); +} + +/* We suppose: + * 0 <= rect->left < rect->right + * 0 <= rect->top < rect->bottom + */ +static inline void +fit_rect_format_inclusive(enum pipe_format format, RECT *rect, int width, int height) +{ + const unsigned w = util_format_get_blockwidth(format); + const unsigned h = util_format_get_blockheight(format); + + if (util_format_is_compressed(format)) { + rect->left = rect->left - rect->left % w; + rect->top = rect->top - rect->top % h; + rect->right = (rect->right % w) == 0 ? + rect->right : + rect->right - (rect->right % w) + w; + rect->bottom = (rect->bottom % h) == 0 ? + rect->bottom : + rect->bottom - (rect->bottom % h) + h; + } + + rect->right = MIN2(rect->right, width); + rect->bottom = MIN2(rect->bottom, height); +} + static inline boolean rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src) { @@ -164,6 +208,23 @@ pipe_to_d3d9_format(enum pipe_format format) return nine_pipe_to_d3d9_format_map[format]; } +/* ATI1 and ATI2 are not officially compressed in d3d9 */ +static inline boolean +compressed_format( D3DFORMAT fmt ) +{ + switch (fmt) { + case D3DFMT_DXT1: + case D3DFMT_DXT2: + case D3DFMT_DXT3: + case D3DFMT_DXT4: + case D3DFMT_DXT5: + return TRUE; + default: + break; + } + return FALSE; +} + static inline boolean depth_stencil_format( D3DFORMAT fmt ) { diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 754f5af6b8e..28f27870dc8 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -89,6 +89,15 @@ static inline const char *d3dsio_to_string(unsigned opcode); #define NINE_SWIZZLE4(x,y,z,w) \ TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w +#define NINE_CONSTANT_SRC(index) \ + ureg_src_register(TGSI_FILE_CONSTANT, index) + +#define NINE_APPLY_SWIZZLE(src, s) \ + ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) + +#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \ + NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s) + #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) @@ -444,6 +453,9 @@ struct shader_translator BYTE minor; } version; unsigned processor; /* TGSI_PROCESSOR_VERTEX/FRAMGENT */ + unsigned num_constf_allowed; + unsigned num_consti_allowed; + unsigned num_constb_allowed; boolean native_integers; boolean inline_subroutines; @@ -505,7 +517,6 @@ struct shader_translator #define IS_VS (tx->processor == TGSI_PROCESSOR_VERTEX) #define IS_PS (tx->processor == TGSI_PROCESSOR_FRAGMENT) -#define NINE_MAX_CONST_F_SHADER (tx->processor == TGSI_PROCESSOR_VERTEX ? NINE_MAX_CONST_F : NINE_MAX_CONST_F_PS3) #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} @@ -528,7 +539,7 @@ static boolean tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) { INT i; - if (index < 0 || index >= NINE_MAX_CONST_F_SHADER) { + if (index < 0 || index >= tx->num_constf_allowed) { tx->failure = TRUE; return FALSE; } @@ -543,7 +554,7 @@ tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) { - if (index < 0 || index >= NINE_MAX_CONST_I) { + if (index < 0 || index >= tx->num_consti_allowed) { tx->failure = TRUE; return FALSE; } @@ -554,7 +565,7 @@ tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) static boolean tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) { - if (index < 0 || index >= NINE_MAX_CONST_B) { + if (index < 0 || index >= tx->num_constb_allowed) { tx->failure = TRUE; return FALSE; } @@ -568,9 +579,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) { unsigned n; - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_F_SHADER) - if (IS_VS && index >= NINE_MAX_CONST_F_SHADER) - WARN("lconstf index %i too high, indirect access won't work\n", index); + FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) for (n = 0; n < tx->num_lconstf; ++n) if (tx->lconstf[n].idx == index) @@ -592,7 +601,7 @@ tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) static void tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) { - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_I) + FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) tx->lconsti[index].idx = index; tx->lconsti[index].reg = tx->native_integers ? ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : @@ -601,7 +610,7 @@ tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) static void tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) { - FAILURE_VOID(index < 0 || index >= NINE_MAX_CONST_B) + FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) tx->lconstb[index].idx = index; tx->lconstb[index].reg = tx->native_integers ? ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : @@ -672,6 +681,54 @@ tx_pred_alloc(struct shader_translator *tx, INT idx) tx->regs.p = ureg_DECL_predicate(tx->ureg); } +/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions + * the projection should be applied on the texture. It doesn't + * apply on texkill. + * The doc is very imprecise here (it says the projection is done + * before rasterization, thus in vs, which seems wrong since ps instructions + * are affected differently) + * For now we only apply to the ps TEX instruction and TEXBEM. + * Perhaps some other instructions would need it */ +static inline void +apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, + struct ureg_src src, INT idx) +{ + struct ureg_dst tmp; + unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); + + /* no projection */ + if (dim == 1) { + ureg_MOV(tx->ureg, dst, src); + } else { + tmp = tx_scratch_scalar(tx); + ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); + ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); + } +} + +static inline void +TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, + unsigned target, struct ureg_src src0, + struct ureg_src src1, INT idx) +{ + unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); + struct ureg_dst tmp; + + /* dim == 1: no projection + * Looks like must be disabled when it makes no + * sense according the texture dimensions + */ + if (dim == 1 || dim <= target) { + ureg_TEX(tx->ureg, dst, target, src0, src1); + } else if (dim == 4) { + ureg_TXP(tx->ureg, dst, target, src0, src1); + } else { + tmp = tx_scratch(tx); + apply_ps1x_projection(tx, tmp, src0, idx); + ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); + } +} + static inline void tx_texcoord_alloc(struct shader_translator *tx, INT idx) { @@ -1086,9 +1143,18 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) assert(param->idx >= 0 && param->idx < 4); assert(!param->rel); tx->info->rt_mask |= 1 << param->idx; - if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) - tx->regs.oCol[param->idx] = - ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); + if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { + /* ps < 3: oCol[0] will have fog blending afterward + * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ + if (!IS_VS && tx->version.major < 3 && param->idx == 0) { + tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); + } else if (IS_VS && tx->version.major < 3 && param->idx == 1) { + tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg); + } else { + tx->regs.oCol[param->idx] = + ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); + } + } dst = tx->regs.oCol[param->idx]; if (IS_VS && tx->version.major < 3) dst = ureg_saturate(dst); @@ -1824,7 +1890,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, sem->Index = 0; break; default: - assert(!"Invalid DECLUSAGE."); + unreachable(!"Invalid DECLUSAGE."); break; } } @@ -2135,12 +2201,79 @@ DECL_SPECIAL(TEXKILL) DECL_SPECIAL(TEXBEM) { - STUB(D3DERR_INVALIDCALL); -} + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_dst tmp, tmp2, texcoord; + struct ureg_src sample, m00, m01, m10, m11; + struct ureg_src bumpenvlscale, bumpenvloffset; + const int m = tx->insn.dst[0].idx; + const int n = tx->insn.src[0].idx; -DECL_SPECIAL(TEXBEML) -{ - STUB(D3DERR_INVALIDCALL); + assert(tx->version.major == 1); + + sample = ureg_DECL_sampler(ureg, m); + tx->info->sampler_mask |= 1 << m; + + tx_texcoord_alloc(tx, m); + + tmp = tx_scratch(tx); + tmp2 = tx_scratch(tx); + texcoord = tx_scratch(tx); + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + + /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ + if (m % 2 == 0) { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y); + } else { + bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z); + bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W); + } + + apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); + + /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); + /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); + /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), + NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + + /* Now the texture coordinates are in tmp.xy */ + + if (tx->insn.opcode == D3DSIO_TEXBEM) { + ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + } else if (tx->insn.opcode == D3DSIO_TEXBEML) { + /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ + ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); + ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z), + bumpenvlscale, bumpenvloffset); + ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); + } + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXREG2AR) @@ -2421,7 +2554,43 @@ DECL_SPECIAL(TEXDEPTH) DECL_SPECIAL(BEM) { - STUB(D3DERR_INVALIDCALL); + struct ureg_program *ureg = tx->ureg; + struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); + struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); + struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); + struct ureg_src m00, m01, m10, m11; + const int m = tx->insn.dst[0].idx; + struct ureg_dst tmp; + /* + * Bump-env-matrix: + * 00 is X + * 01 is Y + * 10 is Z + * 11 is W + */ + nine_info_mark_const_f_used(tx->info, 8 + m); + m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); + m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); + m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); + m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); + /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, + NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); + /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); + + /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, + NINE_APPLY_SWIZZLE(src1, X), src0); + /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ + ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, + NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); + ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); + + tx->info->bumpenvmat_needed = 1; + + return D3D_OK; } DECL_SPECIAL(TEXLD) @@ -2482,7 +2651,7 @@ DECL_SPECIAL(TEX) src[1] = ureg_DECL_sampler(ureg, s); tx->info->sampler_mask |= 1 << s; - ureg_TEX(ureg, dst, t, src[0], src[1]); + TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); return D3D_OK; } @@ -2616,7 +2785,7 @@ struct sm1_op_info inst_table[] = _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), - _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)), + _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), @@ -3023,6 +3192,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) info->lconstf.data = NULL; info->lconstf.ranges = NULL; + info->bumpenvmat_needed = 0; + for (i = 0; i < Elements(tx->regs.rL); ++i) { tx->regs.rL[i] = ureg_dst_undef(); } @@ -3074,6 +3245,57 @@ tgsi_processor_from_type(unsigned shader_type) } } +static void +shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) +{ + struct ureg_program *ureg = tx->ureg; + struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); + struct ureg_src fog_end, fog_coeff, fog_density; + struct ureg_src fog_vs, depth, fog_color; + struct ureg_dst fog_factor; + + if (!tx->info->fog_enable) { + ureg_MOV(ureg, oCol0, src_col); + return; + } + + if (tx->info->fog_mode != D3DFOG_NONE) + depth = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, + TGSI_INTERPOLATE_LINEAR), + TGSI_SWIZZLE_Z); + + nine_info_mark_const_f_used(tx->info, 33); + fog_color = NINE_CONSTANT_SRC(32); + fog_factor = tx_scratch_scalar(tx); + + if (tx->info->fog_mode == D3DFOG_LINEAR) { + fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X); + fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y); + ureg_SUB(ureg, fog_factor, fog_end, depth); + ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); + } else if (tx->info->fog_mode == D3DFOG_EXP) { + fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); + ureg_MUL(ureg, fog_factor, depth, fog_density); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); + ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); + } else if (tx->info->fog_mode == D3DFOG_EXP2) { + fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); + ureg_MUL(ureg, fog_factor, depth, fog_density); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); + ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); + ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); + } else { + fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, + TGSI_INTERPOLATE_PERSPECTIVE), + TGSI_SWIZZLE_X); + ureg_MOV(ureg, fog_factor, fog_vs); + } + + ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), + tx_src_scalar(fog_factor), src_col, fog_color); + ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); +} + #define GET_CAP(n) device->screen->get_param( \ device->screen, PIPE_CAP_##n) #define GET_SHADER_CAP(n) device->screen->get_shader_param( \ @@ -3123,6 +3345,24 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) tx->texcoord_sn = tx->want_texcoord ? TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; + if (IS_VS) { + tx->num_constf_allowed = NINE_MAX_CONST_F; + } else if (tx->version.major < 2) {/* IS_PS v1 */ + tx->num_constf_allowed = 8; + } else if (tx->version.major == 2) {/* IS_PS v2 */ + tx->num_constf_allowed = 32; + } else {/* IS_PS v3 */ + tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; + } + + if (tx->version.major < 2) { + tx->num_consti_allowed = 0; + tx->num_constb_allowed = 0; + } else { + tx->num_consti_allowed = NINE_MAX_CONST_I; + tx->num_constb_allowed = NINE_MAX_CONST_B; + } + /* VS must always write position. Declare it here to make it the 1st output. * (Some drivers like nv50 are buggy and rely on that.) */ @@ -3145,10 +3385,26 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) goto out; } - if (IS_PS && (tx->version.major < 2) && tx->num_temp) { - ureg_MOV(tx->ureg, ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0), - ureg_src(tx->regs.r[0])); - info->rt_mask |= 0x1; + if (IS_PS && tx->version.major < 3) { + if (tx->version.major < 2) { + assert(tx->num_temp); /* there must be color output */ + info->rt_mask |= 0x1; + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); + } else { + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); + } + } + + if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { + tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); + ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); + } + + /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */ + if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) { + struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1); + ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1])); + ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f)); } if (info->position_t) @@ -3233,6 +3489,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info) info->const_int_slots > 0 ? max_const_f + info->const_int_slots : info->const_float_slots; + info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ for (s = 0; s < slot_max; s++) diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index ec256c153a9..41577ac572b 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -59,6 +59,10 @@ struct nine_shader_info uint16_t sampler_mask_shadow; /* in, which samplers use depth compare */ uint8_t rt_mask; /* out, which render targets are being written */ + uint8_t fog_enable; + uint8_t fog_mode; + uint16_t projected; /* ps 1.1 to 1.3 */ + unsigned const_i_base; /* in vec4 (16 byte) units */ unsigned const_b_base; /* in vec4 (16 byte) units */ unsigned const_used_size; @@ -68,6 +72,7 @@ struct nine_shader_info unsigned const_bool_slots; struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */ + uint8_t bumpenvmat_needed; }; static inline void @@ -137,4 +142,48 @@ nine_shader_variants_free(struct nine_shader_variant *list) } } +struct nine_shader_variant64 +{ + struct nine_shader_variant64 *next; + void *cso; + uint64_t key; +}; + +static inline void * +nine_shader_variant_get64(struct nine_shader_variant64 *list, uint64_t key) +{ + while (list->key != key && list->next) + list = list->next; + if (list->key == key) + return list->cso; + return NULL; +} + +static inline boolean +nine_shader_variant_add64(struct nine_shader_variant64 *list, + uint64_t key, void *cso) +{ + while (list->next) { + assert(list->key != key); + list = list->next; + } + list->next = MALLOC_STRUCT(nine_shader_variant64); + if (!list->next) + return FALSE; + list->next->next = NULL; + list->next->key = key; + list->next->cso = cso; + return TRUE; +} + +static inline void +nine_shader_variants_free64(struct nine_shader_variant64 *list) +{ + while (list->next) { + struct nine_shader_variant64 *ptr = list->next; + list->next = ptr->next; + FREE(ptr); + } +} + #endif /* _NINE_SHADER_H_ */ diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 6c835858d18..558d07a2bd0 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -33,352 +33,36 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "cso_cache/cso_context.h" +#include "util/u_upload_mgr.h" #include "util/u_math.h" #define DBG_CHANNEL DBG_DEVICE -static uint32_t -update_framebuffer(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - struct nine_state *state = &device->state; - struct pipe_framebuffer_state *fb = &device->state.fb; - unsigned i; - struct NineSurface9 *rt0 = state->rt[0]; - unsigned w = rt0->desc.Width; - unsigned h = rt0->desc.Height; - D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; - unsigned mask = state->ps ? state->ps->rt_mask : 1; - const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; - - DBG("\n"); - - state->rt_mask = 0x0; - fb->nr_cbufs = 0; - - /* all render targets must have the same size and the depth buffer must be - * bigger. Multisample has to match, according to spec. But some apps do - * things wrong there, and no error is returned. The behaviour they get - * apparently is that depth buffer is disabled if it doesn't match. - * Surely the same for render targets. */ - - /* Special case: D3DFMT_NULL is used to bound no real render target, - * but render to depth buffer. We have to not take into account the render - * target info. TODO: know what should happen when there are several render targers - * and the first one is D3DFMT_NULL */ - if (rt0->desc.Format == D3DFMT_NULL && state->ds) { - w = state->ds->desc.Width; - h = state->ds->desc.Height; - nr_samples = state->ds->desc.MultiSampleType; - } - - for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { - struct NineSurface9 *rt = state->rt[i]; - - if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && - rt->desc.Width == w && rt->desc.Height == h && - rt->desc.MultiSampleType == nr_samples) { - fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); - state->rt_mask |= 1 << i; - fb->nr_cbufs = i + 1; - - if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { - assert(rt->texture == D3DRTYPE_TEXTURE || - rt->texture == D3DRTYPE_CUBETEXTURE); - NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; - } - } else { - /* Color outputs must match RT slot, - * drivers will have to handle NULL entries for GL, too. - */ - fb->cbufs[i] = NULL; - } - } - - if (state->ds && state->ds->desc.Width >= w && - state->ds->desc.Height >= h && - state->ds->desc.MultiSampleType == nr_samples) { - fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); - } else { - fb->zsbuf = NULL; - } - - fb->width = w; - fb->height = h; - - pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ - - if (fb->zsbuf) { - DWORD scale; - switch (fb->zsbuf->format) { - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - scale = fui(1.0f); - break; - case PIPE_FORMAT_Z16_UNORM: - scale = fui((float)(1 << 16)); - break; - default: - scale = fui((float)(1 << 24)); - break; - } - if (state->rs[NINED3DRS_ZBIASSCALE] != scale) { - state->rs[NINED3DRS_ZBIASSCALE] = scale; - state->changed.group |= NINE_STATE_RASTERIZER; - } - } - - return state->changed.group; -} - -static void -update_viewport(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - const D3DVIEWPORT9 *vport = &device->state.viewport; - struct pipe_viewport_state pvport; - - /* D3D coordinates are: - * -1 .. +1 for X,Y and - * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) - */ - pvport.scale[0] = (float)vport->Width * 0.5f; - pvport.scale[1] = (float)vport->Height * -0.5f; - pvport.scale[2] = vport->MaxZ - vport->MinZ; - pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; - pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; - pvport.translate[2] = vport->MinZ; - - /* We found R600 and SI cards have some imprecision - * on the barycentric coordinates used for interpolation. - * Some shaders rely on having something precise. - * We found that the proprietary driver has the imprecision issue, - * except when the render target width and height are powers of two. - * It is using some sort of workaround for these cases - * which covers likely all the cases the applications rely - * on something precise. - * We haven't found the workaround, but it seems like it's better - * for applications if the imprecision is biased towards infinity - * instead of -infinity (which is what measured). So shift slightly - * the viewport: not enough to change rasterization result (in particular - * for multisampling), but enough to make the imprecision biased - * towards infinity. We do this shift only if render target width and - * height are powers of two. - * Solves 'red shadows' bug on UE3 games. - */ - if (device->driver_bugs.buggy_barycentrics && - ((vport->Width & (vport->Width-1)) == 0) && - ((vport->Height & (vport->Height-1)) == 0)) { - pvport.translate[0] -= 1.0f / 128.0f; - pvport.translate[1] -= 1.0f / 128.0f; - } - - pipe->set_viewport_states(pipe, 0, 1, &pvport); -} - -static inline void -update_scissor(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - - pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); -} +/* State preparation only */ static inline void -update_blend(struct NineDevice9 *device) +prepare_blend(struct NineDevice9 *device) { - nine_convert_blend_state(device->cso, device->state.rs); + nine_convert_blend_state(&device->state.pipe.blend, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_BLEND; } static inline void -update_dsa(struct NineDevice9 *device) +prepare_dsa(struct NineDevice9 *device) { - nine_convert_dsa_state(device->cso, device->state.rs); + nine_convert_dsa_state(&device->state.pipe.dsa, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_DSA; } static inline void -update_rasterizer(struct NineDevice9 *device) +prepare_rasterizer(struct NineDevice9 *device) { - nine_convert_rasterizer_state(device->cso, device->state.rs); + nine_convert_rasterizer_state(&device->state.pipe.rast, device->state.rs); + device->state.commit |= NINE_STATE_COMMIT_RASTERIZER; } -/* Loop through VS inputs and pick the vertex elements with the declared - * usage from the vertex declaration, then insert the instance divisor from - * the stream source frequency setting. - */ static void -update_vertex_elements(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; - const struct NineVertexShader9 *vs; - unsigned n, b, i; - int index; - char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ - char used_streams[device->caps.MaxStreams]; - int dummy_vbo_stream = -1; - BOOL need_dummy_vbo = FALSE; - struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; - - state->stream_usage_mask = 0; - memset(vdecl_index_map, -1, 16); - memset(used_streams, 0, device->caps.MaxStreams); - vs = device->state.vs ? device->state.vs : device->ff.vs; - - if (vdecl) { - for (n = 0; n < vs->num_inputs; ++n) { - DBG("looking up input %u (usage %u) from vdecl(%p)\n", - n, vs->input_map[n].ndecl, vdecl); - - for (i = 0; i < vdecl->nelems; i++) { - if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { - vdecl_index_map[n] = i; - used_streams[vdecl->elems[i].vertex_buffer_index] = 1; - break; - } - } - if (vdecl_index_map[n] < 0) - need_dummy_vbo = TRUE; - } - } else { - /* No vertex declaration. Likely will never happen in practice, - * but we need not crash on this */ - need_dummy_vbo = TRUE; - } - - if (need_dummy_vbo) { - for (i = 0; i < device->caps.MaxStreams; i++ ) { - if (!used_streams[i]) { - dummy_vbo_stream = i; - break; - } - } - } - /* there are less vertex shader inputs than stream slots, - * so if we need a slot for the dummy vbo, we should have found one */ - assert (!need_dummy_vbo || dummy_vbo_stream != -1); - - for (n = 0; n < vs->num_inputs; ++n) { - index = vdecl_index_map[n]; - if (index >= 0) { - ve[n] = vdecl->elems[index]; - b = ve[n].vertex_buffer_index; - state->stream_usage_mask |= 1 << b; - /* XXX wine just uses 1 here: */ - if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) - ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; - } else { - /* if the vertex declaration is incomplete compared to what the - * vertex shader needs, we bind a dummy vbo with 0 0 0 0. - * This is not precised by the spec, but is the behaviour - * tested on win */ - ve[n].vertex_buffer_index = dummy_vbo_stream; - ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - ve[n].src_offset = 0; - ve[n].instance_divisor = 0; - } - } - - if (state->dummy_vbo_bound_at != dummy_vbo_stream) { - if (state->dummy_vbo_bound_at >= 0) - state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; - if (dummy_vbo_stream >= 0) { - state->changed.vtxbuf |= 1 << dummy_vbo_stream; - state->vbo_bound_done = FALSE; - } - state->dummy_vbo_bound_at = dummy_vbo_stream; - } - - cso_set_vertex_elements(device->cso, vs->num_inputs, ve); - - state->changed.stream_freq = 0; -} - -static inline uint32_t -update_shader_variant_keys(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - uint32_t mask = 0; - uint32_t vs_key = state->samplers_shadow; - uint32_t ps_key = state->samplers_shadow; - - vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0); - ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0); - - if (state->vs) vs_key &= state->vs->sampler_mask; - if (state->ps) { - if (unlikely(state->ps->byte_code.version < 0x20)) { - /* no depth textures, but variable targets */ - uint32_t m = state->ps->sampler_mask; - ps_key = 0; - while (m) { - int s = ffs(m) - 1; - m &= ~(1 << s); - ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); - } - } else { - ps_key &= state->ps->sampler_mask; - } - } - - if (state->vs && state->vs_key != vs_key) { - state->vs_key = vs_key; - mask |= NINE_STATE_VS; - } - if (state->ps && state->ps_key != ps_key) { - state->ps_key = ps_key; - mask |= NINE_STATE_PS; - } - return mask; -} - -static inline uint32_t -update_vs(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NineVertexShader9 *vs = state->vs; - uint32_t changed_group = 0; - - /* likely because we dislike FF */ - if (likely(vs)) { - state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key); - } else { - vs = device->ff.vs; - state->cso.vs = vs->variant.cso; - } - device->pipe->bind_vs_state(device->pipe, state->cso.vs); - - if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { - state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; - changed_group |= NINE_STATE_RASTERIZER; - } - - if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} - -static inline uint32_t -update_ps(struct NineDevice9 *device) -{ - struct nine_state *state = &device->state; - struct NinePixelShader9 *ps = state->ps; - uint32_t changed_group = 0; - - if (likely(ps)) { - state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key); - } else { - ps = device->ff.ps; - state->cso.ps = ps->variant.cso; - } - device->pipe->bind_fs_state(device->pipe, state->cso.ps); - - if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) - /* Bound dummy sampler. */ - changed_group |= NINE_STATE_SAMPLER; - return changed_group; -} +prepare_ps_constants_userbuf(struct NineDevice9 *device); #define DO_UPLOAD_CONST_F(buf,p,c,d) \ do { \ @@ -391,7 +75,7 @@ update_ps(struct NineDevice9 *device) /* OK, this is a bit ugly ... */ static void -update_constants(struct NineDevice9 *device, unsigned shader_type) +upload_constants(struct NineDevice9 *device, unsigned shader_type) { struct pipe_context *pipe = device->pipe; struct pipe_resource *buf; @@ -438,10 +122,17 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = device->state.vs->lconstf.ranges; lconstf_data = device->state.vs->lconstf.data; - device->state.ff.clobber.vs_const = TRUE; device->state.changed.group &= ~NINE_STATE_VS_CONST; } else { DBG("PS\n"); + /* features only implemented on the userbuf path */ + if (device->state.ps->bumpenvmat_needed || ( + device->state.ps->byte_code.version < 0x30 && + device->state.rs[D3DRS_FOGENABLE])) { + device->prefer_user_constbuf = TRUE; + prepare_ps_constants_userbuf(device); + return; + } buf = device->constbuf_ps; const_f = device->state.ps_const_f; @@ -464,7 +155,6 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) lconstf_ranges = NULL; lconstf_data = NULL; - device->state.ff.clobber.ps_const = TRUE; device->state.changed.group &= ~NINE_STATE_PS_CONST; } @@ -524,10 +214,9 @@ update_constants(struct NineDevice9 *device, unsigned shader_type) } static void -update_vs_constants_userbuf(struct NineDevice9 *device) +prepare_vs_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; cb.buffer = NULL; cb.buffer_offset = 0; @@ -567,7 +256,18 @@ update_vs_constants_userbuf(struct NineDevice9 *device) cb.user_buffer = dst; } - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb); + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_vs = cb; if (device->state.changed.vs_const_f) { struct nine_range *r = device->state.changed.vs_const_f; @@ -578,22 +278,19 @@ update_vs_constants_userbuf(struct NineDevice9 *device) device->state.changed.vs_const_f = NULL; } state->changed.group &= ~NINE_STATE_VS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_VS; } static void -update_ps_constants_userbuf(struct NineDevice9 *device) +prepare_ps_constants_userbuf(struct NineDevice9 *device) { struct nine_state *state = &device->state; - struct pipe_context *pipe = device->pipe; struct pipe_constant_buffer cb; cb.buffer = NULL; cb.buffer_offset = 0; cb.buffer_size = device->state.ps->const_used_size; cb.user_buffer = device->state.ps_const_f; - if (!cb.buffer_size) - return; - if (state->changed.ps_const_i) { int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f]; memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i)); @@ -606,7 +303,47 @@ update_ps_constants_userbuf(struct NineDevice9 *device) state->changed.ps_const_b = 0; } - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb); + /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */ + if (device->state.ps->bumpenvmat_needed) { + memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars)); + + cb.user_buffer = device->state.ps_lconstf_temp; + } + + if (state->ps->byte_code.version < 0x30 && + state->rs[D3DRS_FOGENABLE]) { + float *dst = &state->ps_lconstf_temp[4 * 32]; + if (cb.user_buffer != state->ps_lconstf_temp) { + memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size); + cb.user_buffer = state->ps_lconstf_temp; + } + + d3dcolor_to_rgba(dst, state->rs[D3DRS_FOGCOLOR]); + if (state->rs[D3DRS_FOGTABLEMODE] == D3DFOG_LINEAR) { + dst[4] = asfloat(state->rs[D3DRS_FOGEND]); + dst[5] = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART])); + } else if (state->rs[D3DRS_FOGTABLEMODE] != D3DFOG_NONE) { + dst[4] = asfloat(state->rs[D3DRS_FOGDENSITY]); + } + cb.buffer_size = 4 * 4 * 34; + } + + if (!cb.buffer_size) + return; + + if (!device->driver_caps.user_cbufs) { + u_upload_data(device->constbuf_uploader, + 0, + cb.buffer_size, + cb.user_buffer, + &cb.buffer_offset, + &cb.buffer); + u_upload_unmap(device->constbuf_uploader); + cb.user_buffer = NULL; + } + + state->pipe.cb_ps = cb; if (device->state.changed.ps_const_f) { struct nine_range *r = device->state.changed.ps_const_f; @@ -617,6 +354,286 @@ update_ps_constants_userbuf(struct NineDevice9 *device) device->state.changed.ps_const_f = NULL; } state->changed.group &= ~NINE_STATE_PS_CONST; + state->commit |= NINE_STATE_COMMIT_CONST_PS; +} + +static inline uint32_t +prepare_vs(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NineVertexShader9 *vs = state->vs; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(vs)) + has_key_changed = NineVertexShader9_UpdateKey(vs, state); + + if (!shader_changed && !has_key_changed) + return 0; + + /* likely because we dislike FF */ + if (likely(vs)) { + state->cso.vs = NineVertexShader9_GetVariant(vs); + } else { + vs = device->ff.vs; + state->cso.vs = vs->ff_cso; + } + + if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) { + state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size; + changed_group |= NINE_STATE_RASTERIZER; + } + + if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_VS; + return changed_group; +} + +static inline uint32_t +prepare_ps(struct NineDevice9 *device, uint8_t shader_changed) +{ + struct nine_state *state = &device->state; + struct NinePixelShader9 *ps = state->ps; + uint32_t changed_group = 0; + int has_key_changed = 0; + + if (likely(ps)) + has_key_changed = NinePixelShader9_UpdateKey(ps, state); + + if (!shader_changed && !has_key_changed) + return 0; + + if (likely(ps)) { + state->cso.ps = NinePixelShader9_GetVariant(ps); + } else { + ps = device->ff.ps; + state->cso.ps = ps->ff_cso; + } + + if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask) + /* Bound dummy sampler. */ + changed_group |= NINE_STATE_SAMPLER; + + state->commit |= NINE_STATE_COMMIT_PS; + return changed_group; +} + +/* State preparation incremental */ + +/* State preparation + State commit */ + +static uint32_t +update_framebuffer(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + struct nine_state *state = &device->state; + struct pipe_framebuffer_state *fb = &device->state.fb; + unsigned i; + struct NineSurface9 *rt0 = state->rt[0]; + unsigned w = rt0->desc.Width; + unsigned h = rt0->desc.Height; + D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType; + unsigned mask = state->ps ? state->ps->rt_mask : 1; + const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0; + + DBG("\n"); + + state->rt_mask = 0x0; + fb->nr_cbufs = 0; + + /* all render targets must have the same size and the depth buffer must be + * bigger. Multisample has to match, according to spec. But some apps do + * things wrong there, and no error is returned. The behaviour they get + * apparently is that depth buffer is disabled if it doesn't match. + * Surely the same for render targets. */ + + /* Special case: D3DFMT_NULL is used to bound no real render target, + * but render to depth buffer. We have to not take into account the render + * target info. TODO: know what should happen when there are several render targers + * and the first one is D3DFMT_NULL */ + if (rt0->desc.Format == D3DFMT_NULL && state->ds) { + w = state->ds->desc.Width; + h = state->ds->desc.Height; + nr_samples = state->ds->desc.MultiSampleType; + } + + for (i = 0; i < device->caps.NumSimultaneousRTs; ++i) { + struct NineSurface9 *rt = state->rt[i]; + + if (rt && rt->desc.Format != D3DFMT_NULL && (mask & (1 << i)) && + rt->desc.Width == w && rt->desc.Height == h && + rt->desc.MultiSampleType == nr_samples) { + fb->cbufs[i] = NineSurface9_GetSurface(rt, sRGB); + state->rt_mask |= 1 << i; + fb->nr_cbufs = i + 1; + + if (unlikely(rt->desc.Usage & D3DUSAGE_AUTOGENMIPMAP)) { + assert(rt->texture == D3DRTYPE_TEXTURE || + rt->texture == D3DRTYPE_CUBETEXTURE); + NineBaseTexture9(rt->base.base.container)->dirty_mip = TRUE; + } + } else { + /* Color outputs must match RT slot, + * drivers will have to handle NULL entries for GL, too. + */ + fb->cbufs[i] = NULL; + } + } + + if (state->ds && state->ds->desc.Width >= w && + state->ds->desc.Height >= h && + state->ds->desc.MultiSampleType == nr_samples) { + fb->zsbuf = NineSurface9_GetSurface(state->ds, 0); + } else { + fb->zsbuf = NULL; + } + + fb->width = w; + fb->height = h; + + pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */ + + return state->changed.group; +} + +static void +update_viewport(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + const D3DVIEWPORT9 *vport = &device->state.viewport; + struct pipe_viewport_state pvport; + + /* D3D coordinates are: + * -1 .. +1 for X,Y and + * 0 .. +1 for Z (we use pipe_rasterizer_state.clip_halfz) + */ + pvport.scale[0] = (float)vport->Width * 0.5f; + pvport.scale[1] = (float)vport->Height * -0.5f; + pvport.scale[2] = vport->MaxZ - vport->MinZ; + pvport.translate[0] = (float)vport->Width * 0.5f + (float)vport->X; + pvport.translate[1] = (float)vport->Height * 0.5f + (float)vport->Y; + pvport.translate[2] = vport->MinZ; + + /* We found R600 and SI cards have some imprecision + * on the barycentric coordinates used for interpolation. + * Some shaders rely on having something precise. + * We found that the proprietary driver has the imprecision issue, + * except when the render target width and height are powers of two. + * It is using some sort of workaround for these cases + * which covers likely all the cases the applications rely + * on something precise. + * We haven't found the workaround, but it seems like it's better + * for applications if the imprecision is biased towards infinity + * instead of -infinity (which is what measured). So shift slightly + * the viewport: not enough to change rasterization result (in particular + * for multisampling), but enough to make the imprecision biased + * towards infinity. We do this shift only if render target width and + * height are powers of two. + * Solves 'red shadows' bug on UE3 games. + */ + if (device->driver_bugs.buggy_barycentrics && + ((vport->Width & (vport->Width-1)) == 0) && + ((vport->Height & (vport->Height-1)) == 0)) { + pvport.translate[0] -= 1.0f / 128.0f; + pvport.translate[1] -= 1.0f / 128.0f; + } + + pipe->set_viewport_states(pipe, 0, 1, &pvport); +} + +/* Loop through VS inputs and pick the vertex elements with the declared + * usage from the vertex declaration, then insert the instance divisor from + * the stream source frequency setting. + */ +static void +update_vertex_elements(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + const struct NineVertexDeclaration9 *vdecl = device->state.vdecl; + const struct NineVertexShader9 *vs; + unsigned n, b, i; + int index; + char vdecl_index_map[16]; /* vs->num_inputs <= 16 */ + char used_streams[device->caps.MaxStreams]; + int dummy_vbo_stream = -1; + BOOL need_dummy_vbo = FALSE; + struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; + + state->stream_usage_mask = 0; + memset(vdecl_index_map, -1, 16); + memset(used_streams, 0, device->caps.MaxStreams); + vs = device->state.vs ? device->state.vs : device->ff.vs; + + if (vdecl) { + for (n = 0; n < vs->num_inputs; ++n) { + DBG("looking up input %u (usage %u) from vdecl(%p)\n", + n, vs->input_map[n].ndecl, vdecl); + + for (i = 0; i < vdecl->nelems; i++) { + if (vdecl->usage_map[i] == vs->input_map[n].ndecl) { + vdecl_index_map[n] = i; + used_streams[vdecl->elems[i].vertex_buffer_index] = 1; + break; + } + } + if (vdecl_index_map[n] < 0) + need_dummy_vbo = TRUE; + } + } else { + /* No vertex declaration. Likely will never happen in practice, + * but we need not crash on this */ + need_dummy_vbo = TRUE; + } + + if (need_dummy_vbo) { + for (i = 0; i < device->caps.MaxStreams; i++ ) { + if (!used_streams[i]) { + dummy_vbo_stream = i; + break; + } + } + } + /* there are less vertex shader inputs than stream slots, + * so if we need a slot for the dummy vbo, we should have found one */ + assert (!need_dummy_vbo || dummy_vbo_stream != -1); + + for (n = 0; n < vs->num_inputs; ++n) { + index = vdecl_index_map[n]; + if (index >= 0) { + ve[n] = vdecl->elems[index]; + b = ve[n].vertex_buffer_index; + state->stream_usage_mask |= 1 << b; + /* XXX wine just uses 1 here: */ + if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) + ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF; + } else { + /* if the vertex declaration is incomplete compared to what the + * vertex shader needs, we bind a dummy vbo with 0 0 0 0. + * This is not precised by the spec, but is the behaviour + * tested on win */ + ve[n].vertex_buffer_index = dummy_vbo_stream; + ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + ve[n].src_offset = 0; + ve[n].instance_divisor = 0; + } + } + + if (state->dummy_vbo_bound_at != dummy_vbo_stream) { + if (state->dummy_vbo_bound_at >= 0) + state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at; + if (dummy_vbo_stream >= 0) { + state->changed.vtxbuf |= 1 << dummy_vbo_stream; + state->vbo_bound_done = FALSE; + } + state->dummy_vbo_bound_at = dummy_vbo_stream; + } + + cso_set_vertex_elements(device->cso, vs->num_inputs, ve); + + state->changed.stream_freq = 0; } static void @@ -627,7 +644,6 @@ update_vertex_buffers(struct NineDevice9 *device) struct pipe_vertex_buffer dummy_vtxbuf; uint32_t mask = state->changed.vtxbuf; unsigned i; - unsigned start; DBG("mask=%x\n", mask); @@ -656,27 +672,6 @@ update_vertex_buffers(struct NineDevice9 *device) state->changed.vtxbuf = 0; } -static inline void -update_index_buffer(struct NineDevice9 *device) -{ - struct pipe_context *pipe = device->pipe; - if (device->state.idxbuf) - pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); - else - pipe->set_index_buffer(pipe, NULL); -} - -/* TODO: only go through dirty textures */ -static void -validate_textures(struct NineDevice9 *device) -{ - struct NineBaseTexture9 *tex, *ptr; - LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { - list_delinit(&tex->list); - NineBaseTexture9_Validate(tex); - } -} - static inline boolean update_sampler_derived(struct nine_state *state, unsigned s) { @@ -706,20 +701,16 @@ update_sampler_derived(struct nine_state *state, unsigned s) static void update_textures_and_samplers(struct NineDevice9 *device) { - struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; struct pipe_sampler_view *view[NINE_MAX_SAMPLERS]; - struct pipe_sampler_state samp; unsigned num_textures; unsigned i; - boolean commit_views; boolean commit_samplers; uint16_t sampler_mask = state->ps ? state->ps->sampler_mask : device->ff.ps->sampler_mask; /* TODO: Can we reduce iterations here ? */ - commit_views = FALSE; commit_samplers = FALSE; state->bound_samplers_mask_ps = 0; for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_PS; ++i) { @@ -749,26 +740,12 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_FRAGMENT, - s - NINE_SAMPLER_PS(0), &samp); + s - NINE_SAMPLER_PS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } @@ -776,16 +753,11 @@ update_textures_and_samplers(struct NineDevice9 *device) state->bound_samplers_mask_ps |= (1 << s); } - commit_views |= (state->changed.texture & NINE_PS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0, - num_textures, view); + cso_set_sampler_views(device->cso, PIPE_SHADER_FRAGMENT, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT); - commit_views = FALSE; commit_samplers = FALSE; sampler_mask = state->vs ? state->vs->sampler_mask : 0; state->bound_samplers_mask_vs = 0; @@ -816,76 +788,170 @@ update_textures_and_samplers(struct NineDevice9 *device) * unbind dummy sampler directly when they are not needed * anymore, but they're going to be removed as long as texture * or sampler states are changed. */ - view[i] = device->dummy_sampler; + view[i] = device->dummy_sampler_view; num_textures = i + 1; - memset(&samp, 0, sizeof(samp)); - samp.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - samp.max_lod = 15.0f; - samp.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - samp.min_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - samp.compare_mode = PIPE_TEX_COMPARE_NONE; - samp.compare_func = PIPE_FUNC_LEQUAL; - samp.normalized_coords = 1; - samp.seamless_cube_map = 1; - cso_single_sampler(device->cso, PIPE_SHADER_VERTEX, - s - NINE_SAMPLER_VS(0), &samp); + s - NINE_SAMPLER_VS(0), &device->dummy_sampler_state); - commit_views = TRUE; commit_samplers = TRUE; state->changed.sampler[s] = ~0; } state->bound_samplers_mask_vs |= (1 << s); } - commit_views |= (state->changed.texture & NINE_VS_SAMPLERS_MASK) != 0; - commit_views |= state->changed.srgb; - if (commit_views) - pipe->set_sampler_views(pipe, PIPE_SHADER_VERTEX, 0, - num_textures, view); + + cso_set_sampler_views(device->cso, PIPE_SHADER_VERTEX, num_textures, view); if (commit_samplers) cso_single_sampler_done(device->cso, PIPE_SHADER_VERTEX); - state->changed.srgb = FALSE; state->changed.texture = 0; } +/* State commit only */ + +static inline void +commit_blend(struct NineDevice9 *device) +{ + cso_set_blend(device->cso, &device->state.pipe.blend); +} + +static inline void +commit_dsa(struct NineDevice9 *device) +{ + cso_set_depth_stencil_alpha(device->cso, &device->state.pipe.dsa); +} + +static inline void +commit_scissor(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor); +} + +static inline void +commit_rasterizer(struct NineDevice9 *device) +{ + cso_set_rasterizer(device->cso, &device->state.pipe.rast); +} -#define NINE_STATE_FREQ_GROUP_0 \ - (NINE_STATE_FB | \ - NINE_STATE_VIEWPORT | \ - NINE_STATE_SCISSOR | \ - NINE_STATE_BLEND | \ - NINE_STATE_DSA | \ - NINE_STATE_RASTERIZER | \ - NINE_STATE_VS | \ - NINE_STATE_PS | \ - NINE_STATE_BLEND_COLOR | \ - NINE_STATE_STENCIL_REF | \ +static inline void +commit_index_buffer(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + if (device->state.idxbuf) + pipe->set_index_buffer(pipe, &device->state.idxbuf->buffer); + else + pipe->set_index_buffer(pipe, NULL); +} + +static inline void +commit_vs_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.vs)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs); +} + +static inline void +commit_ps_constants(struct NineDevice9 *device) +{ + struct pipe_context *pipe = device->pipe; + + if (unlikely(!device->state.ps)) + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps_ff); + else + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps); +} + +static inline void +commit_vs(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + device->pipe->bind_vs_state(device->pipe, state->cso.vs); +} + + +static inline void +commit_ps(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + device->pipe->bind_fs_state(device->pipe, state->cso.ps); +} +/* State Update */ + +#define NINE_STATE_SHADER_CHANGE_VS \ + (NINE_STATE_VS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER) + +#define NINE_STATE_SHADER_CHANGE_PS \ + (NINE_STATE_PS | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_FOG_SHADER | \ + NINE_STATE_PS1X_SHADER) + +#define NINE_STATE_FREQUENT \ + (NINE_STATE_RASTERIZER | \ + NINE_STATE_TEXTURE | \ + NINE_STATE_SAMPLER | \ + NINE_STATE_VS_CONST | \ + NINE_STATE_PS_CONST) + +#define NINE_STATE_COMMON \ + (NINE_STATE_FB | \ + NINE_STATE_BLEND | \ + NINE_STATE_DSA | \ + NINE_STATE_VIEWPORT | \ + NINE_STATE_VDECL | \ + NINE_STATE_IDXBUF) + +#define NINE_STATE_RARE \ + (NINE_STATE_SCISSOR | \ + NINE_STATE_BLEND_COLOR | \ + NINE_STATE_STENCIL_REF | \ NINE_STATE_SAMPLE_MASK) -#define NINE_STATE_FREQ_GROUP_1 ~NINE_STATE_FREQ_GROUP_0 -#define NINE_STATE_SHADER_VARIANT_GROUP \ - (NINE_STATE_TEXTURE | \ - NINE_STATE_VS | \ - NINE_STATE_PS) +/* TODO: only go through dirty textures */ +static void +validate_textures(struct NineDevice9 *device) +{ + struct NineBaseTexture9 *tex, *ptr; + LIST_FOR_EACH_ENTRY_SAFE(tex, ptr, &device->update_textures, list) { + list_delinit(&tex->list); + NineBaseTexture9_Validate(tex); + } +} + +void +nine_update_state_framebuffer(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + validate_textures(device); + + if (state->changed.group & NINE_STATE_FB) + update_framebuffer(device); + + state->changed.group &= ~NINE_STATE_FB; +} boolean -nine_update_state(struct NineDevice9 *device, uint32_t mask) +nine_update_state(struct NineDevice9 *device) { struct pipe_context *pipe = device->pipe; struct nine_state *state = &device->state; uint32_t group; - DBG("changed state groups: %x | %x\n", - state->changed.group & NINE_STATE_FREQ_GROUP_0, - state->changed.group & NINE_STATE_FREQ_GROUP_1); + DBG("changed state groups: %x\n", state->changed.group); /* NOTE: We may want to use the cso cache for everything, or let * NineDevice9.RestoreNonCSOState actually set the states, then we wouldn't @@ -896,35 +962,79 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) validate_textures(device); /* may clobber state */ /* ff_update may change VS/PS dirty bits */ - if ((mask & NINE_STATE_FF) && unlikely(!state->vs || !state->ps)) + if (unlikely(!state->vs || !state->ps)) nine_ff_update(device); - group = state->changed.group & mask; + group = state->changed.group; - if (group & NINE_STATE_SHADER_VARIANT_GROUP) - group |= update_shader_variant_keys(device); + if (group & (NINE_STATE_SHADER_CHANGE_VS | NINE_STATE_SHADER_CHANGE_PS)) { + if (group & NINE_STATE_SHADER_CHANGE_VS) + group |= prepare_vs(device, (group & NINE_STATE_VS) != 0); /* may set NINE_STATE_RASTERIZER and NINE_STATE_SAMPLER*/ + if (group & NINE_STATE_SHADER_CHANGE_PS) + group |= prepare_ps(device, (group & NINE_STATE_PS) != 0); + } - if (group & NINE_STATE_FREQ_GROUP_0) { + if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) { if (group & NINE_STATE_FB) - group = update_framebuffer(device) & mask; + group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */ + if (group & NINE_STATE_BLEND) + prepare_blend(device); + if (group & NINE_STATE_DSA) + prepare_dsa(device); if (group & NINE_STATE_VIEWPORT) update_viewport(device); - if (group & NINE_STATE_SCISSOR) - update_scissor(device); - - if (group & NINE_STATE_DSA) - update_dsa(device); - if (group & NINE_STATE_BLEND) - update_blend(device); - - if (group & NINE_STATE_VS) - group |= update_vs(device); + if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || + state->changed.stream_freq & ~1) + update_vertex_elements(device); + if (group & NINE_STATE_IDXBUF) + commit_index_buffer(device); + } + if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) { if (group & NINE_STATE_RASTERIZER) - update_rasterizer(device); + prepare_rasterizer(device); + if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) + update_textures_and_samplers(device); + if (device->prefer_user_constbuf) { + if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) + prepare_vs_constants_userbuf(device); + if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) + prepare_ps_constants_userbuf(device); + } else { + if ((group & NINE_STATE_VS_CONST) && state->vs) + upload_constants(device, PIPE_SHADER_VERTEX); + if ((group & NINE_STATE_PS_CONST) && state->ps) + upload_constants(device, PIPE_SHADER_FRAGMENT); + } + } - if (group & NINE_STATE_PS) - group |= update_ps(device); + if (state->changed.vtxbuf) + update_vertex_buffers(device); + + if (state->commit & NINE_STATE_COMMIT_BLEND) + commit_blend(device); + if (state->commit & NINE_STATE_COMMIT_DSA) + commit_dsa(device); + if (state->commit & NINE_STATE_COMMIT_RASTERIZER) + commit_rasterizer(device); + if (state->commit & NINE_STATE_COMMIT_CONST_VS) + commit_vs_constants(device); + if (state->commit & NINE_STATE_COMMIT_CONST_PS) + commit_ps_constants(device); + if (state->commit & NINE_STATE_COMMIT_VS) + commit_vs(device); + if (state->commit & NINE_STATE_COMMIT_PS) + commit_ps(device); + + state->commit = 0; + + if (unlikely(state->changed.ucp)) { + pipe->set_clip_state(pipe, &state->clip); + state->changed.ucp = 0; + } + if (unlikely(group & NINE_STATE_RARE)) { + if (group & NINE_STATE_SCISSOR) + commit_scissor(device); if (group & NINE_STATE_BLEND_COLOR) { struct pipe_blend_color color; d3dcolor_to_rgba(&color.color[0], state->rs[D3DRS_BLENDFACTOR]); @@ -941,38 +1051,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) } } - if (state->changed.ucp) { - pipe->set_clip_state(pipe, &state->clip); - state->changed.ucp = 0; - } - - if (group & (NINE_STATE_FREQ_GROUP_1 | NINE_STATE_VS)) { - if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER)) - update_textures_and_samplers(device); - - if (group & NINE_STATE_IDXBUF) - update_index_buffer(device); - - if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) || - state->changed.stream_freq & ~1) - update_vertex_elements(device); - - if (device->prefer_user_constbuf) { - if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs) - update_vs_constants_userbuf(device); - if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps) - update_ps_constants_userbuf(device); - } else { - if ((group & NINE_STATE_VS_CONST) && state->vs) - update_constants(device, PIPE_SHADER_VERTEX); - if ((group & NINE_STATE_PS_CONST) && state->ps) - update_constants(device, PIPE_SHADER_FRAGMENT); - } - } - if (state->changed.vtxbuf) - update_vertex_buffers(device); - - device->state.changed.group &= ~mask | + device->state.changed.group &= (NINE_STATE_FF | NINE_STATE_VS_CONST | NINE_STATE_PS_CONST); DBG("finished\n"); @@ -980,6 +1059,7 @@ nine_update_state(struct NineDevice9 *device, uint32_t mask) return TRUE; } +/* State defaults */ static const DWORD nine_render_state_defaults[NINED3DRS_LAST + 1] = { @@ -1134,6 +1214,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] = [NINED3DSAMP_MINLOD] = 0, [NINED3DSAMP_SHADOW] = 0 }; + +void nine_state_restore_non_cso(struct NineDevice9 *device) +{ + struct nine_state *state = &device->state; + + state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; +} + void nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, boolean is_reset) @@ -1152,6 +1244,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, } state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE; state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1; + memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars)); for (s = 0; s < Elements(state->samp); ++s) { memcpy(&state->samp[s], nine_samp_state_defaults, @@ -1170,6 +1263,9 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, /* Set changed flags to initialize driver. */ state->changed.group = NINE_STATE_ALL; + state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1; + state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1; + state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK; state->ff.changed.transform[0] = ~0; state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32); @@ -1186,6 +1282,23 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps, state->dummy_vbo_bound_at = -1; state->vbo_bound_done = FALSE; } + + if (!device->prefer_user_constbuf) { + /* fill cb_vs and cb_ps for the non user constbuf path */ + struct pipe_constant_buffer cb; + + cb.buffer_offset = 0; + cb.buffer_size = device->vs_const_size; + cb.buffer = device->constbuf_vs; + cb.user_buffer = NULL; + state->pipe.cb_vs = cb; + + cb.buffer_size = device->ps_const_size; + cb.buffer = device->constbuf_ps; + state->pipe.cb_ps = cb; + + state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS; + } } void @@ -1353,15 +1466,15 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_ZFUNC] = NINE_STATE_DSA, [D3DRS_ALPHAREF] = NINE_STATE_DSA, [D3DRS_ALPHAFUNC] = NINE_STATE_DSA, - [D3DRS_DITHERENABLE] = NINE_STATE_RASTERIZER, + [D3DRS_DITHERENABLE] = NINE_STATE_BLEND, [D3DRS_ALPHABLENDENABLE] = NINE_STATE_BLEND, - [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER, + [D3DRS_FOGENABLE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, [D3DRS_SPECULARENABLE] = NINE_STATE_FF_LIGHTING, - [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER, - [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER, - [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER, - [D3DRS_FOGEND] = NINE_STATE_FF_OTHER, - [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER, + [D3DRS_FOGCOLOR] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGTABLEMODE] = NINE_STATE_FF_OTHER | NINE_STATE_FOG_SHADER | NINE_STATE_PS_CONST, + [D3DRS_FOGSTART] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGEND] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, + [D3DRS_FOGDENSITY] = NINE_STATE_FF_OTHER | NINE_STATE_PS_CONST, [D3DRS_RANGEFOGENABLE] = NINE_STATE_FF_OTHER, [D3DRS_STENCILENABLE] = NINE_STATE_DSA, [D3DRS_STENCILFAIL] = NINE_STATE_DSA, @@ -1394,7 +1507,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_VERTEXBLEND] = NINE_STATE_FF_OTHER, [D3DRS_CLIPPLANEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSIZE] = NINE_STATE_RASTERIZER, - [D3DRS_POINTSIZE_MIN] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MIN] = NINE_STATE_RASTERIZER, [D3DRS_POINTSPRITEENABLE] = NINE_STATE_RASTERIZER, [D3DRS_POINTSCALEENABLE] = NINE_STATE_FF_OTHER, [D3DRS_POINTSCALE_A] = NINE_STATE_FF_OTHER, @@ -1404,7 +1517,7 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_MULTISAMPLEMASK] = NINE_STATE_SAMPLE_MASK, [D3DRS_PATCHEDGESTYLE] = NINE_STATE_UNHANDLED, [D3DRS_DEBUGMONITORTOKEN] = NINE_STATE_UNHANDLED, - [D3DRS_POINTSIZE_MAX] = NINE_STATE_MISC_CONST, + [D3DRS_POINTSIZE_MAX] = NINE_STATE_RASTERIZER, [D3DRS_INDEXEDVERTEXBLENDENABLE] = NINE_STATE_FF_OTHER, [D3DRS_COLORWRITEENABLE] = NINE_STATE_BLEND, [D3DRS_TWEENFACTOR] = NINE_STATE_FF_OTHER, @@ -1446,6 +1559,8 @@ const uint32_t nine_render_state_group[NINED3DRS_LAST + 1] = [D3DRS_BLENDOPALPHA] = NINE_STATE_BLEND }; +/* Misc */ + D3DMATRIX * nine_state_access_transform(struct nine_state *state, D3DTRANSFORMSTATETYPE t, boolean alloc) @@ -1601,4 +1716,3 @@ const char *nine_d3drs_to_string(DWORD State) return "(invalid)"; } } - diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h index 2bf3f637f93..b34da70ef48 100644 --- a/src/gallium/state_trackers/nine/nine_state.h +++ b/src/gallium/state_trackers/nine/nine_state.h @@ -33,8 +33,7 @@ #define NINED3DRS_VSPOINTSIZE (D3DRS_BLENDOPALPHA + 1) #define NINED3DRS_RTMASK (D3DRS_BLENDOPALPHA + 2) -#define NINED3DRS_ZBIASSCALE (D3DRS_BLENDOPALPHA + 3) -#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 4) +#define NINED3DRS_ALPHACOVERAGE (D3DRS_BLENDOPALPHA + 3) #define D3DRS_LAST D3DRS_BLENDOPALPHA #define NINED3DRS_LAST NINED3DRS_ALPHACOVERAGE /* 213 */ @@ -67,17 +66,26 @@ #define NINE_STATE_BLEND_COLOR (1 << 16) #define NINE_STATE_STENCIL_REF (1 << 17) #define NINE_STATE_SAMPLE_MASK (1 << 18) -#define NINE_STATE_MISC_CONST (1 << 19) -#define NINE_STATE_FF (0x1f << 20) -#define NINE_STATE_FF_VS (0x17 << 20) -#define NINE_STATE_FF_PS (0x18 << 20) -#define NINE_STATE_FF_LIGHTING (1 << 20) -#define NINE_STATE_FF_MATERIAL (1 << 21) -#define NINE_STATE_FF_VSTRANSF (1 << 22) -#define NINE_STATE_FF_PSSTAGES (1 << 23) -#define NINE_STATE_FF_OTHER (1 << 24) -#define NINE_STATE_ALL 0x1ffffff -#define NINE_STATE_UNHANDLED (1 << 25) +#define NINE_STATE_FF (0x1f << 19) +#define NINE_STATE_FF_VS (0x17 << 19) +#define NINE_STATE_FF_PS (0x18 << 19) +#define NINE_STATE_FF_LIGHTING (1 << 19) +#define NINE_STATE_FF_MATERIAL (1 << 20) +#define NINE_STATE_FF_VSTRANSF (1 << 21) +#define NINE_STATE_FF_PSSTAGES (1 << 22) +#define NINE_STATE_FF_OTHER (1 << 23) +#define NINE_STATE_FOG_SHADER (1 << 24) +#define NINE_STATE_PS1X_SHADER (1 << 25) +#define NINE_STATE_ALL 0x3ffffff +#define NINE_STATE_UNHANDLED (1 << 26) + +#define NINE_STATE_COMMIT_DSA (1 << 0) +#define NINE_STATE_COMMIT_RASTERIZER (1 << 1) +#define NINE_STATE_COMMIT_BLEND (1 << 2) +#define NINE_STATE_COMMIT_CONST_VS (1 << 3) +#define NINE_STATE_COMMIT_CONST_PS (1 << 4) +#define NINE_STATE_COMMIT_VS (1 << 5) +#define NINE_STATE_COMMIT_PS (1 << 6) #define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4 @@ -94,6 +102,8 @@ NINE_MAX_CONST_I * 4 * sizeof(int)) +#define NINE_MAX_TEXTURE_STAGES 8 + #define NINE_MAX_LIGHTS 65536 #define NINE_MAX_LIGHTS_ACTIVE 8 @@ -124,7 +134,6 @@ struct nine_state uint16_t vs_const_b; /* NINE_MAX_CONST_B == 16 */ uint16_t ps_const_b; uint8_t ucp; - boolean srgb; } changed; struct NineSurface9 *rt[NINE_MAX_SIMULTANEOUS_RENDERTARGETS]; @@ -143,13 +152,13 @@ struct nine_state int vs_const_i[NINE_MAX_CONST_I][4]; BOOL vs_const_b[NINE_MAX_CONST_B]; float *vs_lconstf_temp; - uint32_t vs_key; struct NinePixelShader9 *ps; float *ps_const_f; int ps_const_i[NINE_MAX_CONST_I][4]; BOOL ps_const_b[NINE_MAX_CONST_B]; - uint32_t ps_key; + float *ps_lconstf_temp; + uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES]; struct { void *vs; @@ -184,13 +193,9 @@ struct nine_state struct { struct { uint32_t group; - uint32_t tex_stage[NINE_MAX_SAMPLERS][(NINED3DTSS_COUNT + 31) / 32]; + uint32_t tex_stage[NINE_MAX_TEXTURE_STAGES][(NINED3DTSS_COUNT + 31) / 32]; uint32_t transform[(NINED3DTS_COUNT + 31) / 32]; } changed; - struct { - boolean vs_const; - boolean ps_const; - } clobber; D3DMATRIX *transform; /* access only via nine_state_access_transform */ unsigned num_transforms; @@ -205,8 +210,19 @@ struct nine_state D3DMATERIAL9 material; - DWORD tex_stage[NINE_MAX_SAMPLERS][NINED3DTSS_COUNT]; + DWORD tex_stage[NINE_MAX_TEXTURE_STAGES][NINED3DTSS_COUNT]; } ff; + + uint32_t commit; + struct { + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_constant_buffer cb_vs; + struct pipe_constant_buffer cb_ps; + struct pipe_constant_buffer cb_vs_ff; + struct pipe_constant_buffer cb_ps_ff; + } pipe; }; /* map D3DRS -> NINE_STATE_x @@ -220,8 +236,10 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32]; struct NineDevice9; -boolean nine_update_state(struct NineDevice9 *, uint32_t group_mask); +void nine_update_state_framebuffer(struct NineDevice9 *); +boolean nine_update_state(struct NineDevice9 *); +void nine_state_restore_non_cso(struct NineDevice9 *device); void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *, boolean is_reset); void nine_state_clear(struct nine_state *, const boolean device); diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c index 3f176a312bf..42bc349c2cc 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.c +++ b/src/gallium/state_trackers/nine/pixelshader9.c @@ -46,7 +46,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, return hr; if (cso) { - This->variant.cso = cso; + This->ff_cso = cso; return D3D_OK; } device = This->base.device; @@ -57,6 +57,8 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_ps_const_f) / 16; info.sampler_mask_shadow = 0x0; info.sampler_ps1xtypes = 0x0; + info.fog_enable = 0; + info.projected = 0; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -69,9 +71,13 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, This->byte_code.size = info.byte_size; This->variant.cso = info.cso; + This->last_cso = info.cso; + This->last_key = 0; + This->sampler_mask = info.sampler_mask; This->rt_mask = info.rt_mask; This->const_used_size = info.const_used_size; + This->bumpenvmat_needed = info.bumpenvmat_needed; /* no constant relative addressing for ps */ assert(info.lconstf.data == NULL); assert(info.lconstf.ranges == NULL); @@ -82,11 +88,12 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This, void NinePixelShader9_dtor( struct NinePixelShader9 *This ) { - DBG("This=%p cso=%p\n", This, This->variant.cso); + DBG("This=%p\n", This); if (This->base.device) { struct pipe_context *pipe = This->base.device->pipe; - struct nine_shader_variant *var = &This->variant; + struct nine_shader_variant64 *var = &This->variant; + do { if (var->cso) { if (This->base.device->state.cso.ps == var->cso) @@ -95,8 +102,14 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This ) } var = var->next; } while (var); + + if (This->ff_cso) { + if (This->ff_cso == This->base.device->state.cso.ps) + pipe->bind_fs_state(pipe, NULL); + pipe->delete_fs_state(pipe, This->ff_cso); + } } - nine_shader_variants_free(&This->variant); + nine_shader_variants_free64(&This->variant); FREE((void *)This->byte_code.tokens); /* const_cast */ @@ -124,10 +137,16 @@ NinePixelShader9_GetFunction( struct NinePixelShader9 *This, } void * -NinePixelShader9_GetVariant( struct NinePixelShader9 *This, - uint32_t key ) +NinePixelShader9_GetVariant( struct NinePixelShader9 *This ) { - void *cso = nine_shader_variant_get(&This->variant, key); + void *cso; + uint64_t key; + + key = This->next_key; + if (key == This->last_key) + return This->last_cso; + + cso = nine_shader_variant_get64(&This->variant, key); if (!cso) { struct NineDevice9 *device = This->base.device; struct nine_shader_info info; @@ -139,13 +158,20 @@ NinePixelShader9_GetVariant( struct NinePixelShader9 *This, info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = key & 0xffff; info.sampler_ps1xtypes = key; + info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; + info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE]; + info.projected = (key >> 48) & 0xffff; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) return NULL; - nine_shader_variant_add(&This->variant, key, info.cso); + nine_shader_variant_add64(&This->variant, key, info.cso); cso = info.cso; } + + This->last_key = key; + This->last_cso = cso; + return cso; } diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h index 6dad1d1ee76..e09009f6621 100644 --- a/src/gallium/state_trackers/nine/pixelshader9.h +++ b/src/gallium/state_trackers/nine/pixelshader9.h @@ -25,13 +25,16 @@ #include "iunknown.h" #include "nine_shader.h" +#include "nine_state.h" +#include "basetexture9.h" +#include "nine_ff.h" struct nine_lconstf; struct NinePixelShader9 { struct NineUnknown base; - struct nine_shader_variant variant; + struct nine_shader_variant64 variant; struct { const DWORD *tokens; @@ -41,11 +44,17 @@ struct NinePixelShader9 unsigned const_used_size; /* in bytes */ + uint8_t bumpenvmat_needed; uint16_t sampler_mask; - uint16_t sampler_mask_shadow; uint8_t rt_mask; uint64_t ff_key[6]; + void *ff_cso; + + uint64_t last_key; + void *last_cso; + + uint64_t next_key; }; static inline struct NinePixelShader9 * NinePixelShader9( void *data ) @@ -53,9 +62,49 @@ NinePixelShader9( void *data ) return (struct NinePixelShader9 *)data; } +static inline BOOL +NinePixelShader9_UpdateKey( struct NinePixelShader9 *ps, + struct nine_state *state ) +{ + uint16_t samplers_shadow; + uint32_t samplers_ps1_types; + uint16_t projected; + uint64_t key; + BOOL res; + + if (unlikely(ps->byte_code.version < 0x20)) { + /* no depth textures, but variable targets */ + uint32_t m = ps->sampler_mask; + samplers_ps1_types = 0; + while (m) { + int s = ffs(m) - 1; + m &= ~(1 << s); + samplers_ps1_types |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2); + } + key = samplers_ps1_types; + } else { + samplers_shadow = (uint16_t)((state->samplers_shadow & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0)); + key = samplers_shadow & ps->sampler_mask; + } + + if (ps->byte_code.version < 0x30) { + key |= ((uint64_t)state->rs[D3DRS_FOGENABLE]) << 32; + key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33; + } + + if (unlikely(ps->byte_code.version < 0x14)) { + projected = nine_ff_get_projected_key(state); + key |= ((uint64_t) projected) << 48; + } + + res = ps->last_key != key; + if (res) + ps->next_key = key; + return res; +} + void * -NinePixelShader9_GetVariant( struct NinePixelShader9 *vs, - uint32_t key ); +NinePixelShader9_GetVariant( struct NinePixelShader9 *ps ); /*** public ***/ diff --git a/src/gallium/state_trackers/nine/resource9.c b/src/gallium/state_trackers/nine/resource9.c index bbc8320071b..6d915338b24 100644 --- a/src/gallium/state_trackers/nine/resource9.c +++ b/src/gallium/state_trackers/nine/resource9.c @@ -161,20 +161,22 @@ NineResource9_GetPrivateData( struct NineResource9 *This, DWORD *pSizeOfData ) { struct pheader *header; + DWORD sizeofdata; DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n", This, refguid, pData, pSizeOfData); - user_assert(pSizeOfData, E_POINTER); - header = util_hash_table_get(This->pdata, refguid); if (!header) { return D3DERR_NOTFOUND; } + user_assert(pSizeOfData, E_POINTER); + sizeofdata = *pSizeOfData; + *pSizeOfData = header->size; + if (!pData) { - *pSizeOfData = header->size; return D3D_OK; } - if (*pSizeOfData < header->size) { + if (sizeofdata < header->size) { return D3DERR_MOREDATA; } @@ -206,10 +208,13 @@ DWORD WINAPI NineResource9_SetPriority( struct NineResource9 *This, DWORD PriorityNew ) { - DWORD prev = This->priority; - + DWORD prev; DBG("This=%p, PriorityNew=%d\n", This, PriorityNew); + if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE) + return 0; + + prev = This->priority; This->priority = PriorityNew; return prev; } @@ -217,6 +222,9 @@ NineResource9_SetPriority( struct NineResource9 *This, DWORD WINAPI NineResource9_GetPriority( struct NineResource9 *This ) { + if (This->pool != D3DPOOL_MANAGED || This->type == D3DRTYPE_SURFACE) + return 0; + return This->priority; } diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c index 032b9ffcbf0..6d6e1be0b7f 100644 --- a/src/gallium/state_trackers/nine/stateblock9.c +++ b/src/gallium/state_trackers/nine/stateblock9.c @@ -251,7 +251,7 @@ nine_state_copy_common(struct nine_state *dst, dst->ff.material = src->ff.material; if (mask->changed.group & NINE_STATE_FF_PSSTAGES) { - for (s = 0; s < NINE_MAX_SAMPLERS; ++s) { + for (s = 0; s < NINE_MAX_TEXTURE_STAGES; ++s) { for (i = 0; i < NINED3DTSS_COUNT; ++i) if (mask->ff.changed.tex_stage[s][i / 32] & (1 << (i % 32))) dst->ff.tex_stage[s][i] = src->ff.tex_stage[s][i]; diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c index 7533cb3a454..14c1ce927ad 100644 --- a/src/gallium/state_trackers/nine/surface9.c +++ b/src/gallium/state_trackers/nine/surface9.c @@ -104,11 +104,11 @@ NineSurface9_ctor( struct NineSurface9 *This, /* Ram buffer with no parent. Has to allocate the resource itself */ if (!pResource && !pContainer) { assert(!user_buffer); - This->data = MALLOC( + This->data = align_malloc( nine_format_get_level_alloc_size(This->base.info.format, pDesc->Width, pDesc->Height, - 0)); + 0), 32); if (!This->data) return E_OUTOFMEMORY; } @@ -273,7 +273,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This, This->texture == D3DRTYPE_CUBETEXTURE || This->texture == D3DRTYPE_TEXTURE); - if (This->base.pool != D3DPOOL_MANAGED) + if (This->base.pool == D3DPOOL_DEFAULT) return; /* Add a dirty rect to level 0 of the parent texture */ @@ -287,7 +287,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This, NineTexture9(This->base.base.container); NineTexture9_AddDirtyRect(tex, &dirty_rect); - } else { /* This->texture == D3DRTYPE_CUBETEXTURE */ + } else if (This->texture == D3DRTYPE_CUBETEXTURE) { struct NineCubeTexture9 *ctex = NineCubeTexture9(This->base.base.container); @@ -323,6 +323,13 @@ NineSurface9_LockRect( struct NineSurface9 *This, nine_D3DLOCK_to_str(Flags)); NineSurface9_Dump(This); + /* check if it's already locked */ + user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); + + /* set pBits to NULL after lock_count check */ + user_assert(pLockedRect, E_POINTER); + pLockedRect->pBits = NULL; + #ifdef NINE_STRICT user_assert(This->base.pool != D3DPOOL_DEFAULT || (resource && (resource->flags & NINE_RESOURCE_FLAG_LOCKABLE)), @@ -337,19 +344,17 @@ NineSurface9_LockRect( struct NineSurface9 *This, user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)), D3DERR_INVALIDCALL); - /* check if it's already locked */ - user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); - user_assert(pLockedRect, E_POINTER); - user_assert(This->desc.MultiSampleType == D3DMULTISAMPLE_NONE, D3DERR_INVALIDCALL); - if (pRect && This->base.pool == D3DPOOL_DEFAULT && - util_format_is_compressed(This->base.info.format)) { + if (pRect && This->desc.Pool == D3DPOOL_DEFAULT && + compressed_format (This->desc.Format)) { const unsigned w = util_format_get_blockwidth(This->base.info.format); const unsigned h = util_format_get_blockheight(This->base.info.format); - user_assert(!(pRect->left % w) && !(pRect->right % w) && - !(pRect->top % h) && !(pRect->bottom % h), + user_assert((pRect->left == 0 && pRect->right == This->desc.Width && + pRect->top == 0 && pRect->bottom == This->desc.Height) || + (!(pRect->left % w) && !(pRect->right % w) && + !(pRect->top % h) && !(pRect->bottom % h)), D3DERR_INVALIDCALL); } @@ -363,13 +368,9 @@ NineSurface9_LockRect( struct NineSurface9 *This, usage |= PIPE_TRANSFER_DONTBLOCK; if (pRect) { + /* Windows XP accepts invalid locking rectangles, Windows 7 rejects + * them. Use Windows XP behaviour for now. */ rect_to_pipe_box(&box, pRect); - if (u_box_clip_2d(&box, &box, This->desc.Width, - This->desc.Height) < 0) { - DBG("pRect clipped by Width=%u Height=%u\n", - This->desc.Width, This->desc.Height); - return D3DERR_INVALIDCALL; - } } else { u_box_origin_2d(This->desc.Width, This->desc.Height, &box); } @@ -463,140 +464,92 @@ IDirect3DSurface9Vtbl NineSurface9_vtable = { (void *)NineSurface9_ReleaseDC }; -HRESULT -NineSurface9_CopySurface( struct NineSurface9 *This, - struct NineSurface9 *From, - const POINT *pDestPoint, - const RECT *pSourceRect ) +/* When this function is called, we have already checked + * The copy regions fit the surfaces */ +void +NineSurface9_CopyMemToDefault( struct NineSurface9 *This, + struct NineSurface9 *From, + const POINT *pDestPoint, + const RECT *pSourceRect ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *r_dst = This->base.resource; - struct pipe_resource *r_src = From->base.resource; - struct pipe_transfer *transfer; - struct pipe_box src_box; struct pipe_box dst_box; - uint8_t *p_dst; const uint8_t *p_src; + int src_x, src_y, dst_x, dst_y, copy_width, copy_height; - DBG("This=%p From=%p pDestPoint=%p pSourceRect=%p\n", - This, From, pDestPoint, pSourceRect); - - assert(This->base.pool != D3DPOOL_MANAGED && - From->base.pool != D3DPOOL_MANAGED); + assert(This->base.pool == D3DPOOL_DEFAULT && + From->base.pool == D3DPOOL_SYSTEMMEM); - user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); + if (pDestPoint) { + dst_x = pDestPoint->x; + dst_y = pDestPoint->y; + } else { + dst_x = 0; + dst_y = 0; + } - dst_box.x = pDestPoint ? pDestPoint->x : 0; - dst_box.y = pDestPoint ? pDestPoint->y : 0; + if (pSourceRect) { + src_x = pSourceRect->left; + src_y = pSourceRect->top; + copy_width = pSourceRect->right - pSourceRect->left; + copy_height = pSourceRect->bottom - pSourceRect->top; + } else { + src_x = 0; + src_y = 0; + copy_width = From->desc.Width; + copy_height = From->desc.Height; + } - user_assert(dst_box.x >= 0 && - dst_box.y >= 0, D3DERR_INVALIDCALL); + u_box_2d_zslice(dst_x, dst_y, This->layer, + copy_width, copy_height, &dst_box); - dst_box.z = This->layer; - src_box.z = From->layer; + p_src = NineSurface9_GetSystemMemPointer(From, src_x, src_y); - dst_box.depth = 1; - src_box.depth = 1; + pipe->transfer_inline_write(pipe, r_dst, This->level, + 0, /* WRITE|DISCARD are implicit */ + &dst_box, p_src, From->stride, 0); - if (pSourceRect) { - /* make sure it doesn't range outside the source surface */ - user_assert(pSourceRect->left >= 0 && - pSourceRect->right <= From->desc.Width && - pSourceRect->top >= 0 && - pSourceRect->bottom <= From->desc.Height, - D3DERR_INVALIDCALL); - if (rect_to_pipe_box_xy_only_clamp(&src_box, pSourceRect)) - return D3D_OK; - } else { - src_box.x = 0; - src_box.y = 0; - src_box.width = From->desc.Width; - src_box.height = From->desc.Height; - } + NineSurface9_MarkContainerDirty(This); +} - /* limits */ - dst_box.width = This->desc.Width - dst_box.x; - dst_box.height = This->desc.Height - dst_box.y; +void +NineSurface9_CopyDefaultToMem( struct NineSurface9 *This, + struct NineSurface9 *From ) +{ + struct pipe_context *pipe = This->pipe; + struct pipe_resource *r_src = From->base.resource; + struct pipe_transfer *transfer; + struct pipe_box src_box; + uint8_t *p_dst; + const uint8_t *p_src; - user_assert(src_box.width <= dst_box.width && - src_box.height <= dst_box.height, D3DERR_INVALIDCALL); + assert(This->base.pool == D3DPOOL_SYSTEMMEM && + From->base.pool == D3DPOOL_DEFAULT); - dst_box.width = src_box.width; - dst_box.height = src_box.height; + assert(This->desc.Width == From->desc.Width); + assert(This->desc.Height == From->desc.Height); - /* check source block align for compressed textures */ - if (util_format_is_compressed(From->base.info.format) && - ((src_box.width != From->desc.Width) || - (src_box.height != From->desc.Height))) { - const unsigned w = util_format_get_blockwidth(From->base.info.format); - const unsigned h = util_format_get_blockheight(From->base.info.format); - user_assert(!(src_box.width % w) && - !(src_box.height % h), - D3DERR_INVALIDCALL); - } + u_box_origin_2d(This->desc.Width, This->desc.Height, &src_box); + src_box.z = From->layer; - /* check destination block align for compressed textures */ - if (util_format_is_compressed(This->base.info.format) && - ((dst_box.width != This->desc.Width) || - (dst_box.height != This->desc.Height) || - dst_box.x != 0 || - dst_box.y != 0)) { - const unsigned w = util_format_get_blockwidth(This->base.info.format); - const unsigned h = util_format_get_blockheight(This->base.info.format); - user_assert(!(dst_box.x % w) && !(dst_box.width % w) && - !(dst_box.y % h) && !(dst_box.height % h), - D3DERR_INVALIDCALL); - } + p_src = pipe->transfer_map(pipe, r_src, From->level, + PIPE_TRANSFER_READ, + &src_box, &transfer); + p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - if (r_dst && r_src) { - pipe->resource_copy_region(pipe, - r_dst, This->level, - dst_box.x, dst_box.y, dst_box.z, - r_src, From->level, - &src_box); - } else - if (r_dst) { - p_src = NineSurface9_GetSystemMemPointer(From, src_box.x, src_box.y); - - pipe->transfer_inline_write(pipe, r_dst, This->level, - 0, /* WRITE|DISCARD are implicit */ - &dst_box, p_src, From->stride, 0); - } else - if (r_src) { - p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - - p_src = pipe->transfer_map(pipe, r_src, From->level, - PIPE_TRANSFER_READ, - &src_box, &transfer); - if (!p_src) - return D3DERR_DRIVERINTERNALERROR; - - util_copy_rect(p_dst, This->base.info.format, - This->stride, dst_box.x, dst_box.y, - dst_box.width, dst_box.height, - p_src, - transfer->stride, src_box.x, src_box.y); - - pipe->transfer_unmap(pipe, transfer); - } else { - p_dst = NineSurface9_GetSystemMemPointer(This, 0, 0); - p_src = NineSurface9_GetSystemMemPointer(From, 0, 0); - - util_copy_rect(p_dst, This->base.info.format, - This->stride, dst_box.x, dst_box.y, - dst_box.width, dst_box.height, - p_src, - From->stride, src_box.x, src_box.y); - } + assert (p_src && p_dst); - if (This->base.pool == D3DPOOL_DEFAULT) - NineSurface9_MarkContainerDirty(This); - if (!r_dst && This->base.resource) - NineSurface9_AddDirtyRect(This, &dst_box); + util_copy_rect(p_dst, This->base.info.format, + This->stride, 0, 0, + This->desc.Width, This->desc.Height, + p_src, + transfer->stride, 0, 0); - return D3D_OK; + pipe->transfer_unmap(pipe, transfer); } + /* Gladly, rendering to a MANAGED surface is not permitted, so we will * never have to do the reverse, i.e. download the surface. */ diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h index 73092ab8cf5..76156ae699c 100644 --- a/src/gallium/state_trackers/nine/surface9.h +++ b/src/gallium/state_trackers/nine/surface9.h @@ -125,11 +125,15 @@ HRESULT NineSurface9_UploadSelf( struct NineSurface9 *This, const struct pipe_box *damaged ); -HRESULT -NineSurface9_CopySurface( struct NineSurface9 *This, - struct NineSurface9 *From, - const POINT *pDestPoint, - const RECT *pSourceRect ); +void +NineSurface9_CopyMemToDefault( struct NineSurface9 *This, + struct NineSurface9 *From, + const POINT *pDestPoint, + const RECT *pSourceRect ); + +void +NineSurface9_CopyDefaultToMem( struct NineSurface9 *This, + struct NineSurface9 *From ); static inline boolean NineSurface9_IsOffscreenPlain (struct NineSurface9 *This ) diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c index a62e6ad99d8..3f5be26fed7 100644 --- a/src/gallium/state_trackers/nine/swapchain9.c +++ b/src/gallium/state_trackers/nine/swapchain9.c @@ -184,7 +184,9 @@ NineSwapChain9_Resize( struct NineSwapChain9 *This, /* Note: It is the role of the backend to fill if necessary * BackBufferWidth and BackBufferHeight */ - ID3DPresent_SetPresentParameters(This->present, pParams, This->mode); + hr = ID3DPresent_SetPresentParameters(This->present, pParams, This->mode); + if (hr != D3D_OK) + return hr; /* When we have flip behaviour, d3d9 expects we get back the screen buffer when we flip. * Here we don't get back the initial content of the screen. To emulate the behaviour @@ -575,9 +577,10 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; - ID3DPresent_GetCursorPos(This->present, &device->cursor.pos); - - /* NOTE: blit messes up when box.x + box.width < 0, fix driver */ + /* NOTE: blit messes up when box.x + box.width < 0, fix driver + * NOTE2: device->cursor.pos contains coordinates relative to the screen. + * This happens to be also the position of the cursor when we are fullscreen. + * We don't use sw cursor for Windowed mode */ blit.dst.box.x = MAX2(device->cursor.pos.x, 0) - device->cursor.hotspot.x; blit.dst.box.y = MAX2(device->cursor.pos.y, 0) - device->cursor.hotspot.y; blit.dst.box.width = blit.src.box.width; @@ -587,13 +590,14 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r blit.src.box.width, blit.src.box.height, blit.dst.box.x, blit.dst.box.y); + blit.alpha_blend = TRUE; This->pipe->blit(This->pipe, &blit); } if (device->hud && resource) { hud_draw(device->hud, resource); /* XXX: no offset */ /* HUD doesn't clobber stipple */ - NineDevice9_RestoreNonCSOState(device, ~0x2); + nine_state_restore_non_cso(device); } } @@ -704,6 +708,7 @@ present( struct NineSwapChain9 *This, blit.mask = PIPE_MASK_RGBA; blit.filter = PIPE_TEX_FILTER_NEAREST; blit.scissor_enable = FALSE; + blit.alpha_blend = FALSE; This->pipe->blit(This->pipe, &blit); } @@ -835,7 +840,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This, ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]); This->base.device->state.changed.group |= NINE_STATE_FB; - nine_update_state(This->base.device, NINE_STATE_FB); + nine_update_state_framebuffer(This->base.device); return hr; } @@ -856,6 +861,8 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, DBG("GetFrontBufferData: This=%p pDestSurface=%p\n", This, pDestSurface); + user_assert(dest_surface->base.pool == D3DPOOL_SYSTEMMEM, D3DERR_INVALIDCALL); + width = dest_surface->desc.Width; height = dest_surface->desc.Height; @@ -870,7 +877,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, desc.MultiSampleQuality = 0; desc.Width = width; desc.Height = height; - /* NineSurface9_CopySurface needs same format. */ + /* NineSurface9_CopyDefaultToMem needs same format. */ desc.Format = dest_surface->desc.Format; desc.Usage = D3DUSAGE_RENDERTARGET; hr = NineSurface9_new(pDevice, NineUnknown(This), temp_resource, NULL, 0, @@ -883,7 +890,7 @@ NineSwapChain9_GetFrontBufferData( struct NineSwapChain9 *This, ID3DPresent_FrontBufferCopy(This->present, temp_handle); - NineSurface9_CopySurface(dest_surface, temp_surface, NULL, NULL); + NineSurface9_CopyDefaultToMem(dest_surface, temp_surface); ID3DPresent_DestroyD3DWindowBuffer(This->present, temp_handle); NineUnknown_Destroy(NineUnknown(temp_surface)); diff --git a/src/gallium/state_trackers/nine/texture9.c b/src/gallium/state_trackers/nine/texture9.c index 5900e76e52c..bc325c1335e 100644 --- a/src/gallium/state_trackers/nine/texture9.c +++ b/src/gallium/state_trackers/nine/texture9.c @@ -101,6 +101,13 @@ NineTexture9_ctor( struct NineTexture9 *This, if (Format != D3DFMT_NULL && pf == PIPE_FORMAT_NONE) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + info->screen = screen; info->target = PIPE_TEXTURE_2D; info->format = pf; @@ -152,10 +159,10 @@ NineTexture9_ctor( struct NineTexture9 *This, * apps access sublevels of texture even if they locked only first * level) */ level_offsets = alloca(sizeof(unsigned) * (info->last_level + 1)); - user_buffer = MALLOC( + user_buffer = align_malloc( nine_format_get_size_and_offsets(pf, level_offsets, Width, Height, - info->last_level)); + info->last_level), 32); This->managed_buffer = user_buffer; if (!This->managed_buffer) return E_OUTOFMEMORY; @@ -202,6 +209,9 @@ NineTexture9_ctor( struct NineTexture9 *This, return hr; } + /* Textures start initially dirty */ + This->dirty_rect.width = Width; + This->dirty_rect.height = Height; This->dirty_rect.depth = 1; /* widht == 0 means empty, depth stays 1 */ if (pSharedHandle && !*pSharedHandle) {/* Pool == D3DPOOL_SYSTEMMEM */ @@ -219,7 +229,8 @@ NineTexture9_dtor( struct NineTexture9 *This ) if (This->surfaces) { /* The surfaces should have 0 references and be unbound now. */ for (l = 0; l <= This->base.base.info.last_level; ++l) - NineUnknown_Destroy(&This->surfaces[l]->base.base); + if (This->surfaces[l]) + NineUnknown_Destroy(&This->surfaces[l]->base.base); FREE(This->surfaces); } @@ -295,18 +306,22 @@ NineTexture9_AddDirtyRect( struct NineTexture9 *This, pDirtyRect ? pDirtyRect->left : 0, pDirtyRect ? pDirtyRect->top : 0, pDirtyRect ? pDirtyRect->right : 0, pDirtyRect ? pDirtyRect->bottom : 0); - /* Tracking dirty regions on DEFAULT or SYSTEMMEM resources is pointless, + /* Tracking dirty regions on DEFAULT resources is pointless, * because we always write to the final storage. Just marked it dirty in * case we need to generate mip maps. */ - if (This->base.base.pool != D3DPOOL_MANAGED) { - if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) + if (This->base.base.pool == D3DPOOL_DEFAULT) { + if (This->base.base.usage & D3DUSAGE_AUTOGENMIPMAP) { This->base.dirty_mip = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyRect) { u_box_origin_2d(This->base.base.info.width0, diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c index bbd5ce99d9a..fdfb79a138e 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.c +++ b/src/gallium/state_trackers/nine/vertexshader9.c @@ -48,9 +48,10 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, return hr; if (cso) { - This->variant.cso = cso; + This->ff_cso = cso; return D3D_OK; } + device = This->base.device; info.type = PIPE_SHADER_VERTEX; @@ -59,6 +60,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.sampler_mask_shadow = 0x0; info.sampler_ps1xtypes = 0x0; + info.fog_enable = 0; hr = nine_translate_shader(device, &info); if (FAILED(hr)) @@ -71,6 +73,9 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, This->byte_code.size = info.byte_size; This->variant.cso = info.cso; + This->last_cso = info.cso; + This->last_key = 0; + This->const_used_size = info.const_used_size; This->lconstf = info.lconstf; This->sampler_mask = info.sampler_mask; @@ -87,11 +92,12 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This, void NineVertexShader9_dtor( struct NineVertexShader9 *This ) { - DBG("This=%p cso=%p\n", This, This->variant.cso); + DBG("This=%p\n", This); if (This->base.device) { struct pipe_context *pipe = This->base.device->pipe; struct nine_shader_variant *var = &This->variant; + do { if (var->cso) { if (This->base.device->state.cso.vs == var->cso) @@ -100,6 +106,12 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This ) } var = var->next; } while (var); + + if (This->ff_cso) { + if (This->ff_cso == This->base.device->state.cso.vs) + pipe->bind_vs_state(pipe, NULL); + pipe->delete_vs_state(pipe, This->ff_cso); + } } nine_shader_variants_free(&This->variant); @@ -130,10 +142,16 @@ NineVertexShader9_GetFunction( struct NineVertexShader9 *This, } void * -NineVertexShader9_GetVariant( struct NineVertexShader9 *This, - uint32_t key ) +NineVertexShader9_GetVariant( struct NineVertexShader9 *This ) { - void *cso = nine_shader_variant_get(&This->variant, key); + void *cso; + uint32_t key; + + key = This->next_key; + if (key == This->last_key) + return This->last_cso; + + cso = nine_shader_variant_get(&This->variant, key); if (!cso) { struct NineDevice9 *device = This->base.device; struct nine_shader_info info; @@ -144,6 +162,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This, info.const_b_base = NINE_CONST_B_BASE(device->max_vs_const_f) / 16; info.byte_code = This->byte_code.tokens; info.sampler_mask_shadow = key & 0xf; + info.fog_enable = device->state.rs[D3DRS_FOGENABLE]; hr = nine_translate_shader(This->base.device, &info); if (FAILED(hr)) @@ -151,6 +170,10 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This, nine_shader_variant_add(&This->variant, key, info.cso); cso = info.cso; } + + This->last_key = key; + This->last_cso = cso; + return cso; } diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h index 66c602c7b3c..15c3f4ff041 100644 --- a/src/gallium/state_trackers/nine/vertexshader9.h +++ b/src/gallium/state_trackers/nine/vertexshader9.h @@ -25,6 +25,7 @@ #include "iunknown.h" #include "nine_shader.h" +#include "nine_state.h" struct NineVertexShader9 { @@ -43,7 +44,6 @@ struct NineVertexShader9 } byte_code; uint8_t sampler_mask; - uint8_t sampler_mask_shadow; boolean position_t; /* if true, disable vport transform */ boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */ @@ -54,7 +54,13 @@ struct NineVertexShader9 const struct pipe_stream_output_info *so; - uint64_t ff_key[2]; + uint64_t ff_key[3]; + void *ff_cso; + + uint32_t last_key; + void *last_cso; + + uint32_t next_key; }; static inline struct NineVertexShader9 * NineVertexShader9( void *data ) @@ -62,9 +68,29 @@ NineVertexShader9( void *data ) return (struct NineVertexShader9 *)data; } +static inline BOOL +NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs, + struct nine_state *state ) +{ + uint8_t samplers_shadow; + uint32_t key; + BOOL res; + + samplers_shadow = (uint8_t)((state->samplers_shadow & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0)); + samplers_shadow &= vs->sampler_mask; + key = samplers_shadow; + + if (vs->byte_code.version < 0x30) + key |= state->rs[D3DRS_FOGENABLE] << 8; + + res = vs->last_key != key; + if (res) + vs->next_key = key; + return res; +} + void * -NineVertexShader9_GetVariant( struct NineVertexShader9 *vs, - uint32_t key ); +NineVertexShader9_GetVariant( struct NineVertexShader9 *vs ); /*** public ***/ diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c index 4dfc5599a8e..0b9005685a9 100644 --- a/src/gallium/state_trackers/nine/volume9.c +++ b/src/gallium/state_trackers/nine/volume9.c @@ -23,6 +23,7 @@ #include "device9.h" #include "volume9.h" #include "basetexture9.h" /* for marking dirty */ +#include "volumetexture9.h" #include "nine_helpers.h" #include "nine_pipe.h" #include "nine_dump.h" @@ -43,7 +44,7 @@ NineVolume9_AllocateData( struct NineVolume9 *This ) DBG("(%p(This=%p),level=%u) Allocating 0x%x bytes of system memory.\n", This->base.container, This, This->level, size); - This->data = (uint8_t *)MALLOC(size); + This->data = (uint8_t *)align_malloc(size, 32); if (!This->data) return E_OUTOFMEMORY; return D3D_OK; @@ -182,47 +183,23 @@ NineVolume9_GetDesc( struct NineVolume9 *This, return D3D_OK; } -static inline boolean -NineVolume9_IsDirty(struct NineVolume9 *This) -{ - return This->dirty_box[0].width != 0; -} - inline void NineVolume9_AddDirtyRegion( struct NineVolume9 *This, const struct pipe_box *box ) { - struct pipe_box cover_a, cover_b; - float vol[2]; + D3DBOX dirty_region; + struct NineVolumeTexture9 *tex = NineVolumeTexture9(This->base.container); if (!box) { - u_box_3d(0, 0, 0, This->desc.Width, This->desc.Height, - This->desc.Depth, &This->dirty_box[0]); - memset(&This->dirty_box[1], 0, sizeof(This->dirty_box[1])); - return; - } - if (!This->dirty_box[0].width) { - This->dirty_box[0] = *box; - return; - } - - u_box_union_3d(&cover_a, &This->dirty_box[0], box); - vol[0] = u_box_volume_3d(&cover_a); - - if (This->dirty_box[1].width == 0) { - vol[1] = u_box_volume_3d(&This->dirty_box[0]); - if (vol[0] > (vol[1] * 1.5f)) - This->dirty_box[1] = *box; - else - This->dirty_box[0] = cover_a; + NineVolumeTexture9_AddDirtyBox(tex, NULL); } else { - u_box_union_3d(&cover_b, &This->dirty_box[1], box); - vol[1] = u_box_volume_3d(&cover_b); - - if (vol[0] > vol[1]) - This->dirty_box[1] = cover_b; - else - This->dirty_box[0] = cover_a; + dirty_region.Left = box->x << This->level_actual; + dirty_region.Top = box->y << This->level_actual; + dirty_region.Front = box->z << This->level_actual; + dirty_region.Right = dirty_region.Left + (box->width << This->level_actual); + dirty_region.Bottom = dirty_region.Top + (box->height << This->level_actual); + dirty_region.Back = dirty_region.Front + (box->depth << This->level_actual); + NineVolumeTexture9_AddDirtyBox(tex, &dirty_region); } } @@ -254,21 +231,26 @@ NineVolume9_LockBox( struct NineVolume9 *This, pBox ? pBox->Front : 0, pBox ? pBox->Back : 0, nine_D3DLOCK_to_str(Flags)); + /* check if it's already locked */ + user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); + + /* set pBits to NULL after lock_count check */ + user_assert(pLockedVolume, E_POINTER); + pLockedVolume->pBits = NULL; + user_assert(This->desc.Pool != D3DPOOL_DEFAULT || (This->desc.Usage & D3DUSAGE_DYNAMIC), D3DERR_INVALIDCALL); user_assert(!((Flags & D3DLOCK_DISCARD) && (Flags & D3DLOCK_READONLY)), D3DERR_INVALIDCALL); - user_assert(This->lock_count == 0, D3DERR_INVALIDCALL); - user_assert(pLockedVolume, E_POINTER); - - if (pBox && This->desc.Pool == D3DPOOL_DEFAULT && - util_format_is_compressed(This->info.format)) { + if (pBox && compressed_format (This->desc.Format)) { /* For volume all pools are checked */ const unsigned w = util_format_get_blockwidth(This->info.format); const unsigned h = util_format_get_blockheight(This->info.format); - user_assert(!(pBox->Left % w) && !(pBox->Right % w) && - !(pBox->Top % h) && !(pBox->Bottom % h), + user_assert((pBox->Left == 0 && pBox->Right == This->desc.Width && + pBox->Top == 0 && pBox->Bottom == This->desc.Height) || + (!(pBox->Left % w) && !(pBox->Right % w) && + !(pBox->Top % h) && !(pBox->Bottom % h)), D3DERR_INVALIDCALL); } @@ -312,8 +294,7 @@ NineVolume9_LockBox( struct NineVolume9 *This, if (!(Flags & (D3DLOCK_NO_DIRTY_UPDATE | D3DLOCK_READONLY))) { NineVolume9_MarkContainerDirty(This); - if (This->desc.Pool == D3DPOOL_MANAGED) - NineVolume9_AddDirtyRegion(This, &box); + NineVolume9_AddDirtyRegion(This, &box); } ++This->lock_count; @@ -333,42 +314,31 @@ NineVolume9_UnlockBox( struct NineVolume9 *This ) return D3D_OK; } - +/* When this function is called, we have already checked + * The copy regions fit the volumes */ HRESULT -NineVolume9_CopyVolume( struct NineVolume9 *This, - struct NineVolume9 *From, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_box *pSrcBox ) +NineVolume9_CopyMemToDefault( struct NineVolume9 *This, + struct NineVolume9 *From, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_box *pSrcBox ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *r_dst = This->resource; - struct pipe_resource *r_src = From->resource; - struct pipe_transfer *transfer; struct pipe_box src_box; struct pipe_box dst_box; - uint8_t *p_dst; const uint8_t *p_src; DBG("This=%p From=%p dstx=%u dsty=%u dstz=%u pSrcBox=%p\n", This, From, dstx, dsty, dstz, pSrcBox); - assert(This->desc.Pool != D3DPOOL_MANAGED && - From->desc.Pool != D3DPOOL_MANAGED); - user_assert(This->desc.Format == From->desc.Format, D3DERR_INVALIDCALL); + assert(This->desc.Pool == D3DPOOL_DEFAULT && + From->desc.Pool == D3DPOOL_SYSTEMMEM); dst_box.x = dstx; dst_box.y = dsty; dst_box.z = dstz; if (pSrcBox) { - /* make sure it doesn't range outside the source volume */ - user_assert(pSrcBox->x >= 0 && - (pSrcBox->width - pSrcBox->x) <= From->desc.Width && - pSrcBox->y >= 0 && - (pSrcBox->height - pSrcBox->y) <= From->desc.Height && - pSrcBox->z >= 0 && - (pSrcBox->depth - pSrcBox->z) <= From->desc.Depth, - D3DERR_INVALIDCALL); src_box = *pSrcBox; } else { src_box.x = 0; @@ -378,101 +348,54 @@ NineVolume9_CopyVolume( struct NineVolume9 *This, src_box.height = From->desc.Height; src_box.depth = From->desc.Depth; } - /* limits */ - dst_box.width = This->desc.Width - dst_box.x; - dst_box.height = This->desc.Height - dst_box.y; - dst_box.depth = This->desc.Depth - dst_box.z; - - user_assert(src_box.width <= dst_box.width && - src_box.height <= dst_box.height && - src_box.depth <= dst_box.depth, D3DERR_INVALIDCALL); dst_box.width = src_box.width; dst_box.height = src_box.height; dst_box.depth = src_box.depth; - if (r_dst && r_src) { - pipe->resource_copy_region(pipe, - r_dst, This->level, - dst_box.x, dst_box.y, dst_box.z, - r_src, From->level, - &src_box); - } else - if (r_dst) { - p_src = NineVolume9_GetSystemMemPointer(From, - src_box.x, src_box.y, src_box.z); - - pipe->transfer_inline_write(pipe, r_dst, This->level, - 0, /* WRITE|DISCARD are implicit */ - &dst_box, p_src, - From->stride, From->layer_stride); - } else - if (r_src) { - p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0); - p_src = pipe->transfer_map(pipe, r_src, From->level, - PIPE_TRANSFER_READ, - &src_box, &transfer); - if (!p_src) - return D3DERR_DRIVERINTERNALERROR; - - util_copy_box(p_dst, This->info.format, - This->stride, This->layer_stride, - dst_box.x, dst_box.y, dst_box.z, - dst_box.width, dst_box.height, dst_box.depth, - p_src, - transfer->stride, transfer->layer_stride, - src_box.x, src_box.y, src_box.z); + p_src = NineVolume9_GetSystemMemPointer(From, + src_box.x, src_box.y, src_box.z); - pipe->transfer_unmap(pipe, transfer); - } else { - p_dst = NineVolume9_GetSystemMemPointer(This, 0, 0, 0); - p_src = NineVolume9_GetSystemMemPointer(From, 0, 0, 0); - - util_copy_box(p_dst, This->info.format, - This->stride, This->layer_stride, - dst_box.x, dst_box.y, dst_box.z, - dst_box.width, dst_box.height, dst_box.depth, - p_src, - From->stride, From->layer_stride, - src_box.x, src_box.y, src_box.z); - } + pipe->transfer_inline_write(pipe, r_dst, This->level, + 0, /* WRITE|DISCARD are implicit */ + &dst_box, p_src, + From->stride, From->layer_stride); - if (This->desc.Pool == D3DPOOL_DEFAULT) - NineVolume9_MarkContainerDirty(This); - if (!r_dst && This->resource) - NineVolume9_AddDirtyRegion(This, &dst_box); + NineVolume9_MarkContainerDirty(This); return D3D_OK; } HRESULT -NineVolume9_UploadSelf( struct NineVolume9 *This ) +NineVolume9_UploadSelf( struct NineVolume9 *This, + const struct pipe_box *damaged ) { struct pipe_context *pipe = This->pipe; struct pipe_resource *res = This->resource; + struct pipe_box box; uint8_t *ptr; - unsigned i; - DBG("This=%p dirty=%i data=%p res=%p\n", This, NineVolume9_IsDirty(This), + DBG("This=%p damaged=%p data=%p res=%p\n", This, damaged, This->data, res); assert(This->desc.Pool == D3DPOOL_MANAGED); - - if (!NineVolume9_IsDirty(This)) - return D3D_OK; assert(res); - for (i = 0; i < Elements(This->dirty_box); ++i) { - const struct pipe_box *box = &This->dirty_box[i]; - if (box->width == 0) - break; - ptr = NineVolume9_GetSystemMemPointer(This, box->x, box->y, box->z); - - pipe->transfer_inline_write(pipe, res, This->level, - 0, - box, ptr, This->stride, This->layer_stride); + if (damaged) { + box = *damaged; + } else { + box.x = 0; + box.y = 0; + box.z = 0; + box.width = This->desc.Width; + box.height = This->desc.Height; + box.depth = This->desc.Depth; } - NineVolume9_ClearDirtyRegion(This); + + ptr = NineVolume9_GetSystemMemPointer(This, box.x, box.y, box.z); + + pipe->transfer_inline_write(pipe, res, This->level, 0, &box, + ptr, This->stride, This->layer_stride); return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h index fae24310a50..26ca8a32605 100644 --- a/src/gallium/state_trackers/nine/volume9.h +++ b/src/gallium/state_trackers/nine/volume9.h @@ -50,8 +50,6 @@ struct NineVolume9 struct pipe_transfer *transfer; unsigned lock_count; - struct pipe_box dirty_box[2]; - struct pipe_context *pipe; /* for [GS]etPrivateData/FreePrivateData */ @@ -85,20 +83,15 @@ void NineVolume9_AddDirtyRegion( struct NineVolume9 *This, const struct pipe_box *box ); -static inline void -NineVolume9_ClearDirtyRegion( struct NineVolume9 *This ) -{ - memset(&This->dirty_box, 0, sizeof(This->dirty_box)); -} - HRESULT -NineVolume9_CopyVolume( struct NineVolume9 *This, - struct NineVolume9 *From, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_box *pSrcBox ); +NineVolume9_CopyMemToDefault( struct NineVolume9 *This, + struct NineVolume9 *From, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_box *pSrcBox ); HRESULT -NineVolume9_UploadSelf( struct NineVolume9 *This ); +NineVolume9_UploadSelf( struct NineVolume9 *This, + const struct pipe_box *damaged ); /*** Direct3D public ***/ diff --git a/src/gallium/state_trackers/nine/volumetexture9.c b/src/gallium/state_trackers/nine/volumetexture9.c index 1193e12f34c..e5b2b53148d 100644 --- a/src/gallium/state_trackers/nine/volumetexture9.c +++ b/src/gallium/state_trackers/nine/volumetexture9.c @@ -64,6 +64,13 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, if (Format == D3DFMT_ATI1 || Format == D3DFMT_ATI2) return D3DERR_INVALIDCALL; + if (compressed_format(Format)) { + const unsigned w = util_format_get_blockwidth(pf); + const unsigned h = util_format_get_blockheight(pf); + /* Compressed formats are not compressed on depth component */ + user_assert(!(Width % w) && !(Height % h), D3DERR_INVALIDCALL); + } + info->screen = pParams->device->screen; info->target = PIPE_TEXTURE_3D; info->format = pf; @@ -116,6 +123,9 @@ NineVolumeTexture9_ctor( struct NineVolumeTexture9 *This, return hr; } + /* Textures start initially dirty */ + NineVolumeTexture9_AddDirtyBox(This, NULL); + return D3D_OK; } @@ -193,12 +203,14 @@ NineVolumeTexture9_AddDirtyBox( struct NineVolumeTexture9 *This, { DBG("This=%p pDirtybox=%p\n", This, pDirtyBox); - if (This->base.base.pool != D3DPOOL_MANAGED) { + if (This->base.base.pool == D3DPOOL_DEFAULT) { return D3D_OK; } - This->base.managed.dirty = TRUE; - BASETEX_REGISTER_UPDATE(&This->base); + if (This->base.base.pool == D3DPOOL_MANAGED) { + This->base.managed.dirty = TRUE; + BASETEX_REGISTER_UPDATE(&This->base); + } if (!pDirtyBox) { This->dirty_box.x = 0; |