diff options
author | Tim Rowley <[email protected]> | 2016-02-16 17:27:28 -0600 |
---|---|---|
committer | Tim Rowley <[email protected]> | 2016-03-02 18:38:41 -0600 |
commit | 2b2d3680bf164ec4f8b50436b96c3fc195318ea5 (patch) | |
tree | 94a91efdf29f8baf830a7124d49c1d6f9d2647ff /src | |
parent | 2eec41f6f1b85b43e38721661f4b21bd982c6a46 (diff) |
gallium/swr: add OpenSWR driver
OpenSWR is a new software rasterizer for x86 processors designed
for high performance and high scalability on visualization workloads.
Acked-by: Roland Scheidegger <[email protected]>
Acked-by: Jose Fonseca <[email protected]>
Diffstat (limited to 'src')
23 files changed, 5718 insertions, 0 deletions
diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp new file mode 100644 index 00000000000..9027f84f6ea --- /dev/null +++ b/src/gallium/drivers/swr/swr_clear.cpp @@ -0,0 +1,142 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "swr_context.h" +#include "swr_query.h" + +static void +swr_clear(struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union *color, + double depth, + unsigned stencil) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_framebuffer_state *fb = &ctx->framebuffer; + + UINT clearMask = 0; + + if (!swr_check_render_cond(pipe)) + return; + + if (ctx->dirty) + swr_update_derived(ctx); + +/* Update clearMask/targetMask */ +#if 0 /* XXX SWR currently only clears SWR_ATTACHMENT_COLOR0, don't bother \ + checking others yet. */ + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + UINT i; + for (i = 0; i < fb->nr_cbufs; ++i) + if (fb->cbufs[i]) + clearMask |= (SWR_CLEAR_COLOR0 << i); + } +#else + if (buffers & PIPE_CLEAR_COLOR && fb->cbufs[0]) + clearMask |= SWR_CLEAR_COLOR; +#endif + + if (buffers & PIPE_CLEAR_DEPTH && fb->zsbuf) + clearMask |= SWR_CLEAR_DEPTH; + + if (buffers & PIPE_CLEAR_STENCIL && fb->zsbuf) + clearMask |= SWR_CLEAR_STENCIL; + +#if 0 // XXX HACK, override clear color alpha. On ubuntu, clears are + // transparent. + ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */ +#endif + + /* Reset viewport to full framebuffer width/height before clear, then + * restore it */ + /* Scissor affects clear, viewport should not */ + ctx->dirty |= SWR_NEW_VIEWPORT; + SWR_VIEWPORT vp = {0}; + vp.width = ctx->framebuffer.width; + vp.height = ctx->framebuffer.height; + SwrSetViewports(ctx->swrContext, 1, &vp, NULL); + + swr_update_draw_context(ctx); + SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil); +} + + +#if 0 // XXX, these don't get called. how to get these called? Do we need + // them? Docs? 
+static void +swr_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct swr_context *ctx = swr_context(pipe); + fprintf(stderr, "SWR swr_clear_render_target!\n"); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} + +static void +swr_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct swr_context *ctx = swr_context(pipe); + fprintf(stderr, "SWR swr_clear_depth_stencil!\n"); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} + +static void +swr_clear_buffer(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + fprintf(stderr, "SWR swr_clear_buffer!\n"); + struct swr_context *ctx = swr_context(pipe); + struct swr_resource *buf = swr_resource(res); + union pipe_color_union color; + enum pipe_format dst_fmt; + unsigned width, height, elements; + + assert(res->target == PIPE_BUFFER); + assert(buf); + assert(size % data_size == 0); + + SWR_SURFACE_STATE &swr_buffer = buf->swr; + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} +#endif + + +void +swr_clear_init(struct pipe_context *pipe) +{ + pipe->clear = swr_clear; +#if 0 // XXX, these don't get called. how to get these called? Do we need + // them? Docs? + pipe->clear_render_target = swr_clear_render_target; + pipe->clear_depth_stencil = swr_clear_depth_stencil; + pipe->clear_buffer = swr_clear_buffer; +#endif +} diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp new file mode 100644 index 00000000000..0e7ebb74d92 --- /dev/null +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -0,0 +1,407 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. 
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +extern "C" { +#include "util/u_transfer.h" +#include "util/u_surface.h" +} + +#include "swr_context.h" +#include "swr_memory.h" +#include "swr_screen.h" +#include "swr_resource.h" +#include "swr_scratch.h" +#include "swr_query.h" + +#include "api.h" +#include "backend.h" + +static struct pipe_surface * +swr_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = surf_tmpl->format; + if (pt->target != PIPE_BUFFER) { + assert(surf_tmpl->u.tex.level <= pt->last_level); + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + if (ps->u.tex.first_layer != ps->u.tex.last_layer) { + debug_printf("creating surface with multiple layers, rendering " + "to first layer only\n"); + } + } else { + /* setting width as number of elements should get us correct + * renderbuffer width */ + ps->width = surf_tmpl->u.buf.last_element + - surf_tmpl->u.buf.first_element + 1; + ps->height = pt->height0; + ps->u.buf.first_element = surf_tmpl->u.buf.first_element; + ps->u.buf.last_element = surf_tmpl->u.buf.last_element; + assert(ps->u.buf.first_element <= ps->u.buf.last_element); + assert(ps->u.buf.last_element < ps->width); + } + } + return ps; +} + +static void +swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) +{ + assert(surf->texture); + struct pipe_resource *resource = surf->texture; + + /* If the surface being destroyed is a 
current render target, + * call StoreTiles to resolve the hotTile state then set attachment + * to NULL. + */ + if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL + | PIPE_BIND_DISPLAY_TARGET)) { + struct swr_context *ctx = swr_context(pipe); + struct swr_resource *spr = swr_resource(resource); + swr_draw_context *pDC = &ctx->swrDC; + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { + swr_store_render_target(ctx, i, SWR_TILE_RESOLVED); + + /* + * Mesa thinks depth/stencil are fused, so we'll never get an + * explicit resource for stencil. So, if checking depth, then + * also check for stencil. + */ + if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) { + swr_store_render_target( + ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_RESOLVED); + } + + SwrWaitForIdle(ctx->swrContext); + break; + } + } + + pipe_resource_reference(&surf->texture, NULL); + FREE(surf); +} + + +static void * +swr_transfer_map(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct swr_resource *spr = swr_resource(resource); + struct pipe_transfer *pt; + enum pipe_format format = resource->format; + + assert(resource); + assert(level <= resource->last_level); + + /* + * If mapping any attached rendertarget, store tiles and wait for idle + * before giving CPU access to the surface. 
+ * (set postStoreTileState to SWR_TILE_INVALID so tiles are reloaded) + */ + if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL + | PIPE_BIND_DISPLAY_TARGET)) { + struct swr_context *ctx = swr_context(pipe); + swr_draw_context *pDC = &ctx->swrDC; + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { + swr_store_render_target(ctx, i, SWR_TILE_INVALID); + /* + * Mesa thinks depth/stencil are fused, so we'll never get an + * explicit map for stencil. So, if mapping depth, then also + * store tile for stencil. + */ + if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) + swr_store_render_target( + ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_INVALID); + SwrWaitForIdle(ctx->swrContext); + break; + } + } + + pt = CALLOC_STRUCT(pipe_transfer); + if (!pt) + return NULL; + pipe_resource_reference(&pt->resource, resource); + pt->level = level; + pt->box = *box; + pt->stride = spr->row_stride[level]; + pt->layer_stride = spr->img_stride[level]; + + /* if we're mapping the depth/stencil, copy in stencil */ + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT + && spr->has_stencil) { + for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { + spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; + } + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + && spr->has_stencil) { + for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { + spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; + } + } + + unsigned offset = box->z * pt->layer_stride + box->y * pt->stride + + box->x * util_format_get_blocksize(format); + + *transfer = pt; + + return spr->swr.pBaseAddress + offset + spr->mip_offsets[level]; +} + +static void +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +{ + assert(transfer->resource); + + /* + * XXX TODO: use fences and come up 
with a real resource manager. + * + * If this resource has been mapped/unmapped, it's probably in use. Tag it + *with this context so + * we'll know to check dependencies when it's deleted. + */ + struct swr_resource *res = swr_resource(transfer->resource); + res->bound_to_context = (void *)pipe; + + /* if we're mapping the depth/stencil, copy out stencil */ + if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT + && res->has_stencil) { + for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { + res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3]; + } + } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + && res->has_stencil) { + for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { + res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4]; + } + } + + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + + +static void +swr_resource_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, + unsigned dsty, + unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) + || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) { + util_resource_copy_region( + pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); + return; + } + + debug_printf("unhandled swr_resource_copy\n"); +} + + +static void +swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_blit_info info = *blit_info; + + if (blit_info->render_condition_enable && !swr_check_render_cond(pipe)) + return; + + if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 + && !util_format_is_depth_or_stencil(info.src.resource->format) + && !util_format_is_pure_integer(info.src.resource->format)) { + debug_printf("swr: color resolve unimplemented\n"); + 
return; + } + + if (util_try_blit_via_copy_region(pipe, &info)) { + return; /* done */ + } + + if (info.mask & PIPE_MASK_S) { + debug_printf("swr: cannot blit stencil, skipping\n"); + info.mask &= ~PIPE_MASK_S; + } + + if (!util_blitter_is_blit_supported(ctx->blitter, &info)) { + debug_printf("swr: blit unsupported %s -> %s\n", + util_format_short_name(info.src.resource->format), + util_format_short_name(info.dst.resource->format)); + return; + } + + /* XXX turn off occlusion and streamout queries */ + + util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); + util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); + util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); + /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ + util_blitter_save_so_targets( + ctx->blitter, + ctx->num_so_targets, + (struct pipe_stream_output_target **)ctx->so_targets); + util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer); + util_blitter_save_viewport(ctx->blitter, &ctx->viewport); + util_blitter_save_scissor(ctx->blitter, &ctx->scissor); + util_blitter_save_fragment_shader(ctx->blitter, ctx->fs); + util_blitter_save_blend(ctx->blitter, (void *)ctx->blend); + util_blitter_save_depth_stencil_alpha(ctx->blitter, + (void *)ctx->depth_stencil); + util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); + util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask); + util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer); + util_blitter_save_fragment_sampler_states( + ctx->blitter, + ctx->num_samplers[PIPE_SHADER_FRAGMENT], + (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_sampler_views( + ctx->blitter, + ctx->num_sampler_views[PIPE_SHADER_FRAGMENT], + ctx->sampler_views[PIPE_SHADER_FRAGMENT]); + util_blitter_save_render_condition(ctx->blitter, + ctx->render_cond_query, + ctx->render_cond_cond, + ctx->render_cond_mode); + + util_blitter_blit(ctx->blitter, &info); +} + + 
+static void +swr_destroy(struct pipe_context *pipe) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); + + if (ctx->swrContext) + SwrDestroyContext(ctx->swrContext); + + delete ctx->blendJIT; + + swr_destroy_scratch_buffers(ctx); + + FREE(ctx); +} + + +static void +swr_render_condition(struct pipe_context *pipe, + struct pipe_query *query, + boolean condition, + uint mode) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->render_cond_query = query; + ctx->render_cond_mode = mode; + ctx->render_cond_cond = condition; +} + + +struct pipe_context * +swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags) +{ + struct swr_context *ctx = CALLOC_STRUCT(swr_context); + ctx->blendJIT = + new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>; + + SWR_CREATECONTEXT_INFO createInfo; + createInfo.driver = GL; + createInfo.privateStateSize = sizeof(swr_draw_context); + createInfo.maxSubContexts = 0; + createInfo.pfnLoadTile = swr_LoadHotTile; + createInfo.pfnStoreTile = swr_StoreHotTile; + createInfo.pfnClearTile = swr_StoreHotTileClear; + ctx->swrContext = SwrCreateContext(&createInfo); + + /* Init Load/Store/ClearTiles Tables */ + swr_InitMemoryModule(); + + InitBackendFuncTables(); + + if (ctx->swrContext == NULL) + goto fail; + + ctx->pipe.screen = screen; + ctx->pipe.destroy = swr_destroy; + ctx->pipe.priv = priv; + ctx->pipe.create_surface = swr_create_surface; + ctx->pipe.surface_destroy = swr_surface_destroy; + ctx->pipe.transfer_map = swr_transfer_map; + ctx->pipe.transfer_unmap = swr_transfer_unmap; + + ctx->pipe.transfer_flush_region = u_default_transfer_flush_region; + ctx->pipe.transfer_inline_write = u_default_transfer_inline_write; + + ctx->pipe.resource_copy_region = swr_resource_copy; + ctx->pipe.render_condition = swr_render_condition; + + swr_state_init(&ctx->pipe); + swr_clear_init(&ctx->pipe); + swr_draw_init(&ctx->pipe); + swr_query_init(&ctx->pipe); + + 
ctx->pipe.blit = swr_blit; + ctx->blitter = util_blitter_create(&ctx->pipe); + if (!ctx->blitter) { + goto fail; + } + + swr_init_scratch_buffers(ctx); + + return &ctx->pipe; + +fail: + /* Should really validate the init steps and fail gracefully */ + swr_destroy(&ctx->pipe); + return NULL; +} diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h new file mode 100644 index 00000000000..73a8e8ddda1 --- /dev/null +++ b/src/gallium/drivers/swr/swr_context.h @@ -0,0 +1,182 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_CONTEXT_H +#define SWR_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_blitter.h" +#include "jit_api.h" +#include "swr_state.h" +#include <unordered_map> + +#define SWR_NEW_BLEND (1 << 0) +#define SWR_NEW_RASTERIZER (1 << 1) +#define SWR_NEW_DEPTH_STENCIL_ALPHA (1 << 2) +#define SWR_NEW_SAMPLER (1 << 3) +#define SWR_NEW_SAMPLER_VIEW (1 << 4) +#define SWR_NEW_VS (1 << 5) +#define SWR_NEW_FS (1 << 6) +#define SWR_NEW_VSCONSTANTS (1 << 7) +#define SWR_NEW_FSCONSTANTS (1 << 8) +#define SWR_NEW_VERTEX (1 << 9) +#define SWR_NEW_STIPPLE (1 << 10) +#define SWR_NEW_SCISSOR (1 << 11) +#define SWR_NEW_VIEWPORT (1 << 12) +#define SWR_NEW_FRAMEBUFFER (1 << 13) +#define SWR_NEW_CLIP (1 << 14) +#define SWR_NEW_SO (1 << 15) +#define SWR_NEW_ALL 0x0000ffff + +namespace std +{ +template <> struct hash<BLEND_COMPILE_STATE> { + std::size_t operator()(const BLEND_COMPILE_STATE &k) const + { + return util_hash_crc32(&k, sizeof(k)); + } +}; +}; + +struct swr_jit_texture { + uint32_t width; // same as number of elements + uint32_t height; + uint32_t depth; // doubles as array size + uint32_t first_level; + uint32_t last_level; + const void *base_ptr; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; +}; + +struct swr_jit_sampler { + float min_lod; + float max_lod; + float lod_bias; + float border_color[4]; +}; + +struct swr_draw_context { + const float *constantVS[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS]; + const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS]; + + swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS]; + swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + swr_jit_sampler 
samplersFS[PIPE_MAX_SAMPLERS]; + + SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS]; +}; + +struct swr_context { + struct pipe_context pipe; /**< base class */ + + HANDLE swrContext; + + /** Constant state objects */ + struct swr_blend_state *blend; + struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; + struct pipe_depth_stencil_alpha_state *depth_stencil; + struct pipe_rasterizer_state *rasterizer; + + struct swr_vertex_shader *vs; + struct swr_fragment_shader *fs; + struct swr_vertex_element_state *velems; + + /** Other rendering state */ + struct pipe_blend_color blend_color; + struct pipe_stencil_ref stencil_ref; + struct pipe_clip_state clip; + struct pipe_constant_buffer + constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_sampler_view * + sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS]; + + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_index_buffer index_buffer; + + struct blitter_context *blitter; + + /** Conditional query object and mode */ + struct pipe_query *render_cond_query; + uint render_cond_mode; + boolean render_cond_cond; + unsigned active_queries; + + unsigned num_vertex_buffers; + unsigned num_samplers[PIPE_SHADER_TYPES]; + unsigned num_sampler_views[PIPE_SHADER_TYPES]; + + unsigned sample_mask; + + // streamout + pipe_stream_output_target *so_targets[MAX_SO_STREAMS]; + uint32_t num_so_targets; + + /* Temp storage for user_buffer constants */ + struct swr_scratch_buffers *scratch; + + // blend jit functions + std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC> *blendJIT; + + /* Derived SWR API DrawState */ + struct swr_derived_state derived; + + /* SWR private state - draw context */ + struct swr_draw_context swrDC; + + unsigned dirty; /**< Mask of SWR_NEW_x flags */ +}; + +static INLINE 
struct swr_context * +swr_context(struct pipe_context *pipe) +{ + return (struct swr_context *)pipe; +} + +static INLINE void +swr_update_draw_context(struct swr_context *ctx) +{ + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + memcpy(pDC, &ctx->swrDC, sizeof(swr_draw_context)); +} + +struct pipe_context *swr_create_context(struct pipe_screen *, void *priv, unsigned flags); + +void swr_state_init(struct pipe_context *pipe); + +void swr_clear_init(struct pipe_context *pipe); + +void swr_draw_init(struct pipe_context *pipe); + +void swr_finish(struct pipe_context *pipe); +#endif diff --git a/src/gallium/drivers/swr/swr_context_llvm.h b/src/gallium/drivers/swr/swr_context_llvm.h new file mode 100644 index 00000000000..58da813123f --- /dev/null +++ b/src/gallium/drivers/swr/swr_context_llvm.h @@ -0,0 +1,124 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////// +/// Generate LLVM type information for swr_jit_texture +INLINE static StructType * +Gen_swr_jit_texture(JitManager *pShG) +{ + LLVMContext &ctx = pShG->mContext; + std::vector<Type *> members; + + members.push_back(Type::getInt32Ty(ctx)); // width + members.push_back(Type::getInt32Ty(ctx)); // height + members.push_back(Type::getInt32Ty(ctx)); // depth + members.push_back(Type::getInt32Ty(ctx)); // first_level + members.push_back(Type::getInt32Ty(ctx)); // last_level + members.push_back(PointerType::get(Type::getInt8Ty(ctx), 0)); // base_ptr + members.push_back(ArrayType::get(Type::getInt32Ty(ctx), + PIPE_MAX_TEXTURE_LEVELS)); // row_stride + members.push_back(ArrayType::get(Type::getInt32Ty(ctx), + PIPE_MAX_TEXTURE_LEVELS)); // img_stride + members.push_back(ArrayType::get(Type::getInt32Ty(ctx), + PIPE_MAX_TEXTURE_LEVELS)); // mip_offsets + + return StructType::get(ctx, members, false); +} + +static const UINT swr_jit_texture_width = 0; +static const UINT swr_jit_texture_height = 1; +static const UINT swr_jit_texture_depth = 2; +static const UINT swr_jit_texture_first_level = 3; +static const UINT swr_jit_texture_last_level = 4; +static const UINT swr_jit_texture_base_ptr = 5; +static const UINT swr_jit_texture_row_stride = 6; +static const UINT swr_jit_texture_img_stride = 7; +static const UINT swr_jit_texture_mip_offsets = 8; + +////////////////////////////////////////////////////////////////////////// +/// Generate LLVM type information for swr_jit_sampler +INLINE static StructType * +Gen_swr_jit_sampler(JitManager 
*pShG) +{ + LLVMContext &ctx = pShG->mContext; + std::vector<Type *> members; + + members.push_back(Type::getFloatTy(ctx)); // min_lod + members.push_back(Type::getFloatTy(ctx)); // max_lod + members.push_back(Type::getFloatTy(ctx)); // lod_bias + members.push_back( + ArrayType::get(Type::getFloatTy(ctx), 4)); // border_color + + return StructType::get(ctx, members, false); +} + +static const UINT swr_jit_sampler_min_lod = 0; +static const UINT swr_jit_sampler_max_lod = 1; +static const UINT swr_jit_sampler_lod_bias = 2; +static const UINT swr_jit_sampler_border_color = 3; + +////////////////////////////////////////////////////////////////////////// +/// Generate LLVM type information for swr_draw_context +INLINE static StructType * +Gen_swr_draw_context(JitManager *pShG) +{ + LLVMContext &ctx = pShG->mContext; + std::vector<Type *> members; + + members.push_back( + ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0), + PIPE_MAX_CONSTANT_BUFFERS)); // constantVS + members.push_back(ArrayType::get( + Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsVS + members.push_back( + ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0), + PIPE_MAX_CONSTANT_BUFFERS)); // constantFS + members.push_back(ArrayType::get( + Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsFS + members.push_back( + ArrayType::get(Gen_swr_jit_texture(pShG), + PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesVS + members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG), + PIPE_MAX_SAMPLERS)); // samplersVS + members.push_back( + ArrayType::get(Gen_swr_jit_texture(pShG), + PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesFS + members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG), + PIPE_MAX_SAMPLERS)); // samplersFS + members.push_back(ArrayType::get(Gen_SWR_SURFACE_STATE(pShG), + SWR_NUM_ATTACHMENTS)); // renderTargets + + return StructType::get(ctx, members, false); +} + +static const UINT swr_draw_context_constantVS = 0; +static const UINT 
swr_draw_context_num_constantsVS = 1; +static const UINT swr_draw_context_constantFS = 2; +static const UINT swr_draw_context_num_constantsFS = 3; +static const UINT swr_draw_context_texturesVS = 4; +static const UINT swr_draw_context_samplersVS = 5; +static const UINT swr_draw_context_texturesFS = 6; +static const UINT swr_draw_context_samplersFS = 7; +static const UINT swr_draw_context_renderTargets = 8; diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp new file mode 100644 index 00000000000..a775bd2467f --- /dev/null +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -0,0 +1,271 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "swr_screen.h" +#include "swr_context.h" +#include "swr_resource.h" +#include "swr_fence.h" +#include "swr_query.h" +#include "jit_api.h" + +#include "util/u_draw.h" +#include "util/u_prim.h" + +/* + * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY + */ +static INLINE enum PRIMITIVE_TOPOLOGY +swr_convert_prim_topology(const unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: + return TOP_POINT_LIST; + case PIPE_PRIM_LINES: + return TOP_LINE_LIST; + case PIPE_PRIM_LINE_LOOP: + return TOP_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: + return TOP_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: + return TOP_TRIANGLE_LIST; + case PIPE_PRIM_TRIANGLE_STRIP: + return TOP_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: + return TOP_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: + return TOP_QUAD_LIST; + case PIPE_PRIM_QUAD_STRIP: + return TOP_QUAD_STRIP; + case PIPE_PRIM_POLYGON: + return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */ + case PIPE_PRIM_LINES_ADJACENCY: + return TOP_LINE_LIST_ADJ; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return TOP_LISTSTRIP_ADJ; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return TOP_TRI_LIST_ADJ; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return TOP_TRI_STRIP_ADJ; + default: + assert(0 && "Unknown topology"); + return TOP_UNKNOWN; + } +}; + + +/* + * Draw vertex arrays, with optional indexing, optional instancing. 
+ */ +static void +swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) +{ + struct swr_context *ctx = swr_context(pipe); + + if (!swr_check_render_cond(pipe)) + return; + + if (info->indirect) { + util_draw_indirect(pipe, info); + return; + } + + /* Update derived state, pass draw info to update function */ + if (ctx->dirty) + swr_update_derived(ctx, info); + + swr_update_draw_context(ctx); + + if (ctx->vs->pipe.stream_output.num_outputs) { + if (!ctx->vs->soFunc[info->mode]) { + STREAMOUT_COMPILE_STATE state = {0}; + struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output; + + state.numVertsPerPrim = u_vertices_per_prim(info->mode); + + uint32_t offsets[MAX_SO_STREAMS] = {0}; + uint32_t num = 0; + + for (uint32_t i = 0; i < so->num_outputs; i++) { + assert(so->output[i].stream == 0); // @todo + uint32_t output_buffer = so->output[i].output_buffer; + if (so->output[i].dst_offset != offsets[output_buffer]) { + // hole - need to fill + state.stream.decl[num].bufferIndex = output_buffer; + state.stream.decl[num].hole = true; + state.stream.decl[num].componentMask = + (1 << (so->output[i].dst_offset - offsets[output_buffer])) + - 1; + num++; + offsets[output_buffer] = so->output[i].dst_offset; + } + + state.stream.decl[num].bufferIndex = output_buffer; + state.stream.decl[num].attribSlot = so->output[i].register_index - 1; + state.stream.decl[num].componentMask = + ((1 << so->output[i].num_components) - 1) + << so->output[i].start_component; + state.stream.decl[num].hole = false; + num++; + + offsets[output_buffer] += so->output[i].num_components; + } + + state.stream.numDecls = num; + + HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr; + ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state); + debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]); + assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL"); + } + + SwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0); + } + + struct 
swr_vertex_element_state *velems = ctx->velems; + if (!velems->fsFunc + || (velems->fsState.cutIndex != info->restart_index) + || (velems->fsState.bEnableCutIndex != info->primitive_restart)) { + + velems->fsState.cutIndex = info->restart_index; + velems->fsState.bEnableCutIndex = info->primitive_restart; + + /* Create Fetch Shader */ + HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr; + velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState); + + debug_printf("fetch shader %p\n", velems->fsFunc); + assert(velems->fsFunc && "Error: FetchShader = NULL"); + } + + SwrSetFetchFunc(ctx->swrContext, velems->fsFunc); + + if (info->indexed) + SwrDrawIndexedInstanced(ctx->swrContext, + swr_convert_prim_topology(info->mode), + info->count, + info->instance_count, + info->start, + info->index_bias, + info->start_instance); + else + SwrDrawInstanced(ctx->swrContext, + swr_convert_prim_topology(info->mode), + info->count, + info->instance_count, + info->start, + info->start_instance); +} + + +static void +swr_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + unsigned flags) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_screen *screen = swr_screen(pipe->screen); + + /* If the current renderTarget is the display surface, store tiles back to + * the surface, in + * preparation for present (swr_flush_frontbuffer) + */ + struct pipe_surface *cb = ctx->framebuffer.cbufs[0]; + if (cb && swr_resource(cb->texture)->display_target) { + swr_store_render_target(ctx, SWR_ATTACHMENT_COLOR0, SWR_TILE_RESOLVED); + swr_resource(cb->texture)->bound_to_context = (void*)pipe; + } + + // SwrStoreTiles is asynchronous, always submit the "flush" fence. + // flush_frontbuffer needs it. 
+ swr_fence_submit(ctx, screen->flush_fence); + + if (fence) + swr_fence_reference(pipe->screen, fence, screen->flush_fence); +} + +void +swr_finish(struct pipe_context *pipe) +{ + struct swr_screen *screen = swr_screen(pipe->screen); + struct pipe_fence_handle *fence = NULL; + + swr_flush(pipe, &fence, 0); + swr_fence_finish(&screen->base, fence, 0); + swr_fence_reference(&screen->base, &fence, NULL); +} + + +/* + * Store SWR HotTiles back to RenderTarget surface. + */ +void +swr_store_render_target(struct swr_context *ctx, + uint32_t attachment, + enum SWR_TILE_STATE post_tile_state) +{ + struct swr_draw_context *pDC = &ctx->swrDC; + struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment]; + + /* Only proceed if there's a valid surface to store to */ + if (renderTarget->pBaseAddress) { + /* Set viewport to full renderTarget width/height and disable scissor + * before StoreTiles */ + boolean change_viewport = + (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f + || ctx->derived.vp.width != renderTarget->width + || ctx->derived.vp.height != renderTarget->height); + if (change_viewport) { + SWR_VIEWPORT vp = {0}; + vp.width = renderTarget->width; + vp.height = renderTarget->height; + SwrSetViewports(ctx->swrContext, 1, &vp, NULL); + } + + boolean scissor_enable = ctx->derived.rastState.scissorEnable; + if (scissor_enable) { + ctx->derived.rastState.scissorEnable = FALSE; + SwrSetRastState(ctx->swrContext, &ctx->derived.rastState); + } + + swr_update_draw_context(ctx); + SwrStoreTiles(ctx->swrContext, + (enum SWR_RENDERTARGET_ATTACHMENT)attachment, + post_tile_state); + + /* Restore viewport and scissor enable */ + if (change_viewport) + SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm); + if (scissor_enable) { + ctx->derived.rastState.scissorEnable = scissor_enable; + SwrSetRastState(ctx->swrContext, &ctx->derived.rastState); + } + } +} + + +void +swr_draw_init(struct pipe_context *pipe) +{ + pipe->draw_vbo = 
swr_draw_vbo; + pipe->flush = swr_flush; +} diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp new file mode 100644 index 00000000000..f97ea22151b --- /dev/null +++ b/src/gallium/drivers/swr/swr_fence.cpp @@ -0,0 +1,143 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "os/os_time.h" + +#include "swr_context.h" +#include "swr_screen.h" +#include "swr_fence.h" + +#if defined(PIPE_CC_MSVC) // portable thread yield + #define sched_yield SwitchToThread +#endif +/* + * Fence callback, called by back-end thread on completion of all rendering up + * to SwrSync call. 
+ */ +static void +swr_sync_cb(UINT64 userData, UINT64 userData2, UINT64 userData3) +{ + struct swr_fence *fence = (struct swr_fence *)userData; + + fence->read = fence->write; +} + +/* + * Submit an existing fence. + */ +void +swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh) +{ + struct swr_fence *fence = swr_fence(fh); + + fence->write++; + SwrSync(ctx->swrContext, swr_sync_cb, (UINT64)fence, 0, 0); +} + +/* + * Create a new fence object. + */ +struct pipe_fence_handle * +swr_fence_create() +{ + static int fence_id = 0; + struct swr_fence *fence = CALLOC_STRUCT(swr_fence); + if (!fence) + return NULL; + + memset(fence, 0, sizeof(*fence)); + pipe_reference_init(&fence->reference, 1); + fence->id = fence_id++; + + return (struct pipe_fence_handle *)fence; +} + +/** Destroy a fence. Called when refcount hits zero. */ +static void +swr_fence_destroy(struct swr_fence *fence) +{ + FREE(fence); +} + +/** + * Set ptr = fence, with reference counting + */ +void +swr_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *f) +{ + struct swr_fence *fence = swr_fence(f); + struct swr_fence *old; + + if (likely(ptr)) { + old = swr_fence(*ptr); + *ptr = f; + } else { + old = NULL; + } + + if (pipe_reference(&old->reference, &fence->reference)) + swr_fence_destroy(old); +} + +/* + * Wait for the fence to finish. 
+ */ +boolean +swr_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + uint64_t timeout) +{ + struct swr_fence *fence = swr_fence(fence_handle); + + while (!swr_is_fence_done(fence)) + sched_yield(); + + return TRUE; +} + + +uint64_t +swr_get_timestamp(struct pipe_screen *screen) +{ + return os_time_get_nano(); +} + + +void +swr_fence_init(struct pipe_screen *p_screen) +{ + p_screen->fence_reference = swr_fence_reference; + p_screen->fence_finish = swr_fence_finish; + + p_screen->get_timestamp = swr_get_timestamp; + + /* + * Create persistant "flush" fence, submitted when swr_flush is called. + */ + struct swr_screen *screen = swr_screen(p_screen); + screen->flush_fence = swr_fence_create(); +} diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h new file mode 100644 index 00000000000..257b2408820 --- /dev/null +++ b/src/gallium/drivers/swr/swr_fence.h @@ -0,0 +1,70 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_FENCE_H +#define SWR_FENCE_H + +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +struct pipe_screen; + +struct swr_fence { + struct pipe_reference reference; + + uint64_t read; + uint64_t write; + + unsigned id; /* Just for reference */ +}; + + +static inline struct swr_fence * +swr_fence(struct pipe_fence_handle *fence) +{ + return (struct swr_fence *)fence; +} + +static INLINE boolean +swr_is_fence_done(struct swr_fence *fence) +{ + return (fence->read == fence->write); +} + + +void swr_fence_init(struct pipe_screen *screen); + +struct pipe_fence_handle *swr_fence_create(); + +void swr_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *f); + +boolean swr_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + uint64_t timeout); + +void +swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fence); + +uint64_t swr_get_timestamp(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/drivers/swr/swr_loader.cpp b/src/gallium/drivers/swr/swr_loader.cpp new file mode 100644 index 00000000000..2113c371c5f --- /dev/null +++ b/src/gallium/drivers/swr/swr_loader.cpp @@ -0,0 +1,67 @@ +/**************************************************************************** + * Copyright (C) 2016 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "util/u_cpu_detect.h" +#include "util/u_dl.h" +#include "swr_public.h" + +#include <stdio.h> +#include <dlfcn.h> + +typedef pipe_screen *(*screen_create_proc)(struct sw_winsys *winsys); + +struct pipe_screen * +swr_create_screen(struct sw_winsys *winsys) +{ + fprintf(stderr, "SWR detected "); + + util_dl_library *pLibrary = nullptr; + + util_cpu_detect(); + if (util_cpu_caps.has_avx2) { + fprintf(stderr, "AVX2\n"); + pLibrary = util_dl_open("libswrAVX2.so"); + } else if (util_cpu_caps.has_avx) { + fprintf(stderr, "AVX\n"); + pLibrary = util_dl_open("libswrAVX.so"); + } else { + fprintf(stderr, "no AVX/AVX2 support. 
Aborting!\n"); + exit(-1); + } + + if (!pLibrary) { + fprintf(stderr, "SWR library load failure: %s\n", util_dl_error()); + exit(-1); + } + + util_dl_proc pScreenProc = util_dl_get_proc_address(pLibrary, "swr_create_screen"); + + if (!pScreenProc) { + fprintf(stderr, "SWR library search failure: %s\n", util_dl_error()); + exit(-1); + } + + screen_create_proc pScreenCreate = (screen_create_proc)pScreenProc; + + return pScreenCreate(winsys); +} diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h new file mode 100644 index 00000000000..d1167810b4a --- /dev/null +++ b/src/gallium/drivers/swr/swr_memory.h @@ -0,0 +1,99 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#pragma once + +void LoadHotTile( + SWR_SURFACE_STATE *pSrcSurface, + SWR_FORMAT dstFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, uint32_t renderTargetArrayIndex, + BYTE *pDstHotTile); + +void StoreHotTile( + SWR_SURFACE_STATE *pDstSurface, + SWR_FORMAT srcFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, uint32_t renderTargetArrayIndex, + BYTE *pSrcHotTile); + +void StoreHotTileClear( + SWR_SURFACE_STATE *pDstSurface, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, + UINT y, + const float* pClearColor); + +INLINE void +swr_LoadHotTile(HANDLE hPrivateContext, + SWR_FORMAT dstFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, + uint32_t renderTargetArrayIndex, BYTE* pDstHotTile) +{ + // Grab source surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex]; + + LoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile); +} + +INLINE void +swr_StoreHotTile(HANDLE hPrivateContext, + SWR_FORMAT srcFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, + uint32_t renderTargetArrayIndex, BYTE* pSrcHotTile) +{ + // Grab destination surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex]; + + StoreHotTile(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile); +} + +INLINE void +swr_StoreHotTileClear(HANDLE hPrivateContext, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, + UINT y, + const float* pClearColor) +{ + // Grab destination surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pDstSurface = 
&pDC->renderTargets[renderTargetIndex]; + + StoreHotTileClear(pDstSurface, renderTargetIndex, x, y, pClearColor); +} + +void InitSimLoadTilesTable(); +void InitSimStoreTilesTable(); +void InitSimClearTilesTable(); + +/* Init Load/Store/ClearTiles Tables */ +INLINE void swr_InitMemoryModule() +{ + InitSimLoadTilesTable(); + InitSimStoreTilesTable(); + InitSimClearTilesTable(); +} diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h new file mode 100644 index 00000000000..0814c3b85d6 --- /dev/null +++ b/src/gallium/drivers/swr/swr_public.h @@ -0,0 +1,46 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_PUBLIC_H +#define SWR_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; +struct sw_displaytarget; + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen *swr_create_screen(struct sw_winsys *winsys); + +struct sw_winsys *swr_get_winsys(struct pipe_screen *pipe); + +struct sw_displaytarget *swr_get_displaytarget(struct pipe_resource *resource); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp new file mode 100644 index 00000000000..2510b3ae39c --- /dev/null +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -0,0 +1,334 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "os/os_time.h" +#include "swr_context.h" +#include "swr_fence.h" +#include "swr_query.h" +#include "swr_screen.h" +#include "swr_state.h" + + +static struct swr_query * +swr_query(struct pipe_query *p) +{ + return (struct swr_query *)p; +} + +static struct pipe_query * +swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index) +{ + struct swr_query *pq; + + assert(type < PIPE_QUERY_TYPES); + assert(index < MAX_SO_STREAMS); + + pq = CALLOC_STRUCT(swr_query); + + if (pq) { + pq->type = type; + pq->index = index; + } + + return (struct pipe_query *)pq; +} + + +static void +swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct swr_query *pq = swr_query(q); + + if (pq->fence) { + if (!swr_is_fence_done(swr_fence(pq->fence))) { + swr_fence_submit(swr_context(pipe), pq->fence); + swr_fence_finish(pipe->screen, pq->fence, 0); + } + swr_fence_reference(pipe->screen, &pq->fence, NULL); + } + + FREE(pq); +} + + +// XXX Create a fence callback, rather than stalling SwrWaitForIdle +static void +swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq) +{ + struct swr_context *ctx = swr_context(pipe); + + assert(pq->result); + union pipe_query_result *result = pq->result; + boolean enable_stats = pq->enable_stats; + SWR_STATS swr_stats = {0}; + + if (pq->fence) { + if (!swr_is_fence_done(swr_fence(pq->fence))) { + swr_fence_submit(ctx, pq->fence); + swr_fence_finish(pipe->screen, pq->fence, 0); + } + swr_fence_reference(pipe->screen, &pq->fence, NULL); + } + + /* + * These queries don't need SWR Stats enabled in the core + * Set and return. 
+ */ + switch (pq->type) { + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + result->u64 = swr_get_timestamp(pipe->screen); + return; + break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: + /* nothing to do here */ + return; + break; + case PIPE_QUERY_GPU_FINISHED: + result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId + vs LastRetiredId? */ + return; + break; + default: + /* Any query that needs SwrCore stats */ + break; + } + + /* + * All other results are collected from SwrCore counters + */ + + /* XXX, Should turn this into a fence callback and skip the stall */ + SwrGetStats(ctx->swrContext, &swr_stats); + /* SwrGetStats returns immediately, wait for collection */ + SwrWaitForIdle(ctx->swrContext); + + switch (pq->type) { + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_OCCLUSION_COUNTER: + result->u64 = swr_stats.DepthPassCount; + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + result->u64 = swr_stats.IaPrimitives; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + result->u64 = swr_stats.SoNumPrimsWritten[pq->index]; + break; + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { + struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; + so_stats->num_primitives_written = + swr_stats.SoNumPrimsWritten[pq->index]; + so_stats->primitives_storage_needed = + swr_stats.SoPrimStorageNeeded[pq->index]; + } break; + case PIPE_QUERY_PIPELINE_STATISTICS: { + struct pipe_query_data_pipeline_statistics *p_stats = + &result->pipeline_statistics; + p_stats->ia_vertices = swr_stats.IaVertices; + p_stats->ia_primitives = swr_stats.IaPrimitives; + p_stats->vs_invocations = swr_stats.VsInvocations; + p_stats->gs_invocations = swr_stats.GsInvocations; + p_stats->gs_primitives = swr_stats.GsPrimitives; + p_stats->c_invocations = swr_stats.CPrimitives; + p_stats->c_primitives = swr_stats.CPrimitives; + p_stats->ps_invocations = swr_stats.PsInvocations; + p_stats->hs_invocations = swr_stats.HsInvocations; + 
p_stats->ds_invocations = swr_stats.DsInvocations; + p_stats->cs_invocations = swr_stats.CsInvocations; + } break; + default: + assert(0 && "Unsupported query"); + break; + } + + /* Only change stat collection if there are no active queries */ + if (ctx->active_queries == 0) + SwrEnableStats(ctx->swrContext, enable_stats); +} + + +static boolean +swr_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + union pipe_query_result *result) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_query *pq = swr_query(q); + + if (pq->fence) { + if (!swr_is_fence_done(swr_fence(pq->fence))) { + swr_fence_submit(ctx, pq->fence); + if (!wait) + return FALSE; + swr_fence_finish(pipe->screen, pq->fence, 0); + } + swr_fence_reference(pipe->screen, &pq->fence, NULL); + } + + /* XXX: Need to handle counter rollover */ + + switch (pq->type) { + /* Booleans */ + case PIPE_QUERY_OCCLUSION_PREDICATE: + result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE; + break; + case PIPE_QUERY_GPU_FINISHED: + result->b = pq->end.b; + break; + /* Counters */ + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_PRIMITIVES_EMITTED: + result->u64 = pq->end.u64 - pq->start.u64; + break; + /* Structures */ + case PIPE_QUERY_SO_STATISTICS: { + struct pipe_query_data_so_statistics *so_stats = &result->so_statistics; + struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; + struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; + so_stats->num_primitives_written = + end->num_primitives_written - start->num_primitives_written; + so_stats->primitives_storage_needed = + end->primitives_storage_needed - start->primitives_storage_needed; + } break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: { + /* os_get_time_nano returns nanoseconds */ + result->timestamp_disjoint.frequency = UINT64_C(1000000000); + result->timestamp_disjoint.disjoint = 
FALSE; + } break; + case PIPE_QUERY_PIPELINE_STATISTICS: { + struct pipe_query_data_pipeline_statistics *p_stats = + &result->pipeline_statistics; + struct pipe_query_data_pipeline_statistics *start = + &pq->start.pipeline_statistics; + struct pipe_query_data_pipeline_statistics *end = + &pq->end.pipeline_statistics; + p_stats->ia_vertices = end->ia_vertices - start->ia_vertices; + p_stats->ia_primitives = end->ia_primitives - start->ia_primitives; + p_stats->vs_invocations = end->vs_invocations - start->vs_invocations; + p_stats->gs_invocations = end->gs_invocations - start->gs_invocations; + p_stats->gs_primitives = end->gs_primitives - start->gs_primitives; + p_stats->c_invocations = end->c_invocations - start->c_invocations; + p_stats->c_primitives = end->c_primitives - start->c_primitives; + p_stats->ps_invocations = end->ps_invocations - start->ps_invocations; + p_stats->hs_invocations = end->hs_invocations - start->hs_invocations; + p_stats->ds_invocations = end->ds_invocations - start->ds_invocations; + p_stats->cs_invocations = end->cs_invocations - start->cs_invocations; + } break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: { + struct pipe_query_data_so_statistics *start = &pq->start.so_statistics; + struct pipe_query_data_so_statistics *end = &pq->end.so_statistics; + uint64_t num_primitives_written = + end->num_primitives_written - start->num_primitives_written; + uint64_t primitives_storage_needed = + end->primitives_storage_needed - start->primitives_storage_needed; + result->b = num_primitives_written > primitives_storage_needed; + } break; + default: + assert(0 && "Unsupported query"); + break; + } + + return TRUE; +} + +static boolean +swr_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_query *pq = swr_query(q); + + /* Initialize Results */ + memset(&pq->start, 0, sizeof(pq->start)); + memset(&pq->end, 0, sizeof(pq->end)); + + /* Gather start stats and enable SwrCore counters 
*/ + pq->result = &pq->start; + pq->enable_stats = TRUE; + swr_gather_stats(pipe, pq); + ctx->active_queries++; + + /* override start timestamp to 0 for TIMESTAMP query */ + if (pq->type == PIPE_QUERY_TIMESTAMP) + pq->start.u64 = 0; + + return true; +} + +static void +swr_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_query *pq = swr_query(q); + + assert(ctx->active_queries + && "swr_end_query, there are no active queries!"); + ctx->active_queries--; + + /* Gather end stats and disable SwrCore counters */ + pq->result = &pq->end; + pq->enable_stats = FALSE; + swr_gather_stats(pipe, pq); +} + + +boolean +swr_check_render_cond(struct pipe_context *pipe) +{ + struct swr_context *ctx = swr_context(pipe); + boolean b, wait; + uint64_t result; + + if (!ctx->render_cond_query) + return TRUE; /* no query predicate, draw normally */ + + wait = (ctx->render_cond_mode == PIPE_RENDER_COND_WAIT + || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT); + + b = pipe->get_query_result( + pipe, ctx->render_cond_query, wait, (union pipe_query_result *)&result); + if (b) + return (!result == ctx->render_cond_cond); + else + return TRUE; +} + +void +swr_query_init(struct pipe_context *pipe) +{ + struct swr_context *ctx = swr_context(pipe); + + pipe->create_query = swr_create_query; + pipe->destroy_query = swr_destroy_query; + pipe->begin_query = swr_begin_query; + pipe->end_query = swr_end_query; + pipe->get_query_result = swr_get_query_result; + + ctx->active_queries = 0; +} diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h new file mode 100644 index 00000000000..836d07b68ae --- /dev/null +++ b/src/gallium/drivers/swr/swr_query.h @@ -0,0 +1,46 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_QUERY_H +#define SWR_QUERY_H + + +#include <limits.h> + +struct swr_query { + unsigned type; /* PIPE_QUERY_* */ + unsigned index; + + union pipe_query_result *result; + union pipe_query_result start; + union pipe_query_result end; + + struct pipe_fence_handle *fence; + + boolean enable_stats; +}; + +extern void swr_query_init(struct pipe_context *pipe); + +extern boolean swr_check_render_cond(struct pipe_context *pipe); +#endif diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h new file mode 100644 index 00000000000..87a27acfbce --- /dev/null +++ b/src/gallium/drivers/swr/swr_resource.h @@ -0,0 +1,97 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. 
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_RESOURCE_H +#define SWR_RESOURCE_H + +#include "pipe/p_state.h" +#include "api.h" + +struct sw_displaytarget; + +struct swr_resource { + struct pipe_resource base; + + bool has_depth; + bool has_stencil; + + UINT alignedWidth; + UINT alignedHeight; + + SWR_SURFACE_STATE swr; + SWR_SURFACE_STATE secondary; // for faking depth/stencil merged formats + + struct sw_displaytarget *display_target; + + unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; + + /* Opaque pointer to swr_context to mark resource in use */ + void *bound_to_context; +}; + + +static INLINE struct swr_resource * +swr_resource(struct pipe_resource *resource) +{ + return (struct swr_resource *)resource; +} + +static INLINE boolean +swr_resource_is_texture(const struct pipe_resource *resource) +{ + switch (resource->target) { + case PIPE_BUFFER: + return FALSE; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static INLINE void * +swr_resource_data(struct pipe_resource *resource) +{ + struct swr_resource *swr_r = swr_resource(resource); + + assert(!swr_resource_is_texture(resource)); + + return swr_r->swr.pBaseAddress; +} + + +void swr_store_render_target(struct swr_context *ctx, + uint32_t attachment, + enum SWR_TILE_STATE post_tile_state); +#endif diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp new file mode 100644 index 00000000000..e6c448cb9e7 --- /dev/null +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -0,0 +1,116 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. 
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "util/u_memory.h" +#include "swr_context.h" +#include "swr_scratch.h" +#include "api.h" + + +void * +swr_copy_to_scratch_space(struct swr_context *ctx, + struct swr_scratch_space *space, + const void *user_buffer, + unsigned int size) +{ + void *ptr; + assert(space); + assert(user_buffer); + assert(size); + + if (size >= 2048) { /* XXX TODO create KNOB_ for this */ + /* Use per draw SwrAllocDrawContextMemory for larger copies */ + ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4); + } else { + /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. 
*/ + unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT; + + /* Need to grow space */ + if (max_size_in_flight > space->current_size) { + /* Must idle the pipeline, this is infrequent */ + SwrWaitForIdle(ctx->swrContext); + + space->current_size = max_size_in_flight; + + if (space->base) { + align_free(space->base); + space->base = NULL; + } + + if (!space->base) { + space->base = (BYTE *)align_malloc(space->current_size, 4); + space->head = (void *)space->base; + } + } + + /* Wrap */ + if (((BYTE *)space->head + size) + >= ((BYTE *)space->base + space->current_size)) { + /* + * TODO XXX: Should add a fence on wrap. Assumption is that + * current_space >> size, and there are at least MAX_DRAWS_IN_FLIGHT + * draws in scratch. So fence would always be met on wrap. A fence + * would ensure that first frame in buffer is done before wrapping. + * If fence ever needs to be waited on, can increase buffer size. + * So far in testing, this hasn't been necessary. + */ + space->head = space->base; + } + + ptr = space->head; + space->head = (BYTE *)space->head + size; + } + + /* Copy user_buffer to scratch */ + memcpy(ptr, user_buffer, size); + + return ptr; +} + + +void +swr_init_scratch_buffers(struct swr_context *ctx) +{ + struct swr_scratch_buffers *scratch; + + scratch = CALLOC_STRUCT(swr_scratch_buffers); + ctx->scratch = scratch; +} + +void +swr_destroy_scratch_buffers(struct swr_context *ctx) +{ + struct swr_scratch_buffers *scratch = ctx->scratch; + + if (scratch) { + if (scratch->vs_constants.base) + align_free(scratch->vs_constants.base); + if (scratch->fs_constants.base) + align_free(scratch->fs_constants.base); + if (scratch->vertex_buffer.base) + align_free(scratch->vertex_buffer.base); + if (scratch->index_buffer.base) + align_free(scratch->index_buffer.base); + FREE(scratch); + } +} diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h new file mode 100644 index 00000000000..74218d63644 --- /dev/null +++ 
b/src/gallium/drivers/swr/swr_scratch.h @@ -0,0 +1,63 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_SCRATCH_H +#define SWR_SCRATCH_H + +struct swr_scratch_space { + void *head; + unsigned int current_size; + /* TODO XXX: Add a fence for wrap condition. */ + + void *base; +}; + +struct swr_scratch_buffers { + struct swr_scratch_space vs_constants; + struct swr_scratch_space fs_constants; + struct swr_scratch_space vertex_buffer; + struct swr_scratch_space index_buffer; +}; + + +/* + * swr_copy_to_scratch_space + * Copies size bytes of user_buffer into the scratch ring buffer. + * Used to store temporary data such as client arrays and constants. 
+ * + * Inputs: + * space ptr to scratch pool (vs_constants, fs_constants) + * user_buffer, data to copy into scratch space + * size to be copied + * Returns: + * pointer to data copied to scratch space. + */ +void *swr_copy_to_scratch_space(struct swr_context *ctx, + struct swr_scratch_space *space, + const void *user_buffer, + unsigned int size); + +void swr_init_scratch_buffers(struct swr_context *ctx); +void swr_destroy_scratch_buffers(struct swr_context *ctx); + +#endif diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp new file mode 100644 index 00000000000..f0d48cd3f79 --- /dev/null +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -0,0 +1,746 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_cpu_detect.h" + +#include "state_tracker/sw_winsys.h" + +extern "C" { +#include "gallivm/lp_bld_limits.h" +} + +#include "swr_public.h" +#include "swr_screen.h" +#include "swr_context.h" +#include "swr_resource.h" +#include "swr_fence.h" +#include "gen_knobs.h" + +#include "jit_api.h" + +#include <stdio.h> + +/* MSVC case instensitive compare */ +#if defined(PIPE_CC_MSVC) + #define strcasecmp lstrcmpiA +#endif + +/* + * Max texture sizes + * XXX Check max texture size values against core and sampler. + */ +#define SWR_MAX_TEXTURE_SIZE (4 * 1048 * 1048 * 1024ULL) /* 4GB */ +#define SWR_MAX_TEXTURE_2D_LEVELS 14 /* 8K x 8K for now */ +#define SWR_MAX_TEXTURE_3D_LEVELS 12 /* 2K x 2K x 2K for now */ +#define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */ +#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */ + +static const char * +swr_get_name(struct pipe_screen *screen) +{ + return "SWR"; +} + +static const char * +swr_get_vendor(struct pipe_screen *screen) +{ + return "Intel Corporation"; +} + +static boolean +swr_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bind) +{ + struct sw_winsys *winsys = swr_screen(screen)->winsys; + const struct util_format_description *format_desc; + + assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D + || target == PIPE_TEXTURE_1D_ARRAY + || target == PIPE_TEXTURE_2D + || target == PIPE_TEXTURE_2D_ARRAY + || target == PIPE_TEXTURE_RECT + || target == PIPE_TEXTURE_3D + || target == PIPE_TEXTURE_CUBE + || target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + if (!format_desc) + return FALSE; + + if (sample_count > 1) + return FALSE; + + if 
(bind + & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { + if (!winsys->is_displaytarget_format_supported(winsys, bind, format)) + return FALSE; + } + + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) + return FALSE; + + /* + * Although possible, it is unnatural to render into compressed or YUV + * surfaces. So disable these here to avoid going into weird paths + * inside the state trackers. + */ + if (format_desc->block.width != 1 || format_desc->block.height != 1) + return FALSE; + } + + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) + return FALSE; + } + + return TRUE; +} + +static int +swr_get_param(struct pipe_screen *screen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return PIPE_MAX_COLOR_BUFS; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return SWR_MAX_TEXTURE_2D_LEVELS; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return SWR_MAX_TEXTURE_3D_LEVELS; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return SWR_MAX_TEXTURE_CUBE_LEVELS; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case 
PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; // Don't support lower left frag coord. + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 1; + case PIPE_CAP_DEPTH_CLIP_DISABLE: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return MAX_SO_STREAMS; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return MAX_ATTRIBUTES; + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 1024; + case PIPE_CAP_MAX_VERTEX_STREAMS: + return 1; + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return 1; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return SWR_MAX_TEXTURE_ARRAY_LAYERS; + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case PIPE_CAP_CONDITIONAL_RENDER: + return 1; + case PIPE_CAP_TEXTURE_BARRIER: + return 0; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */ + return 1; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 1; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 330; + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + return 0; + case PIPE_CAP_COMPUTE: + return 0; + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + return 1; + case 
PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 0; + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + case PIPE_CAP_QUERY_TIMESTAMP: + return 1; + case PIPE_CAP_CUBE_MAP_ARRAY: + return 0; + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 0; + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_NATIVE; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + return 0; + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + return 1; + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return 0; + case PIPE_CAP_FAKE_SW_MSAA: + return 1; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 0; + case PIPE_CAP_DRAW_INDIRECT: + return 1; + + case PIPE_CAP_VENDOR_ID: + return 0xFFFFFFFF; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 0; + case PIPE_CAP_VIDEO_MEMORY: { + /* XXX: Do we want to return the full amount of system memory ? 
*/ + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + return 1; + case PIPE_CAP_CLIP_HALFZ: + return 1; + case PIPE_CAP_VERTEXID_NOBASE: + return 0; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + return 1; + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + return 0; + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + return 0; // xxx + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + return 0; + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + return 0; + case PIPE_CAP_DEPTH_BOUNDS_TEST: + return 0; // xxx + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; + case PIPE_CAP_TGSI_TXQS: + case PIPE_CAP_FORCE_PERSAMPLE_INTERP: + case PIPE_CAP_SHAREABLE_SHADERS: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_CLEAR_TEXTURE: + case PIPE_CAP_DRAW_PARAMETERS: + case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_MULTI_DRAW_INDIRECT: + case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS: + case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL: + case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: + case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: + case PIPE_CAP_INVALIDATE_BUFFER: + case PIPE_CAP_GENERATE_MIPMAP: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: + case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_QUERY_MEMORY_INFO: + return 0; + } + + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAP %d query\n", param); + return 0; +} + +static int +swr_get_shader_param(struct pipe_screen *screen, + unsigned shader, + enum pipe_shader_cap param) +{ + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT) + return gallivm_get_shader_param(param); + + // Todo: geometry, tesselation, compute + return 0; +} + + +static float +swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param) +{ + 
switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 0.0; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 0.0; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0; + } + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; +} + +SWR_FORMAT +mesa_to_swr_format(enum pipe_format format) +{ + const struct util_format_description *format_desc = + util_format_description(format); + if (!format_desc) + return (SWR_FORMAT)-1; + + // more robust check would be comparing all attributes of the formats + // luckily format names are mostly standardized + for (int i = 0; i < NUM_SWR_FORMATS; i++) { + const SWR_FORMAT_INFO &swr_desc = GetFormatInfo((SWR_FORMAT)i); + + if (!strcasecmp(format_desc->short_name, swr_desc.name)) + return (SWR_FORMAT)i; + } + + // ... 
with some exceptions + switch (format) { + case PIPE_FORMAT_R8G8B8A8_SRGB: + return R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return B8G8R8A8_UNORM_SRGB; + case PIPE_FORMAT_I8_UNORM: + return R8_UNORM; + case PIPE_FORMAT_Z16_UNORM: + return R16_UNORM; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return R24_UNORM_X8_TYPELESS; + case PIPE_FORMAT_Z32_FLOAT: + return R32_FLOAT; + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return R32_FLOAT_X8X24_TYPELESS; + case PIPE_FORMAT_L8A8_UNORM: + return R8G8_UNORM; + default: + break; + } + + debug_printf("asked to convert unsupported format %s\n", + format_desc->name); + return (SWR_FORMAT)-1; +} + +static boolean +swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res) +{ + struct sw_winsys *winsys = screen->winsys; + struct sw_displaytarget *dt; + + UINT stride; + dt = winsys->displaytarget_create(winsys, + res->base.bind, + res->base.format, + res->alignedWidth, + res->alignedHeight, + 64, NULL, + &stride); + + if (dt == NULL) + return FALSE; + + void *map = winsys->displaytarget_map(winsys, dt, 0); + + res->display_target = dt; + res->swr.pBaseAddress = (uint8_t*) map; + + /* Clear the display target surface */ + if (map) + memset(map, 0, res->alignedHeight * stride); + + winsys->displaytarget_unmap(winsys, dt); + + return TRUE; +} + +static boolean +swr_texture_layout(struct swr_screen *screen, + struct swr_resource *res, + boolean allocate) +{ + struct pipe_resource *pt = &res->base; + + pipe_format fmt = pt->format; + const struct util_format_description *desc = util_format_description(fmt); + + res->has_depth = util_format_has_depth(desc); + res->has_stencil = util_format_has_stencil(desc); + + if (res->has_stencil && !res->has_depth) + fmt = PIPE_FORMAT_R8_UINT; + + res->swr.width = pt->width0; + res->swr.height = pt->height0; + res->swr.depth = pt->depth0; + res->swr.type = swr_convert_target_type(pt->target); + res->swr.tileMode = SWR_TILE_NONE; + 
res->swr.format = mesa_to_swr_format(fmt); + res->swr.numSamples = (1 << pt->nr_samples); + + SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format); + + unsigned total_size = 0; + unsigned width = pt->width0; + unsigned height = pt->height0; + unsigned depth = pt->depth0; + unsigned layers = pt->array_size; + + for (int level = 0; level <= pt->last_level; level++) { + unsigned alignedWidth, alignedHeight; + unsigned num_slices; + + if (pt->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) { + alignedWidth = align(width, KNOB_MACROTILE_X_DIM); + alignedHeight = align(height, KNOB_MACROTILE_Y_DIM); + } else { + alignedWidth = width; + alignedHeight = height; + } + + if (level == 0) { + res->alignedWidth = alignedWidth; + res->alignedHeight = alignedHeight; + } + + res->row_stride[level] = alignedWidth * finfo.Bpp; + res->img_stride[level] = res->row_stride[level] * alignedHeight; + res->mip_offsets[level] = total_size; + + if (pt->target == PIPE_TEXTURE_3D) + num_slices = depth; + else if (pt->target == PIPE_TEXTURE_1D_ARRAY + || pt->target == PIPE_TEXTURE_2D_ARRAY + || pt->target == PIPE_TEXTURE_CUBE + || pt->target == PIPE_TEXTURE_CUBE_ARRAY) + num_slices = layers; + else + num_slices = 1; + + total_size += res->img_stride[level] * num_slices; + if (total_size > SWR_MAX_TEXTURE_SIZE) + return FALSE; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + res->swr.halign = res->alignedWidth; + res->swr.valign = res->alignedHeight; + res->swr.pitch = res->row_stride[0]; + + if (allocate) { + res->swr.pBaseAddress = (BYTE *)_aligned_malloc(total_size, 64); + + if (res->has_depth && res->has_stencil) { + SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); + res->secondary.width = pt->width0; + res->secondary.height = pt->height0; + res->secondary.depth = pt->depth0; + res->secondary.type = SURFACE_2D; + res->secondary.tileMode = SWR_TILE_NONE; + res->secondary.format = R8_UINT; + 
res->secondary.numSamples = (1 << pt->nr_samples); + res->secondary.pitch = res->alignedWidth * finfo.Bpp; + + res->secondary.pBaseAddress = (BYTE *)_aligned_malloc( + res->alignedHeight * res->secondary.pitch, 64); + } + } + + return TRUE; +} + +static boolean +swr_can_create_resource(struct pipe_screen *screen, + const struct pipe_resource *templat) +{ + struct swr_resource res; + memset(&res, 0, sizeof(res)); + res.base = *templat; + return swr_texture_layout(swr_screen(screen), &res, false); +} + +static struct pipe_resource * +swr_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct swr_screen *screen = swr_screen(_screen); + struct swr_resource *res = CALLOC_STRUCT(swr_resource); + if (!res) + return NULL; + + res->base = *templat; + pipe_reference_init(&res->base.reference, 1); + res->base.screen = &screen->base; + + if (swr_resource_is_texture(&res->base)) { + if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT + | PIPE_BIND_SHARED)) { + /* displayable surface + * first call swr_texture_layout without allocating to finish + * filling out the SWR_SURFAE_STATE in res */ + swr_texture_layout(screen, res, false); + if (!swr_displaytarget_layout(screen, res)) + goto fail; + } else { + /* texture map */ + if (!swr_texture_layout(screen, res, true)) + goto fail; + } + } else { + /* other data (vertex buffer, const buffer, etc) */ + assert(util_format_get_blocksize(templat->format) == 1); + assert(templat->height0 == 1); + assert(templat->depth0 == 1); + assert(templat->last_level == 0); + + /* Easiest to just call swr_texture_layout, as it sets up + * SWR_SURFAE_STATE in res */ + if (!swr_texture_layout(screen, res, true)) + goto fail; + } + + return &res->base; + +fail: + FREE(res); + return NULL; +} + +static void +swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) +{ + struct swr_screen *screen = swr_screen(p_screen); + struct swr_resource *res = swr_resource(pt); + + /* + * If 
this resource is attached to a context it may still be in use, check + * dependencies before freeing + * XXX TODO: don't use SwrWaitForIdle, use fences and come up with a real + * resource manager. + * XXX It's happened that we get a swr_destroy prior to freeing the + * framebuffer resource. Don't wait on it. + */ + if (res->bound_to_context && !res->display_target) { + struct swr_context *ctx = + swr_context((pipe_context *)res->bound_to_context); + // XXX, don't SwrWaitForIdle!!! Use a fence. + SwrWaitForIdle(ctx->swrContext); + } + + /* + * Free resource primary surface. If resource is display target, winsys + * manages the buffer and will free it on displaytarget_destroy. + */ + if (res->display_target) { + /* display target */ + struct sw_winsys *winsys = screen->winsys; + winsys->displaytarget_destroy(winsys, res->display_target); + } else + _aligned_free(res->swr.pBaseAddress); + + _aligned_free(res->secondary.pBaseAddress); + + FREE(res); +} + + +static void +swr_flush_frontbuffer(struct pipe_screen *p_screen, + struct pipe_resource *resource, + unsigned level, + unsigned layer, + void *context_private, + struct pipe_box *sub_box) +{ + struct swr_screen *screen = swr_screen(p_screen); + struct sw_winsys *winsys = screen->winsys; + struct swr_resource *res = swr_resource(resource); + + /* Ensure fence set at flush is finished, before reading frame buffer */ + swr_fence_finish(p_screen, screen->flush_fence, 0); + + SwrEndFrame(swr_context((pipe_context *)res->bound_to_context)); + + assert(res->display_target); + if (res->display_target) + winsys->displaytarget_display( + winsys, res->display_target, context_private, sub_box); +} + + +static void +swr_destroy_screen(struct pipe_screen *p_screen) +{ + struct swr_screen *screen = swr_screen(p_screen); + struct sw_winsys *winsys = screen->winsys; + + fprintf(stderr, "SWR destroy screen!\n"); + + swr_fence_finish(p_screen, screen->flush_fence, 0); + swr_fence_reference(p_screen, &screen->flush_fence, NULL); + + 
JitDestroyContext(screen->hJitMgr); + + if (winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + +PUBLIC +struct pipe_screen * +swr_create_screen(struct sw_winsys *winsys) +{ + struct swr_screen *screen = CALLOC_STRUCT(swr_screen); + + if (!screen) + return NULL; + + if (!getenv("KNOB_MAX_PRIMS_PER_DRAW")) { + g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152); + } + + screen->winsys = winsys; + screen->base.get_name = swr_get_name; + screen->base.get_vendor = swr_get_vendor; + screen->base.is_format_supported = swr_is_format_supported; + screen->base.context_create = swr_create_context; + screen->base.can_create_resource = swr_can_create_resource; + + screen->base.destroy = swr_destroy_screen; + screen->base.get_param = swr_get_param; + screen->base.get_shader_param = swr_get_shader_param; + screen->base.get_paramf = swr_get_paramf; + + screen->base.resource_create = swr_resource_create; + screen->base.resource_destroy = swr_resource_destroy; + + screen->base.flush_frontbuffer = swr_flush_frontbuffer; + + screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, KNOB_ARCH_STR); + + swr_fence_init(&screen->base); + + return &screen->base; +} + +struct sw_winsys * +swr_get_winsys(struct pipe_screen *pipe) +{ + return ((struct swr_screen *)pipe)->winsys; +} + +struct sw_displaytarget * +swr_get_displaytarget(struct pipe_resource *resource) +{ + return ((struct swr_resource *)resource)->display_target; +} diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h new file mode 100644 index 00000000000..a96dc44cf66 --- /dev/null +++ b/src/gallium/drivers/swr/swr_screen.h @@ -0,0 +1,52 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_SCREEN_H +#define SWR_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "api.h" + +struct sw_winsys; + +struct swr_screen { + struct pipe_screen base; + + struct pipe_fence_handle *flush_fence; + + struct sw_winsys *winsys; + + HANDLE hJitMgr; +}; + +static INLINE struct swr_screen * +swr_screen(struct pipe_screen *pipe) +{ + return (struct swr_screen *)pipe; +} + +SWR_FORMAT +mesa_to_swr_format(enum pipe_format format); + +#endif diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp new file mode 100644 index 00000000000..ff16d0f2f11 --- /dev/null +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -0,0 +1,591 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "JitManager.h" +#include "state.h" +#include "state_llvm.h" +#include "builder.h" + +#include "llvm-c/Core.h" +#include "llvm/Support/CBindingWrapping.h" + +#include "tgsi/tgsi_strings.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_tgsi.h" + +#include "swr_context.h" +#include "swr_context_llvm.h" +#include "swr_state.h" +#include "swr_screen.h"

/*
 * Two JIT keys are equal when their object representations are identical.
 * The comparison covers every byte of the struct, so keys must be built by
 * swr_generate_fs_key(), which zeroes the whole object first.
 */
bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs)
{
   return !memcmp(&lhs, &rhs, sizeof(lhs));
}

/*
 * Fill in the fragment-shader JIT key for the current context state.
 *
 * key     output; completely overwritten.
 * ctx     supplies the framebuffer colour-buffer count, the rasterizer's
 *         light_twoside flag, the bound vertex shader's output semantics
 *         and the fragment-stage samplers / sampler views.
 * swr_fs  fragment shader whose TGSI info selects which sampler and
 *         sampler-view slots are recorded.
 */
void
swr_generate_fs_key(struct swr_jit_key &key,
                    struct swr_context *ctx,
                    swr_fragment_shader *swr_fs)
{
   /*
    * operator== compares keys with memcmp().  Slots skipped by the mask
    * tests below, slots beyond nr_samplers / nr_sampler_views, and any
    * padding would otherwise hold whatever the caller's storage contained,
    * making identical state produce different keys.  Start from all zeroes.
    */
   memset(&key, 0, sizeof(key));

   key.nr_cbufs = ctx->framebuffer.nr_cbufs;
   key.light_twoside = ctx->rasterizer->light_twoside;
   memcpy(&key.vs_output_semantic_name,
          &ctx->vs->info.base.output_semantic_name,
          sizeof(key.vs_output_semantic_name));
   memcpy(&key.vs_output_semantic_idx,
          &ctx->vs->info.base.output_semantic_index,
          sizeof(key.vs_output_semantic_idx));

   key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1;

   for (unsigned i = 0; i < key.nr_samplers; i++) {
      if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
         lp_sampler_static_sampler_state(
            &key.sampler[i].sampler_state,
            ctx->samplers[PIPE_SHADER_FRAGMENT][i]);
      }
   }

   /*
    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
    * are dx10-style? Can't really have mixed opcodes, at least not
    * if we want to skip the holes here (without rescanning tgsi).
    */
   if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
      key.nr_sampler_views =
         swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
         if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
            lp_sampler_static_texture_state(
               &key.sampler[i].texture_state,
               ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
         }
      }
   } else {
      /* No explicit sampler views declared: views mirror the sampler slots. */
      key.nr_sampler_views = key.nr_samplers;
      for (unsigned i = 0; i < key.nr_sampler_views; i++) {
         if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
            lp_sampler_static_texture_state(
               &key.sampler[i].texture_state,
               ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]);
         }
      }
   }
}

/*
 * Builder specialisation used to JIT the gallium vertex and fragment
 * shaders.  Constructing one calls SetupNewModule() on the JIT manager.
 */
struct BuilderSWR : public Builder {
   BuilderSWR(JitManager *pJitMgr)
      : Builder(pJitMgr)
   {
      pJitMgr->SetupNewModule();
   }

   PFN_VERTEX_FUNC
   CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs);
   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key);
};

+PFN_VERTEX_FUNC +BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +{ + swr_vs->linkageMask = 0; + + for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) { + switch (swr_vs->info.base.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + default: + swr_vs->linkageMask |= (1 << i); + break; + } + } + + // tgsi_dump(swr_vs->pipe.tokens, 0); + + struct gallivm_state *gallivm = + gallivm_create("VS", wrap(&JM()->mContext)); + gallivm->module = wrap(JM()->mpCurrentModule); + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + + memset(outputs, 0, sizeof(outputs)); + + AttrBuilder attrBuilder; + attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); + AttributeSet attrSet = AttributeSet::get( + JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); + + std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), +
PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; + FunctionType *vsFuncType = + FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); + + // create new vertex shader function + auto pFunction = Function::Create(vsFuncType, + GlobalValue::ExternalLinkage, + "VS", + JM()->mpCurrentModule); + pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); + + BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); + IRB()->SetInsertPoint(block); + LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); + + auto argitr = pFunction->arg_begin(); + Value *hPrivateData = &*argitr++; + hPrivateData->setName("hPrivateData"); + Value *pVsCtx = &*argitr++; + pVsCtx->setName("vsCtx"); + + Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); + + consts_ptr->setName("vs_constants"); + Value *const_sizes_ptr = + GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); + const_sizes_ptr->setName("num_vs_constants"); + + Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); + + for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { + const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (mask & (1 << channel)) { + inputs[attrib][channel] = + wrap(LOAD(vtxInput, {0, 0, attrib, channel})); + } + } + } + + struct lp_bld_tgsi_system_values system_values; + memset(&system_values, 0, sizeof(system_values)); + system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); + system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); + + lp_build_tgsi_soa(gallivm, + swr_vs->pipe.tokens, + lp_type_float_vec(32, 32 * 8), + NULL, // mask + wrap(consts_ptr), + wrap(const_sizes_ptr), + &system_values, + inputs, + outputs, + NULL, // wrap(hPrivateData), (sampler context) + NULL, // thread data + NULL, // sampler + &swr_vs->info.base, + NULL); // geometry shader face + + 
IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); + + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { + if (!outputs[attrib][channel]) + continue; + + Value *val = LOAD(unwrap(outputs[attrib][channel])); + + uint32_t outSlot = attrib; + if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) + outSlot = VERTEX_POINT_SIZE_SLOT; + STORE(val, vtxOutput, {0, 0, outSlot, channel}); + } + } + + RET_VOID(); + + gallivm_verify_function(gallivm, wrap(pFunction)); + gallivm_compile_module(gallivm); + + // lp_debug_dump_value(func); + + PFN_VERTEX_FUNC pFunc = + (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); + + debug_printf("vert shader %p\n", pFunc); + assert(pFunc && "Error: VertShader = NULL"); + +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) + JM()->mIsModuleFinalized = true; +#endif + + return pFunc; +} + +PFN_VERTEX_FUNC +swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +{ + BuilderSWR builder( + reinterpret_cast<JitManager *>(swr_screen(ctx->screen)->hJitMgr)); + return builder.CompileVS(ctx, swr_vs); +} + +static unsigned +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) +{ + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if ((info->output_semantic_name[i] == name) + && (info->output_semantic_index[i] == index)) { + return i - 1; // position is not part of the linkage + } + } + + if (name == TGSI_SEMANTIC_COLOR) { // BCOLOR fallback + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if ((info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) + && (info->output_semantic_index[i] == index)) { + return i - 1; // position is not part of the linkage + } + } + } + + return 0xFFFFFFFF; +} + +PFN_PIXEL_KERNEL +BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key) +{ + struct 
swr_fragment_shader *swr_fs = ctx->fs; + + // tgsi_dump(swr_fs->pipe.tokens, 0); + + struct gallivm_state *gallivm = + gallivm_create("FS", wrap(&JM()->mContext)); + gallivm->module = wrap(JM()->mpCurrentModule); + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + + memset(inputs, 0, sizeof(inputs)); + memset(outputs, 0, sizeof(outputs)); + + struct lp_build_sampler_soa *sampler = NULL; + + AttrBuilder attrBuilder; + attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); + AttributeSet attrSet = AttributeSet::get( + JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); + + std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; + FunctionType *funcType = + FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); + + auto pFunction = Function::Create(funcType, + GlobalValue::ExternalLinkage, + "FS", + JM()->mpCurrentModule); + pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); + + BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); + IRB()->SetInsertPoint(block); + LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); + + auto args = pFunction->arg_begin(); + Value *hPrivateData = &*args++; + hPrivateData->setName("hPrivateData"); + Value *pPS = &*args++; + pPS->setName("psCtx"); + + Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); + consts_ptr->setName("fs_constants"); + Value *const_sizes_ptr = + GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); + const_sizes_ptr->setName("num_fs_constants"); + + // xxx should check for flat shading versus interpolation + + + // load *pAttribs, *pPerspAttribs + Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); + Value *pPerspAttribs = + LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); + + swr_fs->constantMask = 0; + swr_fs->pointSpriteMask = 
0; + + for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { + const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; + const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; + const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; + + if (!mask) + continue; + + // load i,j + Value *vi = nullptr, *vj = nullptr; + switch (interpLoc) { + case TGSI_INTERPOLATE_LOC_CENTER: + vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); + vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); + vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); + vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j"); + break; + } + + // load/compute w + Value *vw = nullptr, *pAttribs; + if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) { + pAttribs = pPerspAttribs; + switch (interpLoc) { + case TGSI_INTERPOLATE_LOC_CENTER: + vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); + break; + } + } else { + pAttribs = pRawAttribs; + vw = VIMMED1(1.f); + } + + vw->setName("w"); + + ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; + ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; + + if (semantic_name == TGSI_SEMANTIC_FACE) { + Value *ff = + UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); + ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); + ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); + + inputs[attrib][0] = wrap(ff); + inputs[attrib][1] 
= wrap(VIMMED1(0.0f)); + inputs[attrib][2] = wrap(VIMMED1(0.0f)); + inputs[attrib][3] = wrap(VIMMED1(1.0f)); + continue; + } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord + inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); + inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); + inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); + inputs[attrib][3] = + wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); + continue; + } else if (semantic_name == TGSI_SEMANTIC_PRIMID) { + Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID"); + inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID)); + inputs[attrib][1] = wrap(VIMMED1(0)); + inputs[attrib][2] = wrap(VIMMED1(0)); + inputs[attrib][3] = wrap(VIMMED1(0)); + continue; + } + + unsigned linkedAttrib = + locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); + if (linkedAttrib == 0xFFFFFFFF) { + // not found - check for point sprite + if (ctx->rasterizer->sprite_coord_enable) { + linkedAttrib = ctx->vs->info.base.num_outputs - 1; + swr_fs->pointSpriteMask |= (1 << linkedAttrib); + } else { + fprintf(stderr, + "Missing %s[%d]\n", + tgsi_semantic_names[semantic_name], + semantic_idx); + assert(0 && "attribute linkage not found"); + } + } + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + swr_fs->constantMask |= 1 << linkedAttrib; + } + + for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (mask & (1 << channel)) { + Value *indexA = C(linkedAttrib * 12 + channel); + Value *indexB = C(linkedAttrib * 12 + channel + 4); + Value *indexC = C(linkedAttrib * 12 + channel + 8); + + if ((semantic_name == TGSI_SEMANTIC_COLOR) + && ctx->rasterizer->light_twoside) { + unsigned bcolorAttrib = locate_linkage( + TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); + + unsigned diff = 12 * (bcolorAttrib - linkedAttrib); + + Value *back = + XOR(C(1), 
LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); + + Value *offset = MUL(back, C(diff)); + offset->setName("offset"); + + indexA = ADD(indexA, offset); + indexB = ADD(indexB, offset); + indexC = ADD(indexC, offset); + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + swr_fs->constantMask |= 1 << bcolorAttrib; + } + } + + Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); + Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); + Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + inputs[attrib][channel] = wrap(va); + } else { + Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); + + vc = FMUL(vk, vc); + + Value *interp = FMUL(va, vi); + Value *interp1 = FMUL(vb, vj); + interp = FADD(interp, interp1); + interp = FADD(interp, vc); + if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) + interp = FMUL(interp, vw); + inputs[attrib][channel] = wrap(interp); + } + } + } + } + + sampler = swr_sampler_soa_create(key.sampler); + + struct lp_bld_tgsi_system_values system_values; + memset(&system_values, 0, sizeof(system_values)); + + struct lp_build_mask_context mask; + + if (swr_fs->info.base.uses_kill) { + Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); + lp_build_mask_begin( + &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); + } + + lp_build_tgsi_soa(gallivm, + swr_fs->pipe.tokens, + lp_type_float_vec(32, 32 * 8), + swr_fs->info.base.uses_kill ? 
&mask : NULL, // mask + wrap(consts_ptr), + wrap(const_sizes_ptr), + &system_values, + inputs, + outputs, + wrap(hPrivateData), + NULL, // thread data + sampler, // sampler + &swr_fs->info.base, + NULL); // geometry shader face + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; + attrib++) { + switch (swr_fs->info.base.output_semantic_name[attrib]) { + case TGSI_SEMANTIC_POSITION: { + // write z + LLVMValueRef outZ = + LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); + STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); + break; + } + case TGSI_SEMANTIC_COLOR: { + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (!outputs[attrib][channel]) + continue; + + LLVMValueRef out = + LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); + if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { + for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { + STORE(unwrap(out), + pPS, + {0, SWR_PS_CONTEXT_shaded, rt, channel}); + } + } else { + STORE(unwrap(out), + pPS, + {0, + SWR_PS_CONTEXT_shaded, + swr_fs->info.base.output_semantic_index[attrib], + channel}); + } + } + break; + } + default: { + fprintf(stderr, + "unknown output from FS %s[%d]\n", + tgsi_semantic_names[swr_fs->info.base + .output_semantic_name[attrib]], + swr_fs->info.base.output_semantic_index[attrib]); + break; + } + } + } + + LLVMValueRef mask_result = 0; + if (swr_fs->info.base.uses_kill) { + mask_result = lp_build_mask_end(&mask); + } + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + if (swr_fs->info.base.uses_kill) { + STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); + } + + RET_VOID(); + + gallivm_verify_function(gallivm, wrap(pFunction)); + + gallivm_compile_module(gallivm); + + PFN_PIXEL_KERNEL kernel = + (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); + debug_printf("frag shader %p\n", kernel); + 
assert(kernel && "Error: FragShader = NULL"); + +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) + JM()->mIsModuleFinalized = true; +#endif + + return kernel; +} + +PFN_PIXEL_KERNEL +swr_compile_fs(struct swr_context *ctx, swr_jit_key &key) +{ + BuilderSWR builder( + reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr)); + return builder.CompileFS(ctx, key); +} diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h new file mode 100644 index 00000000000..e22a7c48c2a --- /dev/null +++ b/src/gallium/drivers/swr/swr_shader.h @@ -0,0 +1,60 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#pragma once + +class swr_vertex_shader; +class swr_fragment_shader; +class swr_jit_key; + +PFN_VERTEX_FUNC +swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs); + +PFN_PIXEL_KERNEL +swr_compile_fs(struct swr_context *ctx, swr_jit_key &key); + +void swr_generate_fs_key(struct swr_jit_key &key, + struct swr_context *ctx, + swr_fragment_shader *swr_fs); + +struct swr_jit_key { + unsigned nr_cbufs; + unsigned light_twoside; + ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; + unsigned nr_samplers; + unsigned nr_sampler_views; + struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS]; +}; + +namespace std +{ +template <> struct hash<swr_jit_key> { + std::size_t operator()(const swr_jit_key &k) const + { + return util_hash_crc32(&k, sizeof(k)); + } +}; +}; + +bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp new file mode 100644 index 00000000000..49035b50ccd --- /dev/null +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -0,0 +1,1370 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "common/os.h" +#include "jit_api.h" +#include "JitManager.h" +#include "state_llvm.h" + +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_format.h" + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_helpers.h" +#include "util/u_framebuffer.h" + +#include "swr_state.h" +#include "swr_context.h" +#include "swr_context_llvm.h" +#include "swr_screen.h" +#include "swr_resource.h" +#include "swr_tex_sample.h" +#include "swr_scratch.h" +#include "swr_shader.h" + +/* These should be pulled out into separate files as necessary + * Just initializing everything here to get going. 
*/ + +static void * +swr_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state); + + memcpy(&state->pipe, blend, sizeof(*blend)); + + struct pipe_blend_state *pipe_blend = &state->pipe; + + for (int target = 0; + target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS); + target++) { + + struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target]; + SWR_RENDER_TARGET_BLEND_STATE &blendState = + state->blendState.renderTarget[target]; + RENDER_TARGET_BLEND_COMPILE_STATE &compileState = + state->compileState[target]; + + if (target != 0 && !pipe_blend->independent_blend_enable) { + memcpy(&compileState, + &state->compileState[0], + sizeof(RENDER_TARGET_BLEND_COMPILE_STATE)); + continue; + } + + compileState.blendEnable = rt_blend->blend_enable; + if (compileState.blendEnable) { + compileState.sourceAlphaBlendFactor = + swr_convert_blend_factor(rt_blend->alpha_src_factor); + compileState.destAlphaBlendFactor = + swr_convert_blend_factor(rt_blend->alpha_dst_factor); + compileState.sourceBlendFactor = + swr_convert_blend_factor(rt_blend->rgb_src_factor); + compileState.destBlendFactor = + swr_convert_blend_factor(rt_blend->rgb_dst_factor); + + compileState.colorBlendFunc = + swr_convert_blend_func(rt_blend->rgb_func); + compileState.alphaBlendFunc = + swr_convert_blend_func(rt_blend->alpha_func); + } + compileState.logicOpEnable = state->pipe.logicop_enable; + if (compileState.logicOpEnable) { + compileState.logicOpFunc = + swr_convert_logic_op(state->pipe.logicop_func); + } + + blendState.writeDisableRed = + (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1; + blendState.writeDisableGreen = + (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1; + blendState.writeDisableBlue = + (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1; + blendState.writeDisableAlpha = + (rt_blend->colormask & PIPE_MASK_A) ? 
0 : 1; + + if (rt_blend->colormask == 0) + compileState.blendEnable = false; + } + + return state; +} + +static void +swr_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->blend == blend) + return; + + ctx->blend = (swr_blend_state *)blend; + + ctx->dirty |= SWR_NEW_BLEND; +} + +static void +swr_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void +swr_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *color) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->blend_color = *color; + + ctx->dirty |= SWR_NEW_BLEND; +} + +static void +swr_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *ref) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->stencil_ref = *ref; + + ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; +} + +static void * +swr_create_depth_stencil_state( + struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct pipe_depth_stencil_alpha_state *state; + + state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil, + sizeof *depth_stencil); + + return state; +} + +static void +swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil) + return; + + ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil; + + ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; +} + +static void +swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} + + +static void * +swr_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + struct pipe_rasterizer_state *state; + state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast); + + return state; +} + +static void +swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle) 
+{ + struct swr_context *ctx = swr_context(pipe); + const struct pipe_rasterizer_state *rasterizer = + (const struct pipe_rasterizer_state *)handle; + + if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer) + return; + + ctx->rasterizer = (pipe_rasterizer_state *)rasterizer; + + ctx->dirty |= SWR_NEW_RASTERIZER; +} + +static void +swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} + + +static void * +swr_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct pipe_sampler_state *state = + (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler); + + return state; +} + +static void +swr_bind_sampler_states(struct pipe_context *pipe, + unsigned shader, + unsigned start, + unsigned num, + void **samplers) +{ + struct swr_context *ctx = swr_context(pipe); + unsigned i; + + assert(shader < PIPE_SHADER_TYPES); + assert(start + num <= Elements(ctx->samplers[shader])); + + /* set the new samplers */ + ctx->num_samplers[shader] = num; + for (i = 0; i < num; i++) { + ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i]; + } + + ctx->dirty |= SWR_NEW_SAMPLER; +} + +static void +swr_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + FREE(sampler); +} + + +static struct pipe_sampler_view * +swr_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + + if (view) { + *view = *templ; + view->reference.count = 1; + view->texture = NULL; + pipe_resource_reference(&view->texture, texture); + view->context = pipe; + } + + return view; +} + +static void +swr_set_sampler_views(struct pipe_context *pipe, + unsigned shader, + unsigned start, + unsigned num, + struct pipe_sampler_view **views) +{ + struct swr_context *ctx = swr_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + + assert(shader < 
PIPE_SHADER_TYPES); + assert(start + num <= Elements(ctx->sampler_views[shader])); + + /* set the new sampler views */ + ctx->num_sampler_views[shader] = num; + for (i = 0; i < num; i++) { + /* Note: we're using pipe_sampler_view_release() here to work around + * a possible crash when the old view belongs to another context that + * was already destroyed. + */ + pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]); + pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i], + views[i]); + } + + ctx->dirty |= SWR_NEW_SAMPLER_VIEW; +} + +static void +swr_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void * +swr_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *vs) +{ + struct swr_vertex_shader *swr_vs = + (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader); + if (!swr_vs) + return NULL; + + swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens); + swr_vs->pipe.stream_output = vs->stream_output; + + lp_build_tgsi_info(vs->tokens, &swr_vs->info); + + swr_vs->func = swr_compile_vs(pipe, swr_vs); + + swr_vs->soState = {0}; + + if (swr_vs->pipe.stream_output.num_outputs) { + pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output; + + swr_vs->soState.soEnable = true; + // soState.rasterizerDisable set on state dirty + // soState.streamToRasterizer not used + + for (uint32_t i = 0; i < stream_output->num_outputs; i++) { + swr_vs->soState.streamMasks[stream_output->output[i].stream] |= + 1 << (stream_output->output[i].register_index - 1); + } + for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { + swr_vs->soState.streamNumEntries[i] = + _mm_popcnt_u32(swr_vs->soState.streamMasks[i]); + } + } + + return swr_vs; +} + +static void +swr_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->vs == vs) + return; + + ctx->vs = (swr_vertex_shader 
*)vs; + ctx->dirty |= SWR_NEW_VS; +} + +static void +swr_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs; + FREE((void *)swr_vs->pipe.tokens); + FREE(vs); +} + +static void * +swr_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct swr_fragment_shader *swr_fs = new swr_fragment_shader; + if (!swr_fs) + return NULL; + + swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens); + + lp_build_tgsi_info(fs->tokens, &swr_fs->info); + + return swr_fs; +} + + +static void +swr_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->fs == fs) + return; + + ctx->fs = (swr_fragment_shader *)fs; + ctx->dirty |= SWR_NEW_FS; +} + +static void +swr_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs; + FREE((void *)swr_fs->pipe.tokens); + delete swr_fs; +} + + +static void +swr_set_constant_buffer(struct pipe_context *pipe, + uint shader, + uint index, + struct pipe_constant_buffer *cb) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_resource *constants = cb ? 
cb->buffer : NULL; + + assert(shader < PIPE_SHADER_TYPES); + assert(index < Elements(ctx->constants[shader])); + + /* note: reference counting */ + util_copy_constant_buffer(&ctx->constants[shader][index], cb); + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + ctx->dirty |= SWR_NEW_VSCONSTANTS; + } else if (shader == PIPE_SHADER_FRAGMENT) { + ctx->dirty |= SWR_NEW_FSCONSTANTS; + } + + if (cb && cb->user_buffer) { + pipe_resource_reference(&constants, NULL); + } +} + + +static void * +swr_create_vertex_elements_state(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *attribs) +{ + struct swr_vertex_element_state *velems; + assert(num_elements <= PIPE_MAX_ATTRIBS); + velems = CALLOC_STRUCT(swr_vertex_element_state); + if (velems) { + velems->fsState.numAttribs = num_elements; + for (unsigned i = 0; i < num_elements; i++) { + // XXX: we should do this keyed on the VS usage info + + const struct util_format_description *desc = + util_format_description(attribs[i].src_format); + + velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset; + velems->fsState.layout[i].Format = + mesa_to_swr_format(attribs[i].src_format); + velems->fsState.layout[i].StreamIndex = + attribs[i].vertex_buffer_index; + velems->fsState.layout[i].InstanceEnable = + attribs[i].instance_divisor != 0; + velems->fsState.layout[i].ComponentControl0 = + desc->channel[0].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl1 = + desc->channel[1].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl2 = + desc->channel[2].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl3 = + desc->channel[3].type != UTIL_FORMAT_TYPE_VOID + ? 
ComponentControl::StoreSrc + : ComponentControl::Store1Fp; + velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW; + velems->fsState.layout[i].InstanceDataStepRate = + attribs[i].instance_divisor; + + /* Calculate the pitch of each stream */ + const SWR_FORMAT_INFO &swr_desc = GetFormatInfo( + mesa_to_swr_format(attribs[i].src_format)); + velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp; + } + } + + return velems; +} + +static void +swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_vertex_element_state *swr_velems = + (struct swr_vertex_element_state *)velems; + + ctx->velems = swr_velems; + ctx->dirty |= SWR_NEW_VERTEX; +} + +static void +swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + /* XXX Need to destroy fetch shader? */ + FREE(velems); +} + + +static void +swr_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_elements, + const struct pipe_vertex_buffer *buffers) +{ + struct swr_context *ctx = swr_context(pipe); + + assert(num_elements <= PIPE_MAX_ATTRIBS); + + util_set_vertex_buffers_count(ctx->vertex_buffer, + &ctx->num_vertex_buffers, + buffers, + start_slot, + num_elements); + + ctx->dirty |= SWR_NEW_VERTEX; +} + + +static void +swr_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ib) + memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer)); + else + memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer)); + + ctx->dirty |= SWR_NEW_VERTEX; +} + +static void +swr_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->poly_stipple = *stipple; /* struct copy */ + ctx->dirty |= SWR_NEW_STIPPLE; +} + +static void +swr_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state 
*clip) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->clip = *clip; + /* XXX Unimplemented, but prevents crash */ + + ctx->dirty |= SWR_NEW_CLIP; +} + + +static void +swr_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_scissor_state *scissor) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->scissor = *scissor; + ctx->dirty |= SWR_NEW_SCISSOR; +} + +static void +swr_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->viewport = *vpt; + ctx->dirty |= SWR_NEW_VIEWPORT; +} + + +static void +swr_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct swr_context *ctx = swr_context(pipe); + + boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb); + + assert(fb->width <= KNOB_GUARDBAND_WIDTH); + assert(fb->height <= KNOB_GUARDBAND_HEIGHT); + + if (changed) { + unsigned i; + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&ctx->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < ctx->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL); + + ctx->framebuffer.nr_cbufs = fb->nr_cbufs; + + ctx->framebuffer.width = fb->width; + ctx->framebuffer.height = fb->height; + + pipe_surface_reference(&ctx->framebuffer.zsbuf, fb->zsbuf); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER; + } +} + + +static void +swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct swr_context *ctx = swr_context(pipe); + + if (sample_mask != ctx->sample_mask) { + ctx->sample_mask = sample_mask; + ctx->dirty |= SWR_NEW_RASTERIZER; + } +} + + +void +swr_update_derived(struct swr_context *ctx, + const struct pipe_draw_info *p_draw_info) +{ + /* Any state that requires dirty flags to be re-triggered sets this mask */ + /* For example, user_buffer vertex and 
index buffers. */ + unsigned post_update_dirty_flags = 0; + + /* Render Targets */ + if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { + struct pipe_framebuffer_state *fb = &ctx->framebuffer; + SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0}; + UINT i; + + /* colorbuffer targets */ + if (fb->nr_cbufs) + for (i = 0; i < fb->nr_cbufs; ++i) + if (fb->cbufs[i]) { + struct swr_resource *colorBuffer = + swr_resource(fb->cbufs[i]->texture); + new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr; + } + + /* depth/stencil target */ + if (fb->zsbuf) { + struct swr_resource *depthStencilBuffer = + swr_resource(fb->zsbuf->texture); + if (depthStencilBuffer->has_depth) { + new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr; + + if (depthStencilBuffer->has_stencil) + new_attachment[SWR_ATTACHMENT_STENCIL] = + &depthStencilBuffer->secondary; + + } else if (depthStencilBuffer->has_stencil) + new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr; + } + + /* Make the attachment updates */ + swr_draw_context *pDC = &ctx->swrDC; + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) { + void *new_base = nullptr; + if (new_attachment[i]) + new_base = new_attachment[i]->pBaseAddress; + + /* StoreTile for changed target */ + if (renderTargets[i].pBaseAddress != new_base) { + if (renderTargets[i].pBaseAddress) { + enum SWR_TILE_STATE post_state = (new_attachment[i] + ? 
SWR_TILE_INVALID : SWR_TILE_RESOLVED); + swr_store_render_target(ctx, i, post_state); + } + + /* Make new attachment */ + if (new_attachment[i]) + renderTargets[i] = *new_attachment[i]; + else + if (renderTargets[i].pBaseAddress) + renderTargets[i] = {0}; + } + } + } + + /* Raster state */ + if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) { + pipe_rasterizer_state *rasterizer = ctx->rasterizer; + pipe_framebuffer_state *fb = &ctx->framebuffer; + + SWR_RASTSTATE *rastState = &ctx->derived.rastState; + rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face); + rastState->frontWinding = rasterizer->front_ccw + ? SWR_FRONTWINDING_CCW + : SWR_FRONTWINDING_CW; + rastState->scissorEnable = rasterizer->scissor; + rastState->pointSize = rasterizer->point_size > 0.0f + ? rasterizer->point_size + : 1.0f; + rastState->lineWidth = rasterizer->line_width > 0.0f + ? rasterizer->line_width + : 1.0f; + + rastState->pointParam = rasterizer->point_size_per_vertex; + + rastState->pointSpriteEnable = rasterizer->sprite_coord_enable; + rastState->pointSpriteTopOrigin = + rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT; + + /* XXX TODO: Add multisample */ + rastState->msaaRastEnable = false; + rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL; + rastState->sampleCount = SWR_MULTISAMPLE_1X; + rastState->bForcedSampleCount = false; + + bool do_offset = false; + switch (rasterizer->fill_front) { + case PIPE_POLYGON_MODE_FILL: + do_offset = rasterizer->offset_tri; + break; + case PIPE_POLYGON_MODE_LINE: + do_offset = rasterizer->offset_line; + break; + case PIPE_POLYGON_MODE_POINT: + do_offset = rasterizer->offset_point; + break; + } + + if (do_offset) { + rastState->depthBias = rasterizer->offset_units; + rastState->slopeScaledDepthBias = rasterizer->offset_scale; + rastState->depthBiasClamp = rasterizer->offset_clamp; + } else { + rastState->depthBias = 0; + rastState->slopeScaledDepthBias = 0; + rastState->depthBiasClamp = 0; + } + struct pipe_surface 
*zb = fb->zsbuf; + if (zb && swr_resource(zb->texture)->has_depth) + rastState->depthFormat = swr_resource(zb->texture)->swr.format; + + rastState->depthClipEnable = rasterizer->depth_clip; + + SwrSetRastState(ctx->swrContext, rastState); + } + + /* Scissor */ + if (ctx->dirty & SWR_NEW_SCISSOR) { + pipe_scissor_state *scissor = &ctx->scissor; + BBOX bbox(scissor->miny, scissor->maxy, + scissor->minx, scissor->maxx); + SwrSetScissorRects(ctx->swrContext, 1, &bbox); + } + + /* Viewport */ + if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER + | SWR_NEW_RASTERIZER)) { + pipe_viewport_state *state = &ctx->viewport; + pipe_framebuffer_state *fb = &ctx->framebuffer; + pipe_rasterizer_state *rasterizer = ctx->rasterizer; + + SWR_VIEWPORT *vp = &ctx->derived.vp; + SWR_VIEWPORT_MATRIX *vpm = &ctx->derived.vpm; + + vp->x = state->translate[0] - state->scale[0]; + vp->width = state->translate[0] + state->scale[0]; + vp->y = state->translate[1] - fabs(state->scale[1]); + vp->height = state->translate[1] + fabs(state->scale[1]); + if (rasterizer->clip_halfz == 0) { + vp->minZ = state->translate[2] - state->scale[2]; + vp->maxZ = state->translate[2] + state->scale[2]; + } else { + vp->minZ = state->translate[2]; + vp->maxZ = state->translate[2] + state->scale[2]; + } + + vpm->m00 = state->scale[0]; + vpm->m11 = state->scale[1]; + vpm->m22 = state->scale[2]; + vpm->m30 = state->translate[0]; + vpm->m31 = state->translate[1]; + vpm->m32 = state->translate[2]; + + /* Now that the matrix is calculated, clip the view coords to screen + * size. OpenGL allows for -ve x,y in the viewport. 
+ */ + vp->x = std::max(vp->x, 0.0f); + vp->y = std::max(vp->y, 0.0f); + vp->width = std::min(vp->width, (float)fb->width); + vp->height = std::min(vp->height, (float)fb->height); + + SwrSetViewports(ctx->swrContext, 1, vp, vpm); + } + + /* Set vertex & index buffers */ + /* (using draw info if called by swr_draw_vbo) */ + if (ctx->dirty & SWR_NEW_VERTEX) { + uint32_t size, pitch, max_vertex, partial_inbounds; + const uint8_t *p_data; + + /* If being called by swr_draw_vbo, copy draw details */ + struct pipe_draw_info info = {0}; + if (p_draw_info) + info = *p_draw_info; + + /* vertex buffers */ + SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; + for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { + pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; + + pitch = vb->stride; + if (!vb->user_buffer) { + /* VBO + * size is based on buffer->width0 rather than info.max_index + * to prevent having to validate VBO on each draw */ + size = vb->buffer->width0; + max_vertex = size / pitch; + partial_inbounds = size % pitch; + + p_data = (const uint8_t *)swr_resource_data(vb->buffer) + + vb->buffer_offset; + } else { + /* Client buffer + * client memory is one-time use, re-trigger SWR_NEW_VERTEX to + * revalidate on each draw */ + post_update_dirty_flags |= SWR_NEW_VERTEX; + + if (pitch) { + size = (info.max_index - info.min_index + 1) * pitch; + } else { + /* pitch = 0, means constant value + * set size to 1 vertex */ + size = ctx->velems->stream_pitch[i]; + } + + max_vertex = info.max_index + 1; + partial_inbounds = 0; + + /* Copy only needed vertices to scratch space */ + size = AlignUp(size, 4); + const void *ptr = (const uint8_t *) vb->user_buffer + + info.min_index * pitch; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->vertex_buffer, ptr, size); + p_data = (const uint8_t *)ptr - info.min_index * pitch; + } + + swrVertexBuffers[i] = {0}; + swrVertexBuffers[i].index = i; + swrVertexBuffers[i].pitch = pitch; + swrVertexBuffers[i].pData = p_data; + 
swrVertexBuffers[i].size = size; + swrVertexBuffers[i].maxVertex = max_vertex; + swrVertexBuffers[i].partialInboundsSize = partial_inbounds; + } + + SwrSetVertexBuffers( + ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers); + + /* index buffer, if required (info passed in by swr_draw_vbo) */ + SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */ + if (info.indexed) { + pipe_index_buffer *ib = &ctx->index_buffer; + + pitch = ib->index_size ? ib->index_size : sizeof(uint32_t); + index_type = swr_convert_index_type(pitch); + + if (!ib->user_buffer) { + /* VBO + * size is based on buffer->width0 rather than info.count + * to prevent having to validate VBO on each draw */ + size = ib->buffer->width0; + p_data = + (const uint8_t *)swr_resource_data(ib->buffer) + ib->offset; + } else { + /* Client buffer + * client memory is one-time use, re-trigger SWR_NEW_VERTEX to + * revalidate on each draw */ + post_update_dirty_flags |= SWR_NEW_VERTEX; + + size = info.count * pitch; + size = AlignUp(size, 4); + + /* Copy indices to scratch space */ + const void *ptr = ib->user_buffer; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->index_buffer, ptr, size); + p_data = (const uint8_t *)ptr; + } + + SWR_INDEX_BUFFER_STATE swrIndexBuffer; + swrIndexBuffer.format = swr_convert_index_type(ib->index_size); + swrIndexBuffer.pIndices = p_data; + swrIndexBuffer.size = size; + + SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); + } + + struct swr_vertex_element_state *velems = ctx->velems; + if (velems && velems->fsState.indexType != index_type) { + velems->fsFunc = NULL; + velems->fsState.indexType = index_type; + } + } + + /* VertexShader */ + if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_FRAMEBUFFER)) { + SwrSetVertexFunc(ctx->swrContext, ctx->vs->func); + } + + swr_jit_key key; + if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW + | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) { + memset(&key, 0, sizeof(key)); + swr_generate_fs_key(key, 
ctx, ctx->fs); + auto search = ctx->fs->map.find(key); + PFN_PIXEL_KERNEL func; + if (search != ctx->fs->map.end()) { + func = search->second; + } else { + func = swr_compile_fs(ctx, key); + ctx->fs->map.insert(std::make_pair(key, func)); + } + SWR_PS_STATE psState = {0}; + psState.pfnPixelShader = func; + psState.killsPixel = ctx->fs->info.base.uses_kill; + psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL; + psState.writesODepth = ctx->fs->info.base.writes_z; + psState.usesSourceDepth = ctx->fs->info.base.reads_z; + psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX + psState.numRenderTargets = ctx->framebuffer.nr_cbufs; + psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa + uint32_t barycentricsMask = 0; +#if 0 + // when we switch to mesa-master + if (ctx->fs->info.base.uses_persp_center || + ctx->fs->info.base.uses_linear_center) + barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK; + if (ctx->fs->info.base.uses_persp_centroid || + ctx->fs->info.base.uses_linear_centroid) + barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK; + if (ctx->fs->info.base.uses_persp_sample || + ctx->fs->info.base.uses_linear_sample) + barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK; +#else + for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) { + switch (ctx->fs->info.base.input_interpolate_loc[i]) { + case TGSI_INTERPOLATE_LOC_CENTER: + barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK; + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK; + break; + case TGSI_INTERPOLATE_LOC_SAMPLE: + barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK; + break; + } + } +#endif + psState.barycentricsMask = barycentricsMask; + psState.usesUAV = false; // XXX + psState.forceEarlyZ = false; + SwrSetPixelShaderState(ctx->swrContext, &psState); + } + + /* JIT sampler state */ + if (ctx->dirty & SWR_NEW_SAMPLER) { + swr_draw_context *pDC = &ctx->swrDC; + + for (unsigned i = 0; i < key.nr_samplers; i++) { + const struct 
pipe_sampler_state *sampler = + ctx->samplers[PIPE_SHADER_FRAGMENT][i]; + + if (sampler) { + pDC->samplersFS[i].min_lod = sampler->min_lod; + pDC->samplersFS[i].max_lod = sampler->max_lod; + pDC->samplersFS[i].lod_bias = sampler->lod_bias; + COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f); + } + } + } + + /* JIT sampler view state */ + if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { + swr_draw_context *pDC = &ctx->swrDC; + + for (unsigned i = 0; i < key.nr_sampler_views; i++) { + struct pipe_sampler_view *view = + ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]; + + if (view) { + struct pipe_resource *res = view->texture; + struct swr_resource *swr_res = swr_resource(res); + struct swr_jit_texture *jit_tex = &pDC->texturesFS[i]; + memset(jit_tex, 0, sizeof(*jit_tex)); + jit_tex->width = res->width0; + jit_tex->height = res->height0; + jit_tex->depth = res->depth0; + jit_tex->first_level = view->u.tex.first_level; + jit_tex->last_level = view->u.tex.last_level; + jit_tex->base_ptr = swr_res->swr.pBaseAddress; + + for (unsigned level = jit_tex->first_level; + level <= jit_tex->last_level; + level++) { + jit_tex->row_stride[level] = swr_res->row_stride[level]; + jit_tex->img_stride[level] = swr_res->img_stride[level]; + jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; + } + } + } + } + + /* VertexShader Constants */ + if (ctx->dirty & SWR_NEW_VSCONSTANTS) { + swr_draw_context *pDC = &ctx->swrDC; + + for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + const pipe_constant_buffer *cb = + &ctx->constants[PIPE_SHADER_VERTEX][i]; + pDC->num_constantsVS[i] = cb->buffer_size; + if (cb->buffer) + pDC->constantVS[i] = + (const float *)((const BYTE *)cb->buffer + cb->buffer_offset); + else { + /* Need to copy these constants to scratch space */ + if (cb->user_buffer && cb->buffer_size) { + const void *ptr = + ((const BYTE *)cb->user_buffer + cb->buffer_offset); + uint32_t size = AlignUp(cb->buffer_size, 4); + ptr = 
swr_copy_to_scratch_space( + ctx, &ctx->scratch->vs_constants, ptr, size); + pDC->constantVS[i] = (const float *)ptr; + } + } + } + } + + /* FragmentShader Constants */ + if (ctx->dirty & SWR_NEW_FSCONSTANTS) { + swr_draw_context *pDC = &ctx->swrDC; + + for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + const pipe_constant_buffer *cb = + &ctx->constants[PIPE_SHADER_FRAGMENT][i]; + pDC->num_constantsFS[i] = cb->buffer_size; + if (cb->buffer) + pDC->constantFS[i] = + (const float *)((const BYTE *)cb->buffer + cb->buffer_offset); + else { + /* Need to copy these constants to scratch space */ + if (cb->user_buffer && cb->buffer_size) { + const void *ptr = + ((const BYTE *)cb->user_buffer + cb->buffer_offset); + uint32_t size = AlignUp(cb->buffer_size, 4); + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->fs_constants, ptr, size); + pDC->constantFS[i] = (const float *)ptr; + } + } + } + } + + /* Depth/stencil state */ + if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) { + struct pipe_depth_state *depth = &(ctx->depth_stencil->depth); + struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil; + SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}}; + + /* XXX, incomplete. 
Need to flesh out stencil & alpha test state + struct pipe_stencil_state *front_stencil = + ctx->depth_stencil.stencil[0]; + struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1]; + struct pipe_alpha_state alpha; + */ + if (stencil[0].enabled) { + depthStencilState.stencilWriteEnable = 1; + depthStencilState.stencilTestEnable = 1; + depthStencilState.stencilTestFunc = + swr_convert_depth_func(stencil[0].func); + + depthStencilState.stencilPassDepthPassOp = + swr_convert_stencil_op(stencil[0].zpass_op); + depthStencilState.stencilPassDepthFailOp = + swr_convert_stencil_op(stencil[0].zfail_op); + depthStencilState.stencilFailOp = + swr_convert_stencil_op(stencil[0].fail_op); + depthStencilState.stencilWriteMask = stencil[0].writemask; + depthStencilState.stencilTestMask = stencil[0].valuemask; + depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0]; + } + if (stencil[1].enabled) { + depthStencilState.doubleSidedStencilTestEnable = 1; + + depthStencilState.backfaceStencilTestFunc = + swr_convert_depth_func(stencil[1].func); + + depthStencilState.backfaceStencilPassDepthPassOp = + swr_convert_stencil_op(stencil[1].zpass_op); + depthStencilState.backfaceStencilPassDepthFailOp = + swr_convert_stencil_op(stencil[1].zfail_op); + depthStencilState.backfaceStencilFailOp = + swr_convert_stencil_op(stencil[1].fail_op); + depthStencilState.backfaceStencilWriteMask = stencil[1].writemask; + depthStencilState.backfaceStencilTestMask = stencil[1].valuemask; + + depthStencilState.backfaceStencilRefValue = + ctx->stencil_ref.ref_value[1]; + } + + depthStencilState.depthTestEnable = depth->enabled; + depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func); + depthStencilState.depthWriteEnable = depth->writemask; + SwrSetDepthStencilState(ctx->swrContext, &depthStencilState); + } + + /* Blend State */ + if (ctx->dirty & (SWR_NEW_BLEND | + SWR_NEW_FRAMEBUFFER | + SWR_NEW_DEPTH_STENCIL_ALPHA)) { + struct pipe_framebuffer_state *fb = 
&ctx->framebuffer; + + SWR_BLEND_STATE blendState; + memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState)); + blendState.constantColor[0] = ctx->blend_color.color[0]; + blendState.constantColor[1] = ctx->blend_color.color[1]; + blendState.constantColor[2] = ctx->blend_color.color[2]; + blendState.constantColor[3] = ctx->blend_color.color[3]; + blendState.alphaTestReference = + *((uint32_t*)&ctx->depth_stencil->alpha.ref_value); + + // XXX MSAA + blendState.sampleMask = 0; + blendState.sampleCount = SWR_MULTISAMPLE_1X; + + /* If there are no color buffers bound, disable writes on RT0 + * and skip loop */ + if (fb->nr_cbufs == 0) { + blendState.renderTarget[0].writeDisableRed = 1; + blendState.renderTarget[0].writeDisableGreen = 1; + blendState.renderTarget[0].writeDisableBlue = 1; + blendState.renderTarget[0].writeDisableAlpha = 1; + SwrSetBlendFunc(ctx->swrContext, 0, NULL); + } + else + for (int target = 0; + target < std::min(SWR_NUM_RENDERTARGETS, + PIPE_MAX_COLOR_BUFS); + target++) { + if (!fb->cbufs[target]) + continue; + + struct swr_resource *colorBuffer = + swr_resource(fb->cbufs[target]->texture); + + BLEND_COMPILE_STATE compileState; + memset(&compileState, 0, sizeof(compileState)); + compileState.format = colorBuffer->swr.format; + memcpy(&compileState.blendState, + &ctx->blend->compileState[target], + sizeof(compileState.blendState)); + + if (compileState.blendState.blendEnable == false && + compileState.blendState.logicOpEnable == false) { + SwrSetBlendFunc(ctx->swrContext, target, NULL); + continue; + } + + compileState.desc.alphaTestEnable = + ctx->depth_stencil->alpha.enabled; + compileState.desc.independentAlphaBlendEnable = + ctx->blend->pipe.independent_blend_enable; + compileState.desc.alphaToCoverageEnable = + ctx->blend->pipe.alpha_to_coverage; + compileState.desc.sampleMaskEnable = 0; // XXX + compileState.desc.numSamples = 1; // XXX + + compileState.alphaTestFunction = + swr_convert_depth_func(ctx->depth_stencil->alpha.func); + 
compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx + + PFN_BLEND_JIT_FUNC func = NULL; + auto search = ctx->blendJIT->find(compileState); + if (search != ctx->blendJIT->end()) { + func = search->second; + } else { + HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr; + func = JitCompileBlend(hJitMgr, compileState); + debug_printf("BLEND shader %p\n", func); + assert(func && "Error: BlendShader = NULL"); + + ctx->blendJIT->insert(std::make_pair(compileState, func)); + } + SwrSetBlendFunc(ctx->swrContext, target, func); + } + + SwrSetBlendState(ctx->swrContext, &blendState); + } + + if (ctx->dirty & SWR_NEW_STIPPLE) { + /* XXX What to do with this one??? SWR doesn't stipple */ + } + + if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) { + ctx->vs->soState.rasterizerDisable = + ctx->rasterizer->rasterizer_discard; + SwrSetSoState(ctx->swrContext, &ctx->vs->soState); + + pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output; + + for (uint32_t i = 0; i < ctx->num_so_targets; i++) { + SWR_STREAMOUT_BUFFER buffer = {0}; + if (!ctx->so_targets[i]) + continue; + buffer.enable = true; + buffer.pBuffer = + (uint32_t *)swr_resource_data(ctx->so_targets[i]->buffer); + buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; + buffer.pitch = stream_output->stride[i]; + buffer.streamOffset = ctx->so_targets[i]->buffer_offset >> 2; + + SwrSetSoBuffers(ctx->swrContext, &buffer, i); + } + } + + uint32_t linkage = ctx->vs->linkageMask; + if (ctx->rasterizer->sprite_coord_enable) + linkage |= (1 << ctx->vs->info.base.num_outputs); + + SwrSetLinkage(ctx->swrContext, linkage, NULL); + + // set up frontend state + SWR_FRONTEND_STATE feState = {0}; + SwrSetFrontendState(ctx->swrContext, &feState); + + // set up backend state + SWR_BACKEND_STATE backendState = {0}; + backendState.numAttributes = 1; + backendState.numComponents[0] = 4; + backendState.constantInterpolationMask = ctx->fs->constantMask; + backendState.pointSpriteTexCoordMask = 
ctx->fs->pointSpriteMask; + + SwrSetBackendState(ctx->swrContext, &backendState); + + ctx->dirty = post_update_dirty_flags; +} + +static struct pipe_stream_output_target * +swr_create_so_target(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + + target->context = pipe; + target->reference.count = 1; + pipe_resource_reference(&target->buffer, buffer); + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + return target; +} + +static void +swr_destroy_so_target(struct pipe_context *pipe, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + FREE(target); +} + +static void +swr_set_so_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct swr_context *swr = swr_context(pipe); + uint32_t i; + + assert(num_targets < MAX_SO_STREAMS); + + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference( + (struct pipe_stream_output_target **)&swr->so_targets[i], + targets[i]); + } + + for (/* fall-through */; i < swr->num_so_targets; i++) { + pipe_so_target_reference( + (struct pipe_stream_output_target **)&swr->so_targets[i], NULL); + } + + swr->num_so_targets = num_targets; + + swr->dirty = SWR_NEW_SO; +} + + +void +swr_state_init(struct pipe_context *pipe) +{ + pipe->create_blend_state = swr_create_blend_state; + pipe->bind_blend_state = swr_bind_blend_state; + pipe->delete_blend_state = swr_delete_blend_state; + + pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state; + pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state; + pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state; + + pipe->create_rasterizer_state = swr_create_rasterizer_state; + pipe->bind_rasterizer_state = 
swr_bind_rasterizer_state; + pipe->delete_rasterizer_state = swr_delete_rasterizer_state; + + pipe->create_sampler_state = swr_create_sampler_state; + pipe->bind_sampler_states = swr_bind_sampler_states; + pipe->delete_sampler_state = swr_delete_sampler_state; + + pipe->create_sampler_view = swr_create_sampler_view; + pipe->set_sampler_views = swr_set_sampler_views; + pipe->sampler_view_destroy = swr_sampler_view_destroy; + + pipe->create_vs_state = swr_create_vs_state; + pipe->bind_vs_state = swr_bind_vs_state; + pipe->delete_vs_state = swr_delete_vs_state; + + pipe->create_fs_state = swr_create_fs_state; + pipe->bind_fs_state = swr_bind_fs_state; + pipe->delete_fs_state = swr_delete_fs_state; + + pipe->set_constant_buffer = swr_set_constant_buffer; + + pipe->create_vertex_elements_state = swr_create_vertex_elements_state; + pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state; + pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state; + + pipe->set_vertex_buffers = swr_set_vertex_buffers; + pipe->set_index_buffer = swr_set_index_buffer; + + pipe->set_polygon_stipple = swr_set_polygon_stipple; + pipe->set_clip_state = swr_set_clip_state; + pipe->set_scissor_states = swr_set_scissor_states; + pipe->set_viewport_states = swr_set_viewport_states; + + pipe->set_framebuffer_state = swr_set_framebuffer_state; + + pipe->set_blend_color = swr_set_blend_color; + pipe->set_stencil_ref = swr_set_stencil_ref; + + pipe->set_sample_mask = swr_set_sample_mask; + + pipe->create_stream_output_target = swr_create_so_target; + pipe->stream_output_target_destroy = swr_destroy_so_target; + pipe->set_stream_output_targets = swr_set_so_targets; +} diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h new file mode 100644 index 00000000000..a2b4d808aa3 --- /dev/null +++ b/src/gallium/drivers/swr/swr_state.h @@ -0,0 +1,307 @@ +/**************************************************************************** + * Copyright (C) 2015 
Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_STATE_H +#define SWR_STATE_H + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_hash.h" +#include "api.h" +#include "swr_tex_sample.h" +#include "swr_shader.h" +#include <unordered_map> + +/* skeleton */ +struct swr_vertex_shader { + struct pipe_shader_state pipe; + struct lp_tgsi_info info; + unsigned linkageMask; + PFN_VERTEX_FUNC func; + SWR_STREAMOUT_STATE soState; + PFN_SO_FUNC soFunc[PIPE_PRIM_MAX]; +}; + +struct swr_fragment_shader { + struct pipe_shader_state pipe; + struct lp_tgsi_info info; + uint32_t constantMask; + uint32_t pointSpriteMask; + std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map; +}; + +/* Vertex element state */ +struct swr_vertex_element_state { + FETCH_COMPILE_STATE fsState; + PFN_FETCH_FUNC fsFunc; + uint32_t stream_pitch[PIPE_MAX_ATTRIBS]; +}; + +struct swr_blend_state { + struct pipe_blend_state pipe; + SWR_BLEND_STATE blendState; + RENDER_TARGET_BLEND_COMPILE_STATE compileState[PIPE_MAX_COLOR_BUFS]; +}; + +/* + * Derived SWR API DrawState + * For convenience of making simple changes without re-deriving state. + */ +struct swr_derived_state { + SWR_RASTSTATE rastState; + SWR_VIEWPORT vp; + SWR_VIEWPORT_MATRIX vpm; +}; + +void swr_update_derived(struct swr_context *, + const struct pipe_draw_info * = nullptr); + +/* + * Conversion functions: Convert mesa state defines to SWR. 
+ */ + +static INLINE SWR_LOGIC_OP +swr_convert_logic_op(const UINT op) +{ + switch (op) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_REVERSE; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INVERT; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INVERTED; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_REVERSE; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + assert(0 && "Unsupported logic op"); + return LOGICOP_NOOP; + } +} + +static INLINE SWR_STENCILOP +swr_convert_stencil_op(const UINT op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + assert(0 && "Unsupported stencil op"); + return STENCILOP_KEEP; + } +} + +static INLINE SWR_FORMAT +swr_convert_index_type(const UINT index_size) +{ + switch (index_size) { + case sizeof(unsigned char): + return R8_UINT; + case sizeof(unsigned short): + return R16_UINT; + case sizeof(unsigned int): + return R32_UINT; + default: + assert(0 && "Unsupported index type"); + return R32_UINT; + } +} + + +static INLINE 
SWR_ZFUNCTION +swr_convert_depth_func(const UINT pipe_func) +{ + switch (pipe_func) { + case PIPE_FUNC_NEVER: + return ZFUNC_NEVER; + case PIPE_FUNC_LESS: + return ZFUNC_LT; + case PIPE_FUNC_EQUAL: + return ZFUNC_EQ; + case PIPE_FUNC_LEQUAL: + return ZFUNC_LE; + case PIPE_FUNC_GREATER: + return ZFUNC_GT; + case PIPE_FUNC_NOTEQUAL: + return ZFUNC_NE; + case PIPE_FUNC_GEQUAL: + return ZFUNC_GE; + case PIPE_FUNC_ALWAYS: + return ZFUNC_ALWAYS; + default: + assert(0 && "Unsupported depth func"); + return ZFUNC_ALWAYS; + } +} + + +static INLINE SWR_CULLMODE +swr_convert_cull_mode(const UINT cull_face) +{ + switch (cull_face) { + case PIPE_FACE_NONE: + return SWR_CULLMODE_NONE; + case PIPE_FACE_FRONT: + return SWR_CULLMODE_FRONT; + case PIPE_FACE_BACK: + return SWR_CULLMODE_BACK; + case PIPE_FACE_FRONT_AND_BACK: + return SWR_CULLMODE_BOTH; + default: + assert(0 && "Invalid cull mode"); + return SWR_CULLMODE_NONE; + } +} + +static INLINE SWR_BLEND_OP +swr_convert_blend_func(const UINT blend_func) +{ + switch (blend_func) { + case PIPE_BLEND_ADD: + return BLENDOP_ADD; + case PIPE_BLEND_SUBTRACT: + return BLENDOP_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDOP_REVSUBTRACT; + case PIPE_BLEND_MIN: + return BLENDOP_MIN; + case PIPE_BLEND_MAX: + return BLENDOP_MAX; + default: + assert(0 && "Invalid blend func"); + return BLENDOP_ADD; + } +} + +static INLINE SWR_BLEND_FACTOR +swr_convert_blend_factor(const UINT blend_factor) +{ + switch (blend_factor) { + case PIPE_BLENDFACTOR_ONE: + return BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + 
return BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return BLENDFACTOR_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return BLENDFACTOR_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return BLENDFACTOR_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return BLENDFACTOR_INV_SRC1_ALPHA; + default: + assert(0 && "Invalid blend factor"); + return BLENDFACTOR_ONE; + } +} + +static INLINE enum SWR_SURFACE_TYPE +swr_convert_target_type(const enum pipe_texture_target target) +{ + switch (target) { + case PIPE_BUFFER: + return SURFACE_BUFFER; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return SURFACE_1D; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + return SURFACE_2D; + case PIPE_TEXTURE_3D: + return SURFACE_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return SURFACE_CUBE; + default: + assert(0); + return SURFACE_NULL; + } +} +#endif diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp new file mode 100644 index 00000000000..8e01e32e280 --- /dev/null +++ b/src/gallium/drivers/swr/swr_tex_sample.cpp @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Largely a copy of llvmpipe's lp_tex_sample.c + */ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - SWR driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. 
+ * + * @author Jose Fonseca <[email protected]> + */ + +#include "state.h" +#include "JitManager.h" +#include "state_llvm.h" + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_memory.h" + +#include "swr_tex_sample.h" +#include "swr_context_llvm.h" + + +/** + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. + */ +struct swr_sampler_dynamic_state { + struct lp_sampler_dynamic_state base; + + const struct swr_sampler_static_state *static_state; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct swr_sampler_soa { + struct lp_build_sampler_soa base; + + struct swr_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. 
+ * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +swr_texture_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned texture_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].textures */ + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS); + /* context[0].textures[unit] */ + indices[2] = lp_build_const_int32(gallivm, texture_unit); + /* context[0].textures[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.texture%u.%s", texture_unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. + * + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * swr internals. 
+ */ +#define SWR_TEXTURE_MEMBER(_name, _emit_load) \ + static LLVMValueRef swr_texture_##_name( \ + const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned texture_unit) \ + { \ + return swr_texture_member(base, \ + gallivm, \ + context_ptr, \ + texture_unit, \ + swr_jit_texture_##_name, \ + #_name, \ + _emit_load); \ + } + + +SWR_TEXTURE_MEMBER(width, TRUE) +SWR_TEXTURE_MEMBER(height, TRUE) +SWR_TEXTURE_MEMBER(depth, TRUE) +SWR_TEXTURE_MEMBER(first_level, TRUE) +SWR_TEXTURE_MEMBER(last_level, TRUE) +SWR_TEXTURE_MEMBER(base_ptr, TRUE) +SWR_TEXTURE_MEMBER(row_stride, FALSE) +SWR_TEXTURE_MEMBER(img_stride, FALSE) +SWR_TEXTURE_MEMBER(mip_offsets, FALSE) + + +/** + * Fetch the specified member of the lp_jit_sampler structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +swr_sampler_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned sampler_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + + assert(sampler_unit < PIPE_MAX_SAMPLERS); + + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].samplers */ + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS); + /* context[0].samplers[unit] */ + indices[2] = lp_build_const_int32(gallivm, sampler_unit); + /* context[0].samplers[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.sampler%u.%s", 
sampler_unit, member_name); + + return res; +} + + +#define SWR_SAMPLER_MEMBER(_name, _emit_load) \ + static LLVMValueRef swr_sampler_##_name( \ + const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned sampler_unit) \ + { \ + return swr_sampler_member(base, \ + gallivm, \ + context_ptr, \ + sampler_unit, \ + swr_jit_sampler_##_name, \ + #_name, \ + _emit_load); \ + } + + +SWR_SAMPLER_MEMBER(min_lod, TRUE) +SWR_SAMPLER_MEMBER(max_lod, TRUE) +SWR_SAMPLER_MEMBER(lod_bias, TRUE) +SWR_SAMPLER_MEMBER(border_color, FALSE) + + +static void +swr_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. + */ +static void +swr_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + const struct lp_sampler_params *params) +{ + struct swr_sampler_soa *sampler = (struct swr_sampler_soa *)base; + unsigned texture_index = params->texture_index; + unsigned sampler_index = params->sampler_index; + + assert(sampler_index < PIPE_MAX_SAMPLERS); + assert(texture_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + +#if 0 + lp_build_sample_nop(gallivm, params->type, params->coords, params->texel); +#else + lp_build_sample_soa( + &sampler->dynamic_state.static_state[texture_index].texture_state, + &sampler->dynamic_state.static_state[sampler_index].sampler_state, + &sampler->dynamic_state.base, + gallivm, + params); +#endif +} + +/** + * Fetch the texture size. 
+ */ +static void +swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + struct lp_type type, + unsigned texture_unit, + unsigned target, + LLVMValueRef context_ptr, + boolean is_sviewinfo, + enum lp_sampler_lod_property lod_property, + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef *sizes_out) +{ + struct swr_sampler_soa *sampler = (struct swr_sampler_soa *)base; + + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + + lp_build_size_query_soa( + gallivm, + &sampler->dynamic_state.static_state[texture_unit].texture_state, + &sampler->dynamic_state.base, + type, + texture_unit, + target, + context_ptr, + is_sviewinfo, + lod_property, + explicit_lod, + sizes_out); +} + + +struct lp_build_sampler_soa * +swr_sampler_soa_create(const struct swr_sampler_static_state *static_state) +{ + struct swr_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(swr_sampler_soa); + if (!sampler) + return NULL; + + sampler->base.destroy = swr_sampler_soa_destroy; + sampler->base.emit_tex_sample = swr_sampler_soa_emit_fetch_texel; + sampler->base.emit_size_query = swr_sampler_soa_emit_size_query; + sampler->dynamic_state.base.width = swr_texture_width; + sampler->dynamic_state.base.height = swr_texture_height; + sampler->dynamic_state.base.depth = swr_texture_depth; + sampler->dynamic_state.base.first_level = swr_texture_first_level; + sampler->dynamic_state.base.last_level = swr_texture_last_level; + sampler->dynamic_state.base.base_ptr = swr_texture_base_ptr; + sampler->dynamic_state.base.row_stride = swr_texture_row_stride; + sampler->dynamic_state.base.img_stride = swr_texture_img_stride; + sampler->dynamic_state.base.mip_offsets = swr_texture_mip_offsets; + sampler->dynamic_state.base.min_lod = swr_sampler_min_lod; + sampler->dynamic_state.base.max_lod = swr_sampler_max_lod; + sampler->dynamic_state.base.lod_bias = swr_sampler_lod_bias; + sampler->dynamic_state.base.border_color = swr_sampler_border_color; + + 
sampler->dynamic_state.static_state = static_state; + + return &sampler->base; +} diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h new file mode 100644 index 00000000000..f5c368c108d --- /dev/null +++ b/src/gallium/drivers/swr/swr_tex_sample.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#pragma once + +#include "gallivm/lp_bld.h" + +struct swr_sampler_static_state { + /* + * These attributes are effectively interleaved for more sane key handling. + * However, there might be lots of null space if the amount of samplers and + * textures isn't the same. 
+ */ + struct lp_static_sampler_state sampler_state; + struct lp_static_texture_state texture_state; +}; + +/** + * Pure-LLVM texture sampling code generator. + * + */ +struct lp_build_sampler_soa * +swr_sampler_soa_create(const struct swr_sampler_static_state *key); |