From 2b2d3680bf164ec4f8b50436b96c3fc195318ea5 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 16 Feb 2016 17:27:28 -0600 Subject: gallium/swr: add OpenSWR driver OpenSWR is a new software rasterizer for x86 processors designed for high performance and high scalability on visualization workloads. Acked-by: Roland Scheidegger Acked-by: Jose Fonseca --- src/gallium/drivers/swr/swr_context.cpp | 407 ++++++++++++++++++++++++++++++++ 1 file changed, 407 insertions(+) create mode 100644 src/gallium/drivers/swr/swr_context.cpp (limited to 'src/gallium/drivers/swr/swr_context.cpp') diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp new file mode 100644 index 00000000000..0e7ebb74d92 --- /dev/null +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -0,0 +1,407 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +extern "C" { +#include "util/u_transfer.h" +#include "util/u_surface.h" +} + +#include "swr_context.h" +#include "swr_memory.h" +#include "swr_screen.h" +#include "swr_resource.h" +#include "swr_scratch.h" +#include "swr_query.h" + +#include "api.h" +#include "backend.h" + +static struct pipe_surface * +swr_create_surface(struct pipe_context *pipe, + struct pipe_resource *pt, + const struct pipe_surface *surf_tmpl) +{ + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + pipe_reference_init(&ps->reference, 1); + pipe_resource_reference(&ps->texture, pt); + ps->context = pipe; + ps->format = surf_tmpl->format; + if (pt->target != PIPE_BUFFER) { + assert(surf_tmpl->u.tex.level <= pt->last_level); + ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); + ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); + ps->u.tex.level = surf_tmpl->u.tex.level; + ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; + ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; + if (ps->u.tex.first_layer != ps->u.tex.last_layer) { + debug_printf("creating surface with multiple layers, rendering " + "to first layer only\n"); + } + } else { + /* setting width as number of elements should get us correct + * renderbuffer width */ + ps->width = surf_tmpl->u.buf.last_element + - surf_tmpl->u.buf.first_element + 1; + ps->height = pt->height0; + ps->u.buf.first_element = surf_tmpl->u.buf.first_element; + ps->u.buf.last_element = surf_tmpl->u.buf.last_element; + assert(ps->u.buf.first_element <= ps->u.buf.last_element); + 
assert(ps->u.buf.last_element < ps->width); + } + } + return ps; +} + +static void +swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) +{ + assert(surf->texture); + struct pipe_resource *resource = surf->texture; + + /* If the surface being destroyed is a current render target, + * call StoreTiles to resolve the hotTile state then set attachment + * to NULL. + */ + if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL + | PIPE_BIND_DISPLAY_TARGET)) { + struct swr_context *ctx = swr_context(pipe); + struct swr_resource *spr = swr_resource(resource); + swr_draw_context *pDC = &ctx->swrDC; + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { + swr_store_render_target(ctx, i, SWR_TILE_RESOLVED); + + /* + * Mesa thinks depth/stencil are fused, so we'll never get an + * explicit resource for stencil. So, if checking depth, then + * also check for stencil. + */ + if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) { + swr_store_render_target( + ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_RESOLVED); + } + + SwrWaitForIdle(ctx->swrContext); + break; + } + } + + pipe_resource_reference(&surf->texture, NULL); + FREE(surf); +} + + +static void * +swr_transfer_map(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **transfer) +{ + struct swr_resource *spr = swr_resource(resource); + struct pipe_transfer *pt; + enum pipe_format format = resource->format; + + assert(resource); + assert(level <= resource->last_level); + + /* + * If mapping any attached rendertarget, store tiles and wait for idle + * before giving CPU access to the surface. 
+ * (set postStoreTileState to SWR_TILE_INVALID so tiles are reloaded) + */ + if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL + | PIPE_BIND_DISPLAY_TARGET)) { + struct swr_context *ctx = swr_context(pipe); + swr_draw_context *pDC = &ctx->swrDC; + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++) + if (renderTargets[i].pBaseAddress == spr->swr.pBaseAddress) { + swr_store_render_target(ctx, i, SWR_TILE_INVALID); + /* + * Mesa thinks depth/stencil are fused, so we'll never get an + * explicit map for stencil. So, if mapping depth, then also + * store tile for stencil. + */ + if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) + swr_store_render_target( + ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_INVALID); + SwrWaitForIdle(ctx->swrContext); + break; + } + } + + pt = CALLOC_STRUCT(pipe_transfer); + if (!pt) + return NULL; + pipe_resource_reference(&pt->resource, resource); + pt->level = level; + pt->box = *box; + pt->stride = spr->row_stride[level]; + pt->layer_stride = spr->img_stride[level]; + + /* if we're mapping the depth/stencil, copy in stencil */ + if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT + && spr->has_stencil) { + for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { + spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i]; + } + } else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + && spr->has_stencil) { + for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) { + spr->swr.pBaseAddress[8 * i + 4] = spr->secondary.pBaseAddress[i]; + } + } + + unsigned offset = box->z * pt->layer_stride + box->y * pt->stride + + box->x * util_format_get_blocksize(format); + + *transfer = pt; + + return spr->swr.pBaseAddress + offset + spr->mip_offsets[level]; +} + +static void +swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) +{ + assert(transfer->resource); + + /* + * XXX TODO: use fences and come up 
with a real resource manager. + * + * If this resource has been mapped/unmapped, it's probably in use. Tag it + *with this context so + * we'll know to check dependencies when it's deleted. + */ + struct swr_resource *res = swr_resource(transfer->resource); + res->bound_to_context = (void *)pipe; + + /* if we're mapping the depth/stencil, copy out stencil */ + if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT + && res->has_stencil) { + for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { + res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3]; + } + } else if (res->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT + && res->has_stencil) { + for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) { + res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[8 * i + 4]; + } + } + + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); +} + + +static void +swr_resource_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, + unsigned dsty, + unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) + || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) { + util_resource_copy_region( + pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box); + return; + } + + debug_printf("unhandled swr_resource_copy\n"); +} + + +static void +swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_blit_info info = *blit_info; + + if (blit_info->render_condition_enable && !swr_check_render_cond(pipe)) + return; + + if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1 + && !util_format_is_depth_or_stencil(info.src.resource->format) + && !util_format_is_pure_integer(info.src.resource->format)) { + debug_printf("swr: color resolve unimplemented\n"); + 
return; + } + + if (util_try_blit_via_copy_region(pipe, &info)) { + return; /* done */ + } + + if (info.mask & PIPE_MASK_S) { + debug_printf("swr: cannot blit stencil, skipping\n"); + info.mask &= ~PIPE_MASK_S; + } + + if (!util_blitter_is_blit_supported(ctx->blitter, &info)) { + debug_printf("swr: blit unsupported %s -> %s\n", + util_format_short_name(info.src.resource->format), + util_format_short_name(info.dst.resource->format)); + return; + } + + /* XXX turn off occlusion and streamout queries */ + + util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); + util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); + util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); + /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ + util_blitter_save_so_targets( + ctx->blitter, + ctx->num_so_targets, + (struct pipe_stream_output_target **)ctx->so_targets); + util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer); + util_blitter_save_viewport(ctx->blitter, &ctx->viewport); + util_blitter_save_scissor(ctx->blitter, &ctx->scissor); + util_blitter_save_fragment_shader(ctx->blitter, ctx->fs); + util_blitter_save_blend(ctx->blitter, (void *)ctx->blend); + util_blitter_save_depth_stencil_alpha(ctx->blitter, + (void *)ctx->depth_stencil); + util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref); + util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask); + util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer); + util_blitter_save_fragment_sampler_states( + ctx->blitter, + ctx->num_samplers[PIPE_SHADER_FRAGMENT], + (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]); + util_blitter_save_fragment_sampler_views( + ctx->blitter, + ctx->num_sampler_views[PIPE_SHADER_FRAGMENT], + ctx->sampler_views[PIPE_SHADER_FRAGMENT]); + util_blitter_save_render_condition(ctx->blitter, + ctx->render_cond_query, + ctx->render_cond_cond, + ctx->render_cond_mode); + + util_blitter_blit(ctx->blitter, &info); +} + + 
+static void +swr_destroy(struct pipe_context *pipe) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->blitter) + util_blitter_destroy(ctx->blitter); + + if (ctx->swrContext) + SwrDestroyContext(ctx->swrContext); + + delete ctx->blendJIT; + + swr_destroy_scratch_buffers(ctx); + + FREE(ctx); +} + + +static void +swr_render_condition(struct pipe_context *pipe, + struct pipe_query *query, + boolean condition, + uint mode) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->render_cond_query = query; + ctx->render_cond_mode = mode; + ctx->render_cond_cond = condition; +} + + +struct pipe_context * +swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags) +{ + struct swr_context *ctx = CALLOC_STRUCT(swr_context); + ctx->blendJIT = + new std::unordered_map; + + SWR_CREATECONTEXT_INFO createInfo; + createInfo.driver = GL; + createInfo.privateStateSize = sizeof(swr_draw_context); + createInfo.maxSubContexts = 0; + createInfo.pfnLoadTile = swr_LoadHotTile; + createInfo.pfnStoreTile = swr_StoreHotTile; + createInfo.pfnClearTile = swr_StoreHotTileClear; + ctx->swrContext = SwrCreateContext(&createInfo); + + /* Init Load/Store/ClearTiles Tables */ + swr_InitMemoryModule(); + + InitBackendFuncTables(); + + if (ctx->swrContext == NULL) + goto fail; + + ctx->pipe.screen = screen; + ctx->pipe.destroy = swr_destroy; + ctx->pipe.priv = priv; + ctx->pipe.create_surface = swr_create_surface; + ctx->pipe.surface_destroy = swr_surface_destroy; + ctx->pipe.transfer_map = swr_transfer_map; + ctx->pipe.transfer_unmap = swr_transfer_unmap; + + ctx->pipe.transfer_flush_region = u_default_transfer_flush_region; + ctx->pipe.transfer_inline_write = u_default_transfer_inline_write; + + ctx->pipe.resource_copy_region = swr_resource_copy; + ctx->pipe.render_condition = swr_render_condition; + + swr_state_init(&ctx->pipe); + swr_clear_init(&ctx->pipe); + swr_draw_init(&ctx->pipe); + swr_query_init(&ctx->pipe); + + ctx->pipe.blit = swr_blit; + ctx->blitter = 
util_blitter_create(&ctx->pipe); + if (!ctx->blitter) { + goto fail; + } + + swr_init_scratch_buffers(ctx); + + return &ctx->pipe; + +fail: + /* Should really validate the init steps and fail gracefully */ + swr_destroy(&ctx->pipe); + return NULL; +} -- cgit v1.2.3