From 786af2f963925df2c2a6fb60b29a83e8340f03c7 Mon Sep 17 00:00:00 2001 From: Andreas Hartmetz Date: Sat, 4 Jan 2014 18:44:33 +0100 Subject: radeonsi: Apply si_* file naming scheme. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Marek Olšák --- src/gallium/drivers/radeonsi/Makefile.sources | 24 +- src/gallium/drivers/radeonsi/r600.h | 88 - src/gallium/drivers/radeonsi/r600_blit.c | 704 -------- src/gallium/drivers/radeonsi/r600_buffer.c | 70 - src/gallium/drivers/radeonsi/r600_hw_context.c | 716 -------- src/gallium/drivers/radeonsi/r600_query.c | 147 -- src/gallium/drivers/radeonsi/r600_resource.c | 61 - src/gallium/drivers/radeonsi/r600_resource.h | 40 - src/gallium/drivers/radeonsi/r600_translate.c | 53 - src/gallium/drivers/radeonsi/radeonsi_compute.c | 299 ---- src/gallium/drivers/radeonsi/radeonsi_pipe.c | 677 ------- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 263 --- src/gallium/drivers/radeonsi/radeonsi_pm4.c | 254 --- src/gallium/drivers/radeonsi/radeonsi_pm4.h | 95 - src/gallium/drivers/radeonsi/radeonsi_public.h | 30 - src/gallium/drivers/radeonsi/radeonsi_resource.h | 43 - src/gallium/drivers/radeonsi/radeonsi_shader.c | 2058 ---------------------- src/gallium/drivers/radeonsi/radeonsi_shader.h | 161 -- src/gallium/drivers/radeonsi/radeonsi_uvd.c | 153 -- src/gallium/drivers/radeonsi/si.h | 88 + src/gallium/drivers/radeonsi/si_blit.c | 704 ++++++++ src/gallium/drivers/radeonsi/si_buffer.c | 70 + src/gallium/drivers/radeonsi/si_commands.c | 4 +- src/gallium/drivers/radeonsi/si_compute.c | 299 ++++ src/gallium/drivers/radeonsi/si_descriptors.c | 6 +- src/gallium/drivers/radeonsi/si_hw_context.c | 716 ++++++++ src/gallium/drivers/radeonsi/si_pipe.c | 677 +++++++ src/gallium/drivers/radeonsi/si_pipe.h | 263 +++ src/gallium/drivers/radeonsi/si_pm4.c | 254 +++ src/gallium/drivers/radeonsi/si_pm4.h | 95 + src/gallium/drivers/radeonsi/si_public.h | 30 + src/gallium/drivers/radeonsi/si_query.c | 147 ++ 
src/gallium/drivers/radeonsi/si_resource.c | 61 + src/gallium/drivers/radeonsi/si_resource.h | 55 + src/gallium/drivers/radeonsi/si_shader.c | 2058 ++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_shader.h | 161 ++ src/gallium/drivers/radeonsi/si_state.c | 4 +- src/gallium/drivers/radeonsi/si_state.h | 2 +- src/gallium/drivers/radeonsi/si_state_draw.c | 4 +- src/gallium/drivers/radeonsi/si_translate.c | 53 + src/gallium/drivers/radeonsi/si_uvd.c | 153 ++ 41 files changed, 5906 insertions(+), 5934 deletions(-) delete mode 100644 src/gallium/drivers/radeonsi/r600.h delete mode 100644 src/gallium/drivers/radeonsi/r600_blit.c delete mode 100644 src/gallium/drivers/radeonsi/r600_buffer.c delete mode 100644 src/gallium/drivers/radeonsi/r600_hw_context.c delete mode 100644 src/gallium/drivers/radeonsi/r600_query.c delete mode 100644 src/gallium/drivers/radeonsi/r600_resource.c delete mode 100644 src/gallium/drivers/radeonsi/r600_resource.h delete mode 100644 src/gallium/drivers/radeonsi/r600_translate.c delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_compute.c delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_pipe.c delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_pipe.h delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_pm4.c delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_pm4.h delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_public.h delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_resource.h delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_shader.c delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_shader.h delete mode 100644 src/gallium/drivers/radeonsi/radeonsi_uvd.c create mode 100644 src/gallium/drivers/radeonsi/si.h create mode 100644 src/gallium/drivers/radeonsi/si_blit.c create mode 100644 src/gallium/drivers/radeonsi/si_buffer.c create mode 100644 src/gallium/drivers/radeonsi/si_compute.c create mode 100644 src/gallium/drivers/radeonsi/si_hw_context.c create mode 100644 
src/gallium/drivers/radeonsi/si_pipe.c create mode 100644 src/gallium/drivers/radeonsi/si_pipe.h create mode 100644 src/gallium/drivers/radeonsi/si_pm4.c create mode 100644 src/gallium/drivers/radeonsi/si_pm4.h create mode 100644 src/gallium/drivers/radeonsi/si_public.h create mode 100644 src/gallium/drivers/radeonsi/si_query.c create mode 100644 src/gallium/drivers/radeonsi/si_resource.c create mode 100644 src/gallium/drivers/radeonsi/si_resource.h create mode 100644 src/gallium/drivers/radeonsi/si_shader.c create mode 100644 src/gallium/drivers/radeonsi/si_shader.h create mode 100644 src/gallium/drivers/radeonsi/si_translate.c create mode 100644 src/gallium/drivers/radeonsi/si_uvd.c (limited to 'src/gallium/drivers/radeonsi') diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 1302c6a7dfe..33f1492f6ce 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -1,16 +1,16 @@ C_SOURCES := \ - r600_blit.c \ - r600_buffer.c \ - r600_hw_context.c \ - radeonsi_pipe.c \ - r600_query.c \ - r600_resource.c \ - radeonsi_shader.c \ - r600_translate.c \ - radeonsi_pm4.c \ - radeonsi_compute.c \ + si_blit.c \ + si_buffer.c \ + si_commands.c \ + si_compute.c \ si_descriptors.c \ + si_hw_context.c \ + si_pipe.c \ + si_pm4.c \ + si_query.c \ + si_resource.c \ + si_shader.c \ si_state.c \ si_state_draw.c \ - si_commands.c \ - radeonsi_uvd.c + si_translate.c \ + si_uvd.c diff --git a/src/gallium/drivers/radeonsi/r600.h b/src/gallium/drivers/radeonsi/r600.h deleted file mode 100644 index 13bbad4b369..00000000000 --- a/src/gallium/drivers/radeonsi/r600.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the 
rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#ifndef R600_H -#define R600_H - -#include "../../winsys/radeon/drm/radeon_winsys.h" -#include "util/u_double_list.h" -#include "util/u_transfer.h" - -#include "radeonsi_resource.h" - -struct winsys_handle; - -/* R600/R700 STATES */ -struct r600_query { - union { - uint64_t u64; - boolean b; - struct pipe_query_data_so_statistics so; - } result; - /* The kind of query */ - unsigned type; - /* Offset of the first result for current query */ - unsigned results_start; - /* Offset of the next free result after current query data */ - unsigned results_end; - /* Size of the result in memory for both begin_query and end_query, - * this can be one or two numbers, or it could even be a size of a structure. */ - unsigned result_size; - /* The buffer where query results are stored. It's used as a ring, - * data blocks for current query are stored sequentially from - * results_start to results_end, with wrapping on the buffer end */ - struct r600_resource *buffer; - /* The number of dwords for begin_query or end_query. 
*/ - unsigned num_cs_dw; - /* linked list of queries */ - struct list_head list; -}; - -struct r600_context; -struct r600_screen; - -void si_get_backend_mask(struct r600_context *ctx); -void si_context_flush(struct r600_context *ctx, unsigned flags); -void si_begin_new_cs(struct r600_context *ctx); - -struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); -void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query); -boolean r600_context_query_result(struct r600_context *ctx, - struct r600_query *query, - boolean wait, void *vresult); -void r600_query_begin(struct r600_context *ctx, struct r600_query *query); -void r600_query_end(struct r600_context *ctx, struct r600_query *query); -void r600_context_queries_suspend(struct r600_context *ctx); -void r600_context_queries_resume(struct r600_context *ctx); -void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, - int flag_wait); - -bool si_is_timer_query(unsigned type); -bool si_query_needs_begin(unsigned type); -void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); - -int si_context_init(struct r600_context *ctx); - -#endif diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c deleted file mode 100644 index 4c592b18b7d..00000000000 --- a/src/gallium/drivers/radeonsi/r600_blit.c +++ /dev/null @@ -1,704 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and 
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include "util/u_surface.h" -#include "util/u_blitter.h" -#include "util/u_format.h" -#include "radeonsi_pipe.h" -#include "si_state.h" - -enum r600_blitter_op /* bitmask */ -{ - R600_SAVE_TEXTURES = 1, - R600_SAVE_FRAMEBUFFER = 2, - R600_DISABLE_RENDER_COND = 4, - - R600_CLEAR = 0, - - R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER, - - R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | - R600_DISABLE_RENDER_COND, - - R600_BLIT = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | - R600_DISABLE_RENDER_COND, - - R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND, - - R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND -}; - -static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_context_queries_suspend(rctx); - - util_blitter_save_blend(rctx->blitter, rctx->queued.named.blend); - util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->queued.named.dsa); - util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); - util_blitter_save_rasterizer(rctx->blitter, rctx->queued.named.rasterizer); - util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); - util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); - util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); - if 
(rctx->queued.named.viewport) { - util_blitter_save_viewport(rctx->blitter, &rctx->queued.named.viewport->viewport); - } - util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer); - util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets, - (struct pipe_stream_output_target**)rctx->b.streamout.targets); - - if (op & R600_SAVE_FRAMEBUFFER) - util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); - - if (op & R600_SAVE_TEXTURES) { - util_blitter_save_fragment_sampler_states( - rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers, - (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers); - - util_blitter_save_fragment_sampler_views(rctx->blitter, - util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask & - ((1 << NUM_TEX_UNITS) - 1)), - rctx->samplers[PIPE_SHADER_FRAGMENT].views.views); - } - - if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) { - rctx->saved_render_cond = rctx->current_render_cond; - rctx->saved_render_cond_cond = rctx->current_render_cond_cond; - rctx->saved_render_cond_mode = rctx->current_render_cond_mode; - rctx->b.b.render_condition(&rctx->b.b, NULL, FALSE, 0); - } - -} - -static void r600_blitter_end(struct pipe_context *ctx) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - if (rctx->saved_render_cond) { - rctx->b.b.render_condition(&rctx->b.b, - rctx->saved_render_cond, - rctx->saved_render_cond_cond, - rctx->saved_render_cond_mode); - rctx->saved_render_cond = NULL; - } - r600_context_queries_resume(rctx); -} - -static unsigned u_max_sample(struct pipe_resource *r) -{ - return r->nr_samples ? 
r->nr_samples - 1 : 0; -} - -static void r600_blit_decompress_depth(struct pipe_context *ctx, - struct r600_texture *texture, - struct r600_texture *staging, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer, - unsigned first_sample, unsigned last_sample) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - unsigned layer, level, sample, checked_last_layer, max_layer, max_sample; - float depth = 1.0f; - const struct util_format_description *desc; - void **custom_dsa; - struct r600_texture *flushed_depth_texture = staging ? - staging : texture->flushed_depth_texture; - - if (!staging && !texture->dirty_level_mask) - return; - - max_sample = u_max_sample(&texture->resource.b.b); - - desc = util_format_description(flushed_depth_texture->resource.b.b.format); - switch (util_format_has_depth(desc) | util_format_has_stencil(desc) << 1) { - default: - assert(!"No depth or stencil to uncompress"); - return; - case 3: - custom_dsa = rctx->custom_dsa_flush_depth_stencil; - break; - case 2: - custom_dsa = rctx->custom_dsa_flush_stencil; - break; - case 1: - custom_dsa = rctx->custom_dsa_flush_depth; - break; - } - - for (level = first_level; level <= last_level; level++) { - if (!staging && !(texture->dirty_level_mask & (1 << level))) - continue; - - /* The smaller the mipmap level, the less layers there are - * as far as 3D textures are concerned. */ - max_layer = util_max_layer(&texture->resource.b.b, level); - checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; - - for (layer = first_layer; layer <= checked_last_layer; layer++) { - for (sample = first_sample; sample <= last_sample; sample++) { - struct pipe_surface *zsurf, *cbsurf, surf_tmpl; - - surf_tmpl.format = texture->resource.b.b.format; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - - zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl); - - surf_tmpl.format = flushed_depth_texture->resource.b.b.format; - cbsurf = ctx->create_surface(ctx, - (struct pipe_resource*)flushed_depth_texture, &surf_tmpl); - - r600_blitter_begin(ctx, R600_DECOMPRESS); - util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, 1 << sample, - custom_dsa[sample], depth); - r600_blitter_end(ctx); - - pipe_surface_reference(&zsurf, NULL); - pipe_surface_reference(&cbsurf, NULL); - } - } - - /* The texture will always be dirty if some layers aren't flushed. - * I don't think this case can occur though. */ - if (!staging && - first_layer == 0 && last_layer == max_layer && - first_sample == 0 && last_sample == max_sample) { - texture->dirty_level_mask &= ~(1 << level); - } - } -} - -static void si_blit_decompress_depth_in_place(struct r600_context *rctx, - struct r600_texture *texture, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer) -{ - struct pipe_surface *zsurf, surf_tmpl = {{0}}; - unsigned layer, max_layer, checked_last_layer, level; - - surf_tmpl.format = texture->resource.b.b.format; - - for (level = first_level; level <= last_level; level++) { - if (!(texture->dirty_level_mask & (1 << level))) - continue; - - surf_tmpl.u.tex.level = level; - - /* The smaller the mipmap level, the less layers there are - * as far as 3D textures are concerned. */ - max_layer = util_max_layer(&texture->resource.b.b, level); - checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; - - for (layer = first_layer; layer <= checked_last_layer; layer++) { - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - - zsurf = rctx->b.b.create_surface(&rctx->b.b, &texture->resource.b.b, &surf_tmpl); - - r600_blitter_begin(&rctx->b.b, R600_DECOMPRESS); - util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0, - rctx->custom_dsa_flush_inplace, - 1.0f); - r600_blitter_end(&rctx->b.b); - - pipe_surface_reference(&zsurf, NULL); - } - - /* The texture will always be dirty if some layers aren't flushed. - * I don't think this case occurs often though. */ - if (first_layer == 0 && last_layer == max_layer) { - texture->dirty_level_mask &= ~(1 << level); - } - } -} - -void si_flush_depth_textures(struct r600_context *rctx, - struct r600_textures_info *textures) -{ - unsigned i; - - for (i = 0; i < textures->n_views; ++i) { - struct pipe_sampler_view *view; - struct r600_texture *tex; - - view = textures->views.views[i]; - if (!view) continue; - - tex = (struct r600_texture *)view->texture; - if (!tex->is_depth || tex->is_flushing_texture) - continue; - - si_blit_decompress_depth_in_place(rctx, tex, - view->u.tex.first_level, view->u.tex.last_level, - 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); - } -} - -static void r600_blit_decompress_color(struct pipe_context *ctx, - struct r600_texture *rtex, - unsigned first_level, unsigned last_level, - unsigned first_layer, unsigned last_layer) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - unsigned layer, level, checked_last_layer, max_layer; - - if (!rtex->dirty_level_mask) - return; - - for (level = first_level; level <= last_level; level++) { - if (!(rtex->dirty_level_mask & (1 << level))) - continue; - - /* The smaller the mipmap level, the less layers there are - * as far as 3D textures are concerned. */ - max_layer = util_max_layer(&rtex->resource.b.b, level); - checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; - - for (layer = first_layer; layer <= checked_last_layer; layer++) { - struct pipe_surface *cbsurf, surf_tmpl; - - surf_tmpl.format = rtex->resource.b.b.format; - surf_tmpl.u.tex.level = level; - surf_tmpl.u.tex.first_layer = layer; - surf_tmpl.u.tex.last_layer = layer; - cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); - - r600_blitter_begin(ctx, R600_DECOMPRESS); - util_blitter_custom_color(rctx->blitter, cbsurf, - rctx->custom_blend_decompress); - r600_blitter_end(ctx); - - pipe_surface_reference(&cbsurf, NULL); - } - - /* The texture will always be dirty if some layers aren't flushed. - * I don't think this case occurs often though. */ - if (first_layer == 0 && last_layer == max_layer) { - rtex->dirty_level_mask &= ~(1 << level); - } - } -} - -void r600_decompress_color_textures(struct r600_context *rctx, - struct r600_textures_info *textures) -{ - unsigned i; - unsigned mask = textures->compressed_colortex_mask; - - while (mask) { - struct pipe_sampler_view *view; - struct r600_texture *tex; - - i = u_bit_scan(&mask); - - view = textures->views.views[i]; - assert(view); - - tex = (struct r600_texture *)view->texture; - assert(tex->cmask.size || tex->fmask.size); - - r600_blit_decompress_color(&rctx->b.b, tex, - view->u.tex.first_level, view->u.tex.last_level, - 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); - } -} - -static void r600_clear(struct pipe_context *ctx, unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - - r600_blitter_begin(ctx, R600_CLEAR); - util_blitter_clear(rctx->blitter, fb->width, fb->height, - util_framebuffer_get_num_layers(fb), - buffers, color, depth, stencil); - r600_blitter_end(ctx); -} - -static void r600_clear_render_target(struct pipe_context *ctx, - struct pipe_surface *dst, - const union pipe_color_union *color, 
- unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_blitter_begin(ctx, R600_CLEAR_SURFACE); - util_blitter_clear_render_target(rctx->blitter, dst, color, - dstx, dsty, width, height); - r600_blitter_end(ctx); -} - -static void r600_clear_depth_stencil(struct pipe_context *ctx, - struct pipe_surface *dst, - unsigned clear_flags, - double depth, - unsigned stencil, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_blitter_begin(ctx, R600_CLEAR_SURFACE); - util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, - dstx, dsty, width, height); - r600_blitter_end(ctx); -} - -/* Helper for decompressing a portion of a color or depth resource before - * blitting if any decompression is needed. - * The driver doesn't decompress resources automatically while u_blitter is - * rendering. */ -static void r600_decompress_subresource(struct pipe_context *ctx, - struct pipe_resource *tex, - unsigned level, - unsigned first_layer, unsigned last_layer) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_texture *rtex = (struct r600_texture*)tex; - - if (rtex->is_depth && !rtex->is_flushing_texture) { - si_blit_decompress_depth_in_place(rctx, rtex, - level, level, - first_layer, last_layer); - } else if (rtex->fmask.size || rtex->cmask.size) { - r600_blit_decompress_color(ctx, rtex, level, level, - first_layer, last_layer); - } -} - -struct texture_orig_info { - unsigned format; - unsigned width0; - unsigned height0; - unsigned npix_x; - unsigned npix_y; - unsigned npix0_x; - unsigned npix0_y; -}; - -static void r600_compressed_to_blittable(struct pipe_resource *tex, - unsigned level, - struct texture_orig_info *orig) -{ - struct r600_texture *rtex = (struct r600_texture*)tex; - unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format); - int new_format; 
- int new_height, new_width; - - orig->format = tex->format; - orig->width0 = tex->width0; - orig->height0 = tex->height0; - orig->npix0_x = rtex->surface.level[0].npix_x; - orig->npix0_y = rtex->surface.level[0].npix_y; - orig->npix_x = rtex->surface.level[level].npix_x; - orig->npix_y = rtex->surface.level[level].npix_y; - - if (pixsize == 8) - new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ - else - new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ - - new_width = util_format_get_nblocksx(tex->format, orig->width0); - new_height = util_format_get_nblocksy(tex->format, orig->height0); - - tex->width0 = new_width; - tex->height0 = new_height; - tex->format = new_format; - rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x); - rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y); - rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x); - rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y); - - /* By dividing the dimensions by 4, we effectively decrement - * last_level by 2, therefore the last 2 mipmap levels disappear and - * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2, - * 1x1) have equal slice sizes, which is an important assumption - * for this to work. - * - * In order to make the last 2 mipmap levels blittable, we have to - * add the slice size of the last mipmap level to the texture - * address, so that even though the hw thinks it reads last_level-2, - * it will actually read last_level-1, and if we add the slice size*2, - * it will read last_level. That's how this workaround works. 
- */ - if (level > rtex->resource.b.b.last_level-2) - rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2); -} - -static void r600_change_format(struct pipe_resource *tex, - unsigned level, - struct texture_orig_info *orig, - enum pipe_format format) -{ - struct r600_texture *rtex = (struct r600_texture*)tex; - - orig->format = tex->format; - orig->width0 = tex->width0; - orig->height0 = tex->height0; - orig->npix0_x = rtex->surface.level[0].npix_x; - orig->npix0_y = rtex->surface.level[0].npix_y; - orig->npix_x = rtex->surface.level[level].npix_x; - orig->npix_y = rtex->surface.level[level].npix_y; - - tex->format = format; -} - -static void r600_reset_blittable_to_orig(struct pipe_resource *tex, - unsigned level, - struct texture_orig_info *orig) -{ - struct r600_texture *rtex = (struct r600_texture*)tex; - - tex->format = orig->format; - tex->width0 = orig->width0; - tex->height0 = orig->height0; - rtex->surface.level[0].npix_x = orig->npix0_x; - rtex->surface.level[0].npix_y = orig->npix0_y; - rtex->surface.level[level].npix_x = orig->npix_x; - rtex->surface.level[level].npix_y = orig->npix_y; - rtex->mipmap_shift = 0; -} - -static void r600_resource_copy_region(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct texture_orig_info orig_info[2]; - struct pipe_box sbox; - const struct pipe_box *psbox = src_box; - boolean restore_orig[2]; - - /* Fallback for buffers. */ - if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - si_copy_buffer(rctx, dst, src, dstx, src_box->x, src_box->width); - return; - } - - memset(orig_info, 0, sizeof(orig_info)); - - /* The driver doesn't decompress resources automatically while - * u_blitter is rendering. 
*/ - r600_decompress_subresource(ctx, src, src_level, - src_box->z, src_box->z + src_box->depth - 1); - - restore_orig[0] = restore_orig[1] = FALSE; - - if (util_format_is_compressed(src->format) && - util_format_is_compressed(dst->format)) { - r600_compressed_to_blittable(src, src_level, &orig_info[0]); - restore_orig[0] = TRUE; - sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); - sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y); - sbox.z = src_box->z; - sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); - sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height); - sbox.depth = src_box->depth; - psbox=&sbox; - - r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); - restore_orig[1] = TRUE; - /* translate the dst box as well */ - dstx = util_format_get_nblocksx(orig_info[1].format, dstx); - dsty = util_format_get_nblocksy(orig_info[1].format, dsty); - } else if (!util_blitter_is_copy_supported(rctx->blitter, dst, src)) { - unsigned blocksize = util_format_get_blocksize(src->format); - - switch (blocksize) { - case 1: - r600_change_format(src, src_level, &orig_info[0], - PIPE_FORMAT_R8_UNORM); - r600_change_format(dst, dst_level, &orig_info[1], - PIPE_FORMAT_R8_UNORM); - break; - case 2: - r600_change_format(src, src_level, &orig_info[0], - PIPE_FORMAT_R8G8_UNORM); - r600_change_format(dst, dst_level, &orig_info[1], - PIPE_FORMAT_R8G8_UNORM); - break; - case 4: - r600_change_format(src, src_level, &orig_info[0], - PIPE_FORMAT_R8G8B8A8_UNORM); - r600_change_format(dst, dst_level, &orig_info[1], - PIPE_FORMAT_R8G8B8A8_UNORM); - break; - case 8: - r600_change_format(src, src_level, &orig_info[0], - PIPE_FORMAT_R16G16B16A16_UINT); - r600_change_format(dst, dst_level, &orig_info[1], - PIPE_FORMAT_R16G16B16A16_UINT); - break; - case 16: - r600_change_format(src, src_level, &orig_info[0], - PIPE_FORMAT_R32G32B32A32_UINT); - r600_change_format(dst, dst_level, &orig_info[1], - 
PIPE_FORMAT_R32G32B32A32_UINT); - break; - default: - fprintf(stderr, "Unhandled format %s with blocksize %u\n", - util_format_short_name(src->format), blocksize); - assert(0); - } - restore_orig[0] = TRUE; - restore_orig[1] = TRUE; - } - - r600_blitter_begin(ctx, R600_COPY); - util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz, - src, src_level, psbox); - r600_blitter_end(ctx); - - if (restore_orig[0]) - r600_reset_blittable_to_orig(src, src_level, &orig_info[0]); - - if (restore_orig[1]) - r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]); -} - -/* For MSAA integer resolving to work, we change the format to NORM using this function. */ -static enum pipe_format int_to_norm_format(enum pipe_format format) -{ - switch (format) { -#define REPLACE_FORMAT_SIGN(format,sign) \ - case PIPE_FORMAT_##format##_##sign##INT: \ - return PIPE_FORMAT_##format##_##sign##NORM -#define REPLACE_FORMAT(format) \ - REPLACE_FORMAT_SIGN(format, U); \ - REPLACE_FORMAT_SIGN(format, S) - - REPLACE_FORMAT_SIGN(B10G10R10A2, U); - REPLACE_FORMAT(R8); - REPLACE_FORMAT(R8G8); - REPLACE_FORMAT(R8G8B8X8); - REPLACE_FORMAT(R8G8B8A8); - REPLACE_FORMAT(A8); - REPLACE_FORMAT(I8); - REPLACE_FORMAT(L8); - REPLACE_FORMAT(L8A8); - REPLACE_FORMAT(R16); - REPLACE_FORMAT(R16G16); - REPLACE_FORMAT(R16G16B16X16); - REPLACE_FORMAT(R16G16B16A16); - REPLACE_FORMAT(A16); - REPLACE_FORMAT(I16); - REPLACE_FORMAT(L16); - REPLACE_FORMAT(L16A16); - -#undef REPLACE_FORMAT -#undef REPLACE_FORMAT_SIGN - default: - return format; - } -} - -static bool do_hardware_msaa_resolve(struct pipe_context *ctx, - const struct pipe_blit_info *info) -{ - struct r600_context *rctx = (struct r600_context*)ctx; - struct r600_texture *dst = (struct r600_texture*)info->dst.resource; - unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); - unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); - enum pipe_format format = int_to_norm_format(info->dst.format); - 
unsigned sample_mask = ~0; - - if (info->src.resource->nr_samples > 1 && - info->dst.resource->nr_samples <= 1 && - util_max_layer(info->src.resource, 0) == 0 && - util_max_layer(info->dst.resource, info->dst.level) == 0 && - info->dst.format == info->src.format && - !util_format_is_pure_integer(format) && - !util_format_is_depth_or_stencil(format) && - !info->scissor_enable && - (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && - dst_width == info->src.resource->width0 && - dst_height == info->src.resource->height0 && - info->dst.box.x == 0 && - info->dst.box.y == 0 && - info->dst.box.width == dst_width && - info->dst.box.height == dst_height && - info->dst.box.depth == 1 && - info->src.box.x == 0 && - info->src.box.y == 0 && - info->src.box.width == dst_width && - info->src.box.height == dst_height && - info->src.box.depth == 1 && - dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && - !(dst->surface.flags & RADEON_SURF_SCANOUT)) { - r600_blitter_begin(ctx, R600_COLOR_RESOLVE); - util_blitter_custom_resolve_color(rctx->blitter, - info->dst.resource, info->dst.level, - info->dst.box.z, - info->src.resource, info->src.box.z, - sample_mask, rctx->custom_blend_resolve, - format); - r600_blitter_end(ctx); - return true; - } - return false; -} - -static void si_blit(struct pipe_context *ctx, - const struct pipe_blit_info *info) -{ - struct r600_context *rctx = (struct r600_context*)ctx; - - if (do_hardware_msaa_resolve(ctx, info)) { - return; - } - - assert(util_blitter_is_blit_supported(rctx->blitter, info)); - - /* The driver doesn't decompress resources automatically while - * u_blitter is rendering. 
*/ - r600_decompress_subresource(ctx, info->src.resource, info->src.level, - info->src.box.z, - info->src.box.z + info->src.box.depth - 1); - - r600_blitter_begin(ctx, R600_BLIT); - util_blitter_blit(rctx->blitter, info); - r600_blitter_end(ctx); -} - -static void si_flush_resource(struct pipe_context *ctx, - struct pipe_resource *resource) -{ -} - -void si_init_blit_functions(struct r600_context *rctx) -{ - rctx->b.b.clear = r600_clear; - rctx->b.b.clear_render_target = r600_clear_render_target; - rctx->b.b.clear_depth_stencil = r600_clear_depth_stencil; - rctx->b.b.resource_copy_region = r600_resource_copy_region; - rctx->b.b.blit = si_blit; - rctx->b.b.flush_resource = si_flush_resource; - rctx->b.blit_decompress_depth = r600_blit_decompress_depth; -} diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c b/src/gallium/drivers/radeonsi/r600_buffer.c deleted file mode 100644 index e64683d382e..00000000000 --- a/src/gallium/drivers/radeonsi/r600_buffer.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - * Corbin Simpson - */ - -#include "pipe/p_screen.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_upload_mgr.h" - -#include "r600.h" -#include "radeonsi_pipe.h" - -void r600_upload_index_buffer(struct r600_context *rctx, - struct pipe_index_buffer *ib, unsigned count) -{ - u_upload_data(rctx->b.uploader, 0, count * ib->index_size, - ib->user_buffer, &ib->offset, &ib->buffer); -} - -void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer, - const uint8_t *ptr, unsigned size, - uint32_t *const_offset) -{ - if (R600_BIG_ENDIAN) { - uint32_t *tmpPtr; - unsigned i; - - if (!(tmpPtr = malloc(size))) { - R600_ERR("Failed to allocate BE swap buffer.\n"); - return; - } - - for (i = 0; i < size / 4; ++i) { - tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]); - } - - u_upload_data(rctx->b.uploader, 0, size, tmpPtr, const_offset, - (struct pipe_resource**)rbuffer); - - free(tmpPtr); - } else { - u_upload_data(rctx->b.uploader, 0, size, ptr, const_offset, - (struct pipe_resource**)rbuffer); - } -} diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c deleted file mode 100644 index c21a1013f93..00000000000 --- a/src/gallium/drivers/radeonsi/r600_hw_context.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, 
publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Jerome Glisse - */ -#include "../radeon/r600_cs.h" -#include "radeonsi_pm4.h" -#include "radeonsi_pipe.h" -#include "sid.h" -#include "util/u_memory.h" -#include - -#define GROUP_FORCE_NEW_BLOCK 0 - -/* Get backends mask */ -void si_get_backend_mask(struct r600_context *ctx) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - struct r600_resource *buffer; - uint32_t *results; - unsigned num_backends = ctx->screen->b.info.r600_num_backends; - unsigned i, mask = 0; - - /* if backend_map query is supported by the kernel */ - if (ctx->screen->b.info.r600_backend_map_valid) { - unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes; - unsigned backend_map = ctx->screen->b.info.r600_backend_map; - unsigned item_width = 4, item_mask = 0x7; - - while(num_tile_pipes--) { - i = backend_map & item_mask; - mask |= (1<>= item_width; - } - if (mask != 0) { - ctx->backend_mask = mask; - return; - } - } - - /* otherwise backup path for older kernels */ - - /* create buffer for event data */ - buffer = r600_resource_create_custom(&ctx->screen->b.b, - PIPE_USAGE_STAGING, - ctx->max_db*16); - if (!buffer) - 
goto err; - - /* initialize buffer with zeroes */ - results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (results) { - uint64_t va = 0; - - memset(results, 0, ctx->max_db * 4 * 4); - ctx->b.ws->buffer_unmap(buffer->cs_buf); - - /* emit EVENT_WRITE for ZPASS_DONE */ - va = r600_resource_va(&ctx->screen->b.b, (void *)buffer); - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = va >> 32; - - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE); - - /* analyze results */ - results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ); - if (results) { - for(i = 0; i < ctx->max_db; i++) { - /* at least highest bit will be set if backend is used */ - if (results[i*4 + 1]) - mask |= (1<b.ws->buffer_unmap(buffer->cs_buf); - } - } - - r600_resource_reference(&buffer, NULL); - - if (mask != 0) { - ctx->backend_mask = mask; - return; - } - -err: - /* fallback to old method - set num_backends lower bits to 1 */ - ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); - return; -} - -bool si_is_timer_query(unsigned type) -{ - return type == PIPE_QUERY_TIME_ELAPSED || - type == PIPE_QUERY_TIMESTAMP || - type == PIPE_QUERY_TIMESTAMP_DISJOINT; -} - -bool si_query_needs_begin(unsigned type) -{ - return type != PIPE_QUERY_TIMESTAMP; -} - -/* initialize */ -void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, - boolean count_draw_in) -{ - int i; - - /* The number of dwords we already used in the CS so far. */ - num_dw += ctx->b.rings.gfx.cs->cdw; - - for (i = 0; i < SI_NUM_ATOMS(ctx); i++) { - if (ctx->atoms.array[i]->dirty) { - num_dw += ctx->atoms.array[i]->num_dw; - } - } - - if (count_draw_in) { - /* The number of dwords all the dirty states would take. 
*/ - num_dw += ctx->pm4_dirty_cdwords; - - /* The upper-bound of how much a draw command would take. */ - num_dw += SI_MAX_DRAW_CS_DWORDS; - } - - /* Count in queries_suspend. */ - num_dw += ctx->num_cs_dw_nontimer_queries_suspend; - - /* Count in streamout_end at the end of CS. */ - if (ctx->b.streamout.begin_emitted) { - num_dw += ctx->b.streamout.num_dw_for_end; - } - - /* Count in render_condition(NULL) at the end of CS. */ - if (ctx->predicate_drawing) { - num_dw += 3; - } - - /* Count in framebuffer cache flushes at the end of CS. */ - num_dw += ctx->atoms.cache_flush->num_dw; - -#if R600_TRACE_CS - if (ctx->screen->trace_bo) { - num_dw += R600_TRACE_CS_DWORDS; - } -#endif - - /* Flush if there's not enough space. */ - if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { - radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC); - } -} - -void si_context_flush(struct r600_context *ctx, unsigned flags) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - - if (!cs->cdw) - return; - - /* suspend queries */ - ctx->nontimer_queries_suspended = false; - if (ctx->num_cs_dw_nontimer_queries_suspend) { - r600_context_queries_suspend(ctx); - ctx->nontimer_queries_suspended = true; - } - - ctx->b.streamout.suspended = false; - - if (ctx->b.streamout.begin_emitted) { - r600_emit_streamout_end(&ctx->b); - ctx->b.streamout.suspended = true; - } - - ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | - R600_CONTEXT_FLUSH_AND_INV_CB_META | - R600_CONTEXT_FLUSH_AND_INV_DB | - R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_INV_TEX_CACHE; - si_emit_cache_flush(&ctx->b, NULL); - - /* this is probably not needed anymore */ - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); - - /* force to keep tiling flags */ - flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; - -#if R600_TRACE_CS - if (ctx->screen->trace_bo) { - struct r600_screen *rscreen = ctx->screen; - unsigned i; - - for (i = 0; i < cs->cdw; i++) { - 
fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]); - } - rscreen->cs_count++; - } -#endif - - /* Flush the CS. */ - ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0); - -#if R600_TRACE_CS - if (ctx->screen->trace_bo) { - struct r600_screen *rscreen = ctx->screen; - unsigned i; - - for (i = 0; i < 10; i++) { - usleep(5); - if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) { - break; - } - } - if (i == 10) { - fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n", - rscreen->trace_ptr[1], rscreen->trace_ptr[0]); - } else { - fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5); - } - } -#endif - - si_begin_new_cs(ctx); -} - -void si_begin_new_cs(struct r600_context *ctx) -{ - ctx->pm4_dirty_cdwords = 0; - - /* Flush read caches at the beginning of CS. */ - ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | - R600_CONTEXT_INV_CONST_CACHE | - R600_CONTEXT_INV_SHADER_CACHE; - - /* set all valid group as dirty so they get reemited on - * next draw command - */ - si_pm4_reset_emitted(ctx); - - /* The CS initialization should be emitted before everything else. 
*/ - si_pm4_emit(ctx, ctx->queued.named.init); - ctx->emitted.named.init = ctx->queued.named.init; - - if (ctx->b.streamout.suspended) { - ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask; - r600_streamout_buffers_dirty(&ctx->b); - } - - /* resume queries */ - if (ctx->nontimer_queries_suspended) { - r600_context_queries_resume(ctx); - } - - si_all_descriptors_begin_new_cs(ctx); -} - -static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, - bool test_status_bit) -{ - uint32_t *current_result = (uint32_t*)map; - uint64_t start, end; - - start = (uint64_t)current_result[start_index] | - (uint64_t)current_result[start_index+1] << 32; - end = (uint64_t)current_result[end_index] | - (uint64_t)current_result[end_index+1] << 32; - - if (!test_status_bit || - ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { - return end - start; - } - return 0; -} - -static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) -{ - unsigned results_base = query->results_start; - char *map; - - map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, - PIPE_TRANSFER_READ | - (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); - if (!map) - return FALSE; - - /* count all results across all data blocks */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - while (results_base != query->results_end) { - query->result.u64 += - r600_query_read_result(map + results_base, 0, 2, true); - results_base = (results_base + 16) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - while (results_base != query->results_end) { - query->result.b = query->result.b || - r600_query_read_result(map + results_base, 0, 2, true) != 0; - results_base = (results_base + 16) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_TIMESTAMP: - { - uint32_t *current_result = (uint32_t*)map; - query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32; - break; - } - case PIPE_QUERY_TIME_ELAPSED: - while (results_base != query->results_end) { - query->result.u64 += - r600_query_read_result(map + results_base, 0, 2, false); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - /* SAMPLE_STREAMOUTSTATS stores this structure: - * { - * u64 NumPrimitivesWritten; - * u64 PrimitiveStorageNeeded; - * } - * We only need NumPrimitivesWritten here. */ - while (results_base != query->results_end) { - query->result.u64 += - r600_query_read_result(map + results_base, 2, 6, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_PRIMITIVES_GENERATED: - /* Here we read PrimitiveStorageNeeded. 
*/ - while (results_base != query->results_end) { - query->result.u64 += - r600_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_SO_STATISTICS: - while (results_base != query->results_end) { - query->result.so.num_primitives_written += - r600_query_read_result(map + results_base, 2, 6, true); - query->result.so.primitives_storage_needed += - r600_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - while (results_base != query->results_end) { - query->result.b = query->result.b || - r600_query_read_result(map + results_base, 2, 6, true) != - r600_query_read_result(map + results_base, 0, 4, true); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - } - break; - default: - assert(0); - } - - query->results_start = query->results_end; - ctx->b.ws->buffer_unmap(query->buffer->cs_buf); - return TRUE; -} - -void r600_query_begin(struct r600_context *ctx, struct r600_query *query) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - unsigned new_results_end, i; - uint32_t *results; - uint64_t va; - - si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); - - new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - /* collect current results if query buffer is full */ - if (new_results_end == query->results_start) { - r600_query_result(ctx, query, TRUE); - } - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (results) { - results = (uint32_t*)((char*)results + query->results_end); - memset(results, 0, query->result_size); - - /* Set top bits for unused backends */ - for (i = 0; i < ctx->max_db; i++) { - 
if (!(ctx->backend_mask & (1<b.ws->buffer_unmap(query->buffer->cs_buf); - } - break; - case PIPE_QUERY_TIME_ELAPSED: - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - results = (uint32_t*)((char*)results + query->results_end); - memset(results, 0, query->result_size); - ctx->b.ws->buffer_unmap(query->buffer->cs_buf); - break; - default: - assert(0); - } - - /* emit begin query */ - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - va += query->results_end; - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_TIME_ELAPSED: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = 0; - break; - default: - assert(0); - } - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); - - if (!si_is_timer_query(query->type)) { - ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; - } -} - 
-void r600_query_end(struct r600_context *ctx, struct r600_query *query) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - uint64_t va; - unsigned new_results_end; - - /* The queries which need begin already called this in begin_query. */ - if (!si_query_needs_begin(query->type)) { - si_need_cs_space(ctx, query->num_cs_dw, TRUE); - - new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - /* collect current results if query buffer is full */ - if (new_results_end == query->results_start) { - r600_query_result(ctx, query, TRUE); - } - } - - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - /* emit end query */ - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - va += query->results_end + 8; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - va += query->results_end + query->result_size/2; - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; - break; - case PIPE_QUERY_TIME_ELAPSED: - va += query->results_end + query->result_size/2; - /* fall through */ - case PIPE_QUERY_TIMESTAMP: - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - cs->buf[cs->cdw++] = va; - cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = 0; - break; - default: - assert(0); - } - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, 
&ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); - - query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; - - if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) { - ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; - } -} - -void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, - int flag_wait) -{ - struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; - uint64_t va; - - if (operation == PREDICATION_OP_CLEAR) { - si_need_cs_space(ctx, 3, FALSE); - - cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - cs->buf[cs->cdw++] = 0; - cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); - } else { - unsigned results_base = query->results_start; - unsigned count; - uint32_t op; - - /* find count of the query data blocks */ - count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0; - count /= query->result_size; - - si_need_cs_space(ctx, 5 * count, TRUE); - - op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | - (flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); - va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); - - /* emit predicate packets for all data blocks */ - while (results_base != query->results_end) { - cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; - cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); - cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, - query->buffer, RADEON_USAGE_READ); - results_base = (results_base + query->result_size) % query->buffer->b.b.width0; - - /* set CONTINUE bit for all packets except the first */ - op |= PREDICATION_CONTINUE; - } - } -} - -struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) -{ - struct r600_query *query; - unsigned buffer_size = 4096; - - query = CALLOC_STRUCT(r600_query); - if (query == NULL) - return NULL; - - query->type = query_type; - - switch (query_type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - query->result_size = 16 * ctx->max_db; - query->num_cs_dw = 6; - break; - case PIPE_QUERY_TIMESTAMP: - query->result_size = 8; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_TIME_ELAPSED: - query->result_size = 16; - query->num_cs_dw = 8; - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ - query->result_size = 32; - query->num_cs_dw = 6; - break; - default: - assert(0); - FREE(query); - return NULL; - } - - /* adjust buffer size to simplify offsets wrapping math */ - buffer_size -= buffer_size % query->result_size; - - /* Queries are normally read by the CPU after - * being written by the gpu, hence staging is probably a good - * usage pattern. 
- */ - query->buffer = r600_resource_create_custom(&ctx->screen->b.b, - PIPE_USAGE_STAGING, - buffer_size); - if (!query->buffer) { - FREE(query); - return NULL; - } - return query; -} - -void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) -{ - r600_resource_reference(&query->buffer, NULL); - free(query); -} - -boolean r600_context_query_result(struct r600_context *ctx, - struct r600_query *query, - boolean wait, void *vresult) -{ - boolean *result_b = (boolean*)vresult; - uint64_t *result_u64 = (uint64_t*)vresult; - struct pipe_query_data_so_statistics *result_so = - (struct pipe_query_data_so_statistics*)vresult; - - if (!r600_query_result(ctx, query, wait)) - return FALSE; - - switch (query->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - *result_u64 = query->result.u64; - break; - case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - *result_b = query->result.b; - break; - case PIPE_QUERY_TIMESTAMP: - case PIPE_QUERY_TIME_ELAPSED: - *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq; - break; - case PIPE_QUERY_SO_STATISTICS: - *result_so = query->result.so; - break; - default: - assert(0); - } - return TRUE; -} - -void r600_context_queries_suspend(struct r600_context *ctx) -{ - struct r600_query *query; - - LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { - r600_query_end(ctx, query); - } - assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); -} - -void r600_context_queries_resume(struct r600_context *ctx) -{ - struct r600_query *query; - - assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); - - LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { - r600_query_begin(ctx, query); - } -} - -#if R600_TRACE_CS -void r600_trace_emit(struct r600_context *rctx) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_winsys_cs *cs = rctx->cs; - 
uint64_t va; - - va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo); - r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE); - cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0); - cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | - PKT3_WRITE_DATA_WR_CONFIRM | - PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME); - cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; - cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL; - cs->buf[cs->cdw++] = cs->cdw; - cs->buf[cs->cdw++] = rscreen->cs_count; -} -#endif diff --git a/src/gallium/drivers/radeonsi/r600_query.c b/src/gallium/drivers/radeonsi/r600_query.c deleted file mode 100644 index e9fce94ee93..00000000000 --- a/src/gallium/drivers/radeonsi/r600_query.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ -#include "radeonsi_pipe.h" -#include "sid.h" - -static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - return (struct pipe_query*)r600_context_query_create(rctx, query_type); -} - -static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - - r600_context_query_destroy(rctx, (struct r600_query *)query); -} - -static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - if (!si_query_needs_begin(rquery->type)) { - assert(0); - return; - } - - memset(&rquery->result, 0, sizeof(rquery->result)); - rquery->results_start = rquery->results_end; - r600_query_begin(rctx, (struct r600_query *)query); - - if (!si_is_timer_query(rquery->type)) { - LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_query_list); - } -} - -static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - if (!si_query_needs_begin(rquery->type)) { - memset(&rquery->result, 0, sizeof(rquery->result)); - } - - r600_query_end(rctx, rquery); - - if (si_query_needs_begin(rquery->type) && !si_is_timer_query(rquery->type)) { - LIST_DELINIT(&rquery->list); - } -} - -static boolean r600_get_query_result(struct pipe_context *ctx, - struct pipe_query *query, - boolean wait, union pipe_query_result *vresult) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - - return r600_context_query_result(rctx, rquery, wait, vresult); -} - -static void r600_render_condition(struct pipe_context *ctx, - struct pipe_query *query, - boolean condition, - uint mode) -{ - struct r600_context 
*rctx = (struct r600_context *)ctx; - struct r600_query *rquery = (struct r600_query *)query; - int wait_flag = 0; - - /* If we already have nonzero result, render unconditionally */ - if (query != NULL && rquery->result.u64 != 0) { - if (rctx->current_render_cond) { - r600_render_condition(ctx, NULL, FALSE, 0); - } - return; - } - - rctx->current_render_cond = query; - rctx->current_render_cond_cond = condition; - rctx->current_render_cond_mode = mode; - - if (query == NULL) { - if (rctx->predicate_drawing) { - rctx->predicate_drawing = false; - r600_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, 1); - } - return; - } - - if (mode == PIPE_RENDER_COND_WAIT || - mode == PIPE_RENDER_COND_BY_REGION_WAIT) { - wait_flag = 1; - } - - rctx->predicate_drawing = true; - - switch (rquery->type) { - case PIPE_QUERY_OCCLUSION_COUNTER: - case PIPE_QUERY_OCCLUSION_PREDICATE: - r600_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag); - break; - case PIPE_QUERY_PRIMITIVES_EMITTED: - case PIPE_QUERY_PRIMITIVES_GENERATED: - case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - r600_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); - break; - default: - assert(0); - } -} - -void r600_init_query_functions(struct r600_context *rctx) -{ - rctx->b.b.create_query = r600_create_query; - rctx->b.b.destroy_query = r600_destroy_query; - rctx->b.b.begin_query = r600_begin_query; - rctx->b.b.end_query = r600_end_query; - rctx->b.b.get_query_result = r600_get_query_result; - - if (rctx->screen->b.info.r600_num_backends > 0) - rctx->b.b.render_condition = r600_render_condition; -} diff --git a/src/gallium/drivers/radeonsi/r600_resource.c b/src/gallium/drivers/radeonsi/r600_resource.c deleted file mode 100644 index 9a0fde430ff..00000000000 --- a/src/gallium/drivers/radeonsi/r600_resource.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2010 Marek Olšák target == PIPE_BUFFER) { - return r600_buffer_create(screen, templ, 4096); - } else { 
- return r600_texture_create(screen, templ); - } -} - -static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen, - const struct pipe_resource *templ, - struct winsys_handle *whandle) -{ - if (templ->target == PIPE_BUFFER) { - return NULL; - } else { - return r600_texture_from_handle(screen, templ, whandle); - } -} - -void r600_init_screen_resource_functions(struct pipe_screen *screen) -{ - screen->resource_create = r600_resource_create; - screen->resource_from_handle = r600_resource_from_handle; - screen->resource_get_handle = u_resource_get_handle_vtbl; - screen->resource_destroy = u_resource_destroy_vtbl; -} - -void r600_init_context_resource_functions(struct r600_context *r600) -{ - r600->b.b.transfer_map = u_transfer_map_vtbl; - r600->b.b.transfer_flush_region = u_default_transfer_flush_region; - r600->b.b.transfer_unmap = u_transfer_unmap_vtbl; - r600->b.b.transfer_inline_write = u_default_transfer_inline_write; -} diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h deleted file mode 100644 index be9ab33d90b..00000000000 --- a/src/gallium/drivers/radeonsi/r600_resource.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2010 Marek Olšák - */ - -#include "util/u_index_modify.h" -#include "util/u_upload_mgr.h" -#include "radeonsi_pipe.h" - - -void r600_translate_index_buffer(struct r600_context *r600, - struct pipe_index_buffer *ib, - unsigned count) -{ - struct pipe_resource *out_buffer = NULL; - unsigned out_offset; - void *ptr; - - switch (ib->index_size) { - case 1: - u_upload_alloc(r600->b.uploader, 0, count * 2, - &out_offset, &out_buffer, &ptr); - - util_shorten_ubyte_elts_to_userptr( - &r600->b.b, ib, 0, ib->offset, count, ptr); - - pipe_resource_reference(&ib->buffer, NULL); - ib->buffer = out_buffer; - ib->offset = out_offset; - ib->index_size = 2; - break; - } -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_compute.c b/src/gallium/drivers/radeonsi/radeonsi_compute.c 
deleted file mode 100644 index 214ea3c2552..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_compute.c +++ /dev/null @@ -1,299 +0,0 @@ -#include "util/u_memory.h" - -#include "../radeon/r600_cs.h" -#include "radeonsi_pipe.h" -#include "radeonsi_shader.h" - -#include "radeon_llvm_util.h" - -#define MAX_GLOBAL_BUFFERS 20 - -struct si_pipe_compute { - struct r600_context *ctx; - - unsigned local_size; - unsigned private_size; - unsigned input_size; - unsigned num_kernels; - struct si_pipe_shader *kernels; - unsigned num_user_sgprs; - - struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; - - LLVMContextRef llvm_ctx; -}; - -static void *radeonsi_create_compute_state( - struct pipe_context *ctx, - const struct pipe_compute_state *cso) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct si_pipe_compute *program = - CALLOC_STRUCT(si_pipe_compute); - const struct pipe_llvm_program_header *header; - const unsigned char *code; - unsigned i; - - program->llvm_ctx = LLVMContextCreate(); - - header = cso->prog; - code = cso->prog + sizeof(struct pipe_llvm_program_header); - - program->ctx = rctx; - program->local_size = cso->req_local_mem; - program->private_size = cso->req_private_mem; - program->input_size = cso->req_input_mem; - - program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code, - header->num_bytes); - program->kernels = CALLOC(sizeof(struct si_pipe_shader), - program->num_kernels); - for (i = 0; i < program->num_kernels; i++) { - LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, - code, header->num_bytes); - si_compile_llvm(rctx, &program->kernels[i], mod); - LLVMDisposeModule(mod); - } - - return program; -} - -static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state) -{ - struct r600_context *rctx = (struct r600_context*)ctx; - rctx->cs_shader_state.program = (struct si_pipe_compute*)state; -} - -static void radeonsi_set_global_binding( - struct pipe_context *ctx, unsigned 
first, unsigned n, - struct pipe_resource **resources, - uint32_t **handles) -{ - unsigned i; - struct r600_context *rctx = (struct r600_context*)ctx; - struct si_pipe_compute *program = rctx->cs_shader_state.program; - - if (!resources) { - for (i = first; i < first + n; i++) { - program->global_buffers[i] = NULL; - } - return; - } - - for (i = first; i < first + n; i++) { - uint64_t va; - program->global_buffers[i] = resources[i]; - va = r600_resource_va(ctx->screen, resources[i]); - memcpy(handles[i], &va, sizeof(va)); - } -} - -static void radeonsi_launch_grid( - struct pipe_context *ctx, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) -{ - struct r600_context *rctx = (struct r600_context*)ctx; - struct si_pipe_compute *program = rctx->cs_shader_state.program; - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - struct r600_resource *kernel_args_buffer = NULL; - unsigned kernel_args_size; - unsigned num_work_size_bytes = 36; - uint32_t kernel_args_offset = 0; - uint32_t *kernel_args; - uint64_t kernel_args_va; - uint64_t shader_va; - unsigned arg_user_sgpr_count = 2; - unsigned i; - struct si_pipe_shader *shader = &program->kernels[pc]; - unsigned lds_blocks; - - pm4->compute_pkt = true; - si_cmd_context_control(pm4); - - si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); - si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | - EVENT_INDEX(0x7) | - EVENT_WRITE_INV_L2); - si_pm4_cmd_end(pm4, false); - - si_pm4_inval_texture_cache(pm4); - si_pm4_inval_shader_cache(pm4); - si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); - - /* Upload the kernel arguments */ - - /* The extra num_work_size_bytes are for work group / work item size information */ - kernel_args_size = program->input_size + num_work_size_bytes; - kernel_args = MALLOC(kernel_args_size); - for (i = 0; i < 3; i++) { - kernel_args[i] = grid_layout[i]; - kernel_args[i + 3] = grid_layout[i] * block_layout[i]; - kernel_args[i + 6] = block_layout[i]; - } - - 
memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size); - - r600_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args, - kernel_args_size, &kernel_args_offset); - kernel_args_va = r600_resource_va(ctx->screen, - (struct pipe_resource*)kernel_args_buffer); - kernel_args_va += kernel_args_offset; - - si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ); - - si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); - si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); - - si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0); - si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0); - si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0); - - si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X, - S_00B81C_NUM_THREAD_FULL(block_layout[0])); - si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y, - S_00B820_NUM_THREAD_FULL(block_layout[1])); - si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z, - S_00B824_NUM_THREAD_FULL(block_layout[2])); - - /* Global buffers */ - for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { - struct r600_resource *buffer = - (struct r600_resource*)program->global_buffers[i]; - if (!buffer) { - continue; - } - si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE); - } - - /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID - * and is now per pipe, so it should be handled in the - * kernel if we want to use something other than the default value, - * which is now 0x22f. 
- */ - if (rctx->b.chip_class <= SI) { - /* XXX: This should be: - * (number of compute units) * 4 * (waves per simd) - 1 */ - - si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID, - 0x190 /* Default value */); - } - - shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); - si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); - si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); - si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); - - si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, - /* We always use at least 3 VGPRS, these come from - * TIDIG_COMP_CNT. - * XXX: The compiler should account for this. - */ - S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4) - /* We always use at least 4 + arg_user_sgpr_count. The 4 extra - * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN - * XXX: The compiler should account for this. - */ - | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count, - shader->num_sgprs)) - 1) / 8)) - ; - - lds_blocks = shader->lds_size; - /* XXX: We are over allocating LDS. For SI, the shader reports LDS in - * blocks of 256 bytes, so if there are 4 bytes lds allocated in - * the shader and 4 bytes allocated by the state tracker, then - * we will set LDS_SIZE to 512 bytes rather than 256. 
- */ - if (rctx->b.chip_class <= SI) { - lds_blocks += align(program->local_size, 256) >> 8; - } else { - lds_blocks += align(program->local_size, 512) >> 9; - } - - assert(lds_blocks <= 0xFF); - - si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, - S_00B84C_SCRATCH_EN(0) - | S_00B84C_USER_SGPR(arg_user_sgpr_count) - | S_00B84C_TGID_X_EN(1) - | S_00B84C_TGID_Y_EN(1) - | S_00B84C_TGID_Z_EN(1) - | S_00B84C_TG_SIZE_EN(1) - | S_00B84C_TIDIG_COMP_CNT(2) - | S_00B84C_LDS_SIZE(lds_blocks) - | S_00B84C_EXCP_EN(0)) - ; - si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0); - - si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, - S_00B858_SH0_CU_EN(0xffff /* Default value */) - | S_00B858_SH1_CU_EN(0xffff /* Default value */)) - ; - - si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, - S_00B85C_SH0_CU_EN(0xffff /* Default value */) - | S_00B85C_SH1_CU_EN(0xffff /* Default value */)) - ; - - si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); - si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */ - si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */ - si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */ - si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */ - si_pm4_cmd_end(pm4, false); - - si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4))); - si_pm4_cmd_end(pm4, false); - - si_pm4_inval_texture_cache(pm4); - si_pm4_inval_shader_cache(pm4); - si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); - - si_pm4_emit(rctx, pm4); - -#if 0 - fprintf(stderr, "cdw: %i\n", rctx->cs->cdw); - for (i = 0; i < rctx->cs->cdw; i++) { - fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]); - } -#endif - - FREE(pm4); - FREE(kernel_args); -} - - -static void si_delete_compute_state(struct pipe_context *ctx, void* state){ - struct si_pipe_compute *program = (struct si_pipe_compute *)state; - - if (!state) { - return; - } - - if (program->kernels) { - FREE(program->kernels); - } - - if 
(program->llvm_ctx){ - LLVMContextDispose(program->llvm_ctx); - } - - //And then free the program itself. - FREE(program); -} - -static void si_set_compute_resources(struct pipe_context * ctx_, - unsigned start, unsigned count, - struct pipe_surface ** surfaces) { } - -void si_init_compute_functions(struct r600_context *rctx) -{ - rctx->b.b.create_compute_state = radeonsi_create_compute_state; - rctx->b.b.delete_compute_state = si_delete_compute_state; - rctx->b.b.bind_compute_state = radeonsi_bind_compute_state; -/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */ - rctx->b.b.set_compute_resources = si_set_compute_resources; - rctx->b.b.set_global_binding = radeonsi_set_global_binding; - rctx->b.b.launch_grid = radeonsi_launch_grid; -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c deleted file mode 100644 index 0fec6d56978..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_context.h" -#include "tgsi/tgsi_scan.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "util/u_blitter.h" -#include "util/u_double_list.h" -#include "util/u_format.h" -#include "util/u_transfer.h" -#include "util/u_surface.h" -#include "util/u_pack_color.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "util/u_simple_shaders.h" -#include "util/u_upload_mgr.h" -#include "vl/vl_decoder.h" -#include "vl/vl_video_buffer.h" -#include "os/os_time.h" -#include "pipebuffer/pb_buffer.h" -#include "radeonsi_pipe.h" -#include "radeon/radeon_uvd.h" -#include "r600.h" -#include "sid.h" -#include "r600_resource.h" -#include "radeonsi_pipe.h" -#include "si_state.h" -#include "../radeon/r600_cs.h" - -/* - * pipe_context - */ -void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, - unsigned flags) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct pipe_query *render_cond = NULL; - boolean render_cond_cond = FALSE; - unsigned render_cond_mode = 0; - - if (fence) { - *fence = rctx->b.ws->cs_create_fence(rctx->b.rings.gfx.cs); - } - - /* Disable render condition. */ - if (rctx->current_render_cond) { - render_cond = rctx->current_render_cond; - render_cond_cond = rctx->current_render_cond_cond; - render_cond_mode = rctx->current_render_cond_mode; - ctx->render_condition(ctx, NULL, FALSE, 0); - } - - si_context_flush(rctx, flags); - - /* Re-enable render condition. 
*/ - if (render_cond) { - ctx->render_condition(ctx, render_cond, render_cond_cond, render_cond_mode); - } -} - -static void r600_flush_from_st(struct pipe_context *ctx, - struct pipe_fence_handle **fence, - unsigned flags) -{ - radeonsi_flush(ctx, fence, - flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0); -} - -static void r600_flush_from_winsys(void *ctx, unsigned flags) -{ - radeonsi_flush((struct pipe_context*)ctx, NULL, flags); -} - -static void r600_destroy_context(struct pipe_context *context) -{ - struct r600_context *rctx = (struct r600_context *)context; - - si_release_all_descriptors(rctx); - - pipe_resource_reference(&rctx->null_const_buf.buffer, NULL); - r600_resource_reference(&rctx->border_color_table, NULL); - - if (rctx->dummy_pixel_shader) { - rctx->b.b.delete_fs_state(&rctx->b.b, rctx->dummy_pixel_shader); - } - for (int i = 0; i < 8; i++) { - rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth_stencil[i]); - rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth[i]); - rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_stencil[i]); - } - rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_inplace); - rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_resolve); - rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_decompress); - util_unreference_framebuffer_state(&rctx->framebuffer); - - util_blitter_destroy(rctx->blitter); - - r600_common_context_cleanup(&rctx->b); - FREE(rctx); -} - -static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) -{ - struct r600_context *rctx = CALLOC_STRUCT(r600_context); - struct r600_screen* rscreen = (struct r600_screen *)screen; - int shader, i; - - if (rctx == NULL) - return NULL; - - if (!r600_common_context_init(&rctx->b, &rscreen->b)) - goto fail; - - rctx->b.b.screen = screen; - rctx->b.b.priv = priv; - rctx->b.b.destroy = 
r600_destroy_context; - rctx->b.b.flush = r600_flush_from_st; - - /* Easy accessing of screen/winsys. */ - rctx->screen = rscreen; - - si_init_blit_functions(rctx); - r600_init_query_functions(rctx); - r600_init_context_resource_functions(rctx); - si_init_compute_functions(rctx); - - if (rscreen->b.info.has_uvd) { - rctx->b.b.create_video_codec = radeonsi_uvd_create_decoder; - rctx->b.b.create_video_buffer = radeonsi_video_buffer_create; - } else { - rctx->b.b.create_video_codec = vl_create_decoder; - rctx->b.b.create_video_buffer = vl_video_buffer_create; - } - - rctx->b.rings.gfx.cs = rctx->b.ws->cs_create(rctx->b.ws, RING_GFX, NULL); - rctx->b.rings.gfx.flush = r600_flush_from_winsys; - - si_init_all_descriptors(rctx); - - /* Initialize cache_flush. */ - rctx->cache_flush = si_atom_cache_flush; - rctx->atoms.cache_flush = &rctx->cache_flush; - - rctx->atoms.streamout_begin = &rctx->b.streamout.begin_atom; - - switch (rctx->b.chip_class) { - case SI: - case CIK: - si_init_state_functions(rctx); - LIST_INITHEAD(&rctx->active_nontimer_query_list); - rctx->max_db = 8; - si_init_config(rctx); - break; - default: - R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class); - goto fail; - } - - rctx->b.ws->cs_set_flush_callback(rctx->b.rings.gfx.cs, r600_flush_from_winsys, rctx); - - rctx->blitter = util_blitter_create(&rctx->b.b); - if (rctx->blitter == NULL) - goto fail; - - rctx->dummy_pixel_shader = - util_make_fragment_cloneinput_shader(&rctx->b.b, 0, - TGSI_SEMANTIC_GENERIC, - TGSI_INTERPOLATE_CONSTANT); - rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader); - - /* these must be last */ - si_begin_new_cs(rctx); - si_get_backend_mask(rctx); - - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy - * with a NULL buffer). We need to use a dummy buffer instead. 
*/ - if (rctx->b.chip_class == CIK) { - rctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, - PIPE_USAGE_STATIC, 16); - rctx->null_const_buf.buffer_size = rctx->null_const_buf.buffer->width0; - - for (shader = 0; shader < SI_NUM_SHADERS; shader++) { - for (i = 0; i < NUM_CONST_BUFFERS; i++) { - rctx->b.b.set_constant_buffer(&rctx->b.b, shader, i, - &rctx->null_const_buf); - } - } - - /* Clear the NULL constant buffer, because loads should return zeros. */ - rctx->b.clear_buffer(&rctx->b.b, rctx->null_const_buf.buffer, 0, - rctx->null_const_buf.buffer->width0, 0); - } - - return &rctx->b.b; -fail: - r600_destroy_context(&rctx->b.b); - return NULL; -} - -/* - * pipe_screen - */ -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -const char *r600_get_llvm_processor_name(enum radeon_family family) -{ - switch (family) { - case CHIP_TAHITI: return "tahiti"; - case CHIP_PITCAIRN: return "pitcairn"; - case CHIP_VERDE: return "verde"; - case CHIP_OLAND: return "oland"; -#if HAVE_LLVM <= 0x0303 - default: return "SI"; -#else - case CHIP_HAINAN: return "hainan"; - case CHIP_BONAIRE: return "bonaire"; - case CHIP_KABINI: return "kabini"; - case CHIP_KAVERI: return "kaveri"; - case CHIP_HAWAII: return "hawaii"; - default: return ""; -#endif - } -} - -static const char *r600_get_family_name(enum radeon_family family) -{ - switch(family) { - case CHIP_TAHITI: return "AMD TAHITI"; - case CHIP_PITCAIRN: return "AMD PITCAIRN"; - case CHIP_VERDE: return "AMD CAPE VERDE"; - case CHIP_OLAND: return "AMD OLAND"; - case CHIP_HAINAN: return "AMD HAINAN"; - case CHIP_BONAIRE: return "AMD BONAIRE"; - case CHIP_KAVERI: return "AMD KAVERI"; - case CHIP_KABINI: return "AMD KABINI"; - case CHIP_HAWAII: return "AMD HAWAII"; - default: return "AMD unknown"; - } -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - return 
r600_get_family_name(rscreen->b.family); -} - -static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - switch (param) { - /* Supported features (boolean caps). */ - case PIPE_CAP_TWO_SIDED_STENCIL: - case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: - case PIPE_CAP_ANISOTROPIC_FILTER: - case PIPE_CAP_POINT_SPRITE: - case PIPE_CAP_OCCLUSION_QUERY: - case PIPE_CAP_TEXTURE_SHADOW_MAP: - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - case PIPE_CAP_TEXTURE_SWIZZLE: - case PIPE_CAP_DEPTH_CLIP_DISABLE: - case PIPE_CAP_SHADER_STENCIL_EXPORT: - case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: - case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - case PIPE_CAP_SM3: - case PIPE_CAP_SEAMLESS_CUBE_MAP: - case PIPE_CAP_PRIMITIVE_RESTART: - case PIPE_CAP_CONDITIONAL_RENDER: - case PIPE_CAP_TEXTURE_BARRIER: - case PIPE_CAP_INDEP_BLEND_ENABLE: - case PIPE_CAP_INDEP_BLEND_FUNC: - case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: - case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: - case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: - case PIPE_CAP_USER_INDEX_BUFFERS: - case PIPE_CAP_USER_CONSTANT_BUFFERS: - case PIPE_CAP_START_INSTANCE: - case PIPE_CAP_NPOT_TEXTURES: - case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: - case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: - case PIPE_CAP_TGSI_INSTANCEID: - case PIPE_CAP_COMPUTE: - case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: - case PIPE_CAP_TGSI_VS_LAYER: - return 1; - - case PIPE_CAP_TEXTURE_MULTISAMPLE: - /* 2D tiling on CIK is supported since DRM 2.35.0 */ - return HAVE_LLVM >= 0x0304 && (rscreen->b.chip_class < CIK || - rscreen->b.info.drm_minor >= 35); - - case PIPE_CAP_TGSI_TEXCOORD: - return 0; - - case 
PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: - return 64; - - case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: - return 256; - - case PIPE_CAP_GLSL_FEATURE_LEVEL: - return 140; - - case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - return 1; - case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: - return MIN2(rscreen->b.info.vram_size, 0xFFFFFFFF); - - /* Unsupported features. */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - case PIPE_CAP_SCALED_RESOLVE: - case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: - case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: - case PIPE_CAP_VERTEX_COLOR_CLAMPED: - case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: - case PIPE_CAP_USER_VERTEX_BUFFERS: - case PIPE_CAP_QUERY_PIPELINE_STATISTICS: - case PIPE_CAP_CUBE_MAP_ARRAY: - return 0; - - case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600; - - /* Stream output. */ - case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return rscreen->b.has_streamout ? 4 : 0; - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: - return rscreen->b.has_streamout ? 1 : 0; - case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return rscreen->b.has_streamout ? 32*4 : 0; - - /* Texturing. */ - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 15; - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: - return 16384; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 32; - - /* Render targets. */ - case PIPE_CAP_MAX_RENDER_TARGETS: - return 8; - - case PIPE_CAP_MAX_VIEWPORTS: - return 1; - - /* Timer queries, present when the clock frequency is non zero. 
*/ - case PIPE_CAP_QUERY_TIMESTAMP: - case PIPE_CAP_QUERY_TIME_ELAPSED: - return rscreen->b.info.r600_clock_crystal_freq != 0; - - case PIPE_CAP_MIN_TEXEL_OFFSET: - return -8; - - case PIPE_CAP_MAX_TEXEL_OFFSET: - return 7; - case PIPE_CAP_ENDIANNESS: - return PIPE_ENDIAN_LITTLE; - } - return 0; -} - -static float r600_get_paramf(struct pipe_screen* pscreen, - enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 16384.0f; - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - case PIPE_CAPF_GUARD_BAND_LEFT: - case PIPE_CAPF_GUARD_BAND_TOP: - case PIPE_CAPF_GUARD_BAND_RIGHT: - case PIPE_CAPF_GUARD_BAND_BOTTOM: - return 0.0f; - } - return 0.0f; -} - -static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_VERTEX: - break; - case PIPE_SHADER_GEOMETRY: - /* TODO: support and enable geometry programs */ - return 0; - case PIPE_SHADER_COMPUTE: - switch (param) { - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_LLVM; - default: - return 0; - } - default: - /* TODO: support tessellation */ - return 0; - } - - switch (param) { - case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: - case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: - return 16384; - case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: - return 32; - case PIPE_SHADER_CAP_MAX_INPUTS: - return 32; - case PIPE_SHADER_CAP_MAX_TEMPS: - return 256; /* Max native temporaries. */ - case PIPE_SHADER_CAP_MAX_ADDRS: - /* FIXME Isn't this equal to TEMPS? 
*/ - return 1; /* Max native address registers */ - case PIPE_SHADER_CAP_MAX_CONSTS: - return 4096; /* actually only memory limits this */ - case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: - return NUM_PIPE_CONST_BUFFERS; - case PIPE_SHADER_CAP_MAX_PREDS: - return 0; /* FIXME */ - case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: - return 1; - case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: - return 0; - case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: - case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: - case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: - return 1; - case PIPE_SHADER_CAP_INTEGERS: - return 1; - case PIPE_SHADER_CAP_SUBROUTINES: - return 0; - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - return 16; - case PIPE_SHADER_CAP_PREFERRED_IR: - return PIPE_SHADER_IR_TGSI; - } - return 0; -} - -static int r600_get_video_param(struct pipe_screen *screen, - enum pipe_video_profile profile, - enum pipe_video_entrypoint entrypoint, - enum pipe_video_cap param) -{ - switch (param) { - case PIPE_VIDEO_CAP_SUPPORTED: - return vl_profile_supported(screen, profile, entrypoint); - case PIPE_VIDEO_CAP_NPOT_TEXTURES: - return 1; - case PIPE_VIDEO_CAP_MAX_WIDTH: - case PIPE_VIDEO_CAP_MAX_HEIGHT: - return vl_video_buffer_max_size(screen); - case PIPE_VIDEO_CAP_PREFERED_FORMAT: - return PIPE_FORMAT_NV12; - case PIPE_VIDEO_CAP_MAX_LEVEL: - return vl_level_supported(screen, profile); - default: - return 0; - } -} - -static int r600_get_compute_param(struct pipe_screen *screen, - enum pipe_compute_cap param, - void *ret) -{ - struct r600_screen *rscreen = (struct r600_screen *)screen; - //TODO: select these params by asic - switch (param) { - case PIPE_COMPUTE_CAP_IR_TARGET: { - const char *gpu = r600_get_llvm_processor_name(rscreen->b.family); - if (ret) { - sprintf(ret, "%s-r600--", gpu); - } - return (8 + strlen(gpu)) * sizeof(char); - } - case PIPE_COMPUTE_CAP_GRID_DIMENSION: - if (ret) { - uint64_t * grid_dimension = ret; - 
grid_dimension[0] = 3; - } - return 1 * sizeof(uint64_t); - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - if (ret) { - uint64_t * grid_size = ret; - grid_size[0] = 65535; - grid_size[1] = 65535; - grid_size[2] = 1; - } - return 3 * sizeof(uint64_t) ; - - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - if (ret) { - uint64_t * block_size = ret; - block_size[0] = 256; - block_size[1] = 256; - block_size[2] = 256; - } - return 3 * sizeof(uint64_t); - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - if (ret) { - uint64_t * max_threads_per_block = ret; - *max_threads_per_block = 256; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - if (ret) { - uint64_t *max_global_size = ret; - /* XXX: Not sure what to put here. */ - *max_global_size = 2000000000; - } - return sizeof(uint64_t); - case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: - if (ret) { - uint64_t *max_local_size = ret; - /* Value reported by the closed source driver. */ - *max_local_size = 32768; - } - return sizeof(uint64_t); - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - if (ret) { - uint64_t *max_input_size = ret; - /* Value reported by the closed source driver. 
*/ - *max_input_size = 1024; - } - return sizeof(uint64_t); - case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - if (ret) { - uint64_t max_global_size; - uint64_t *max_mem_alloc_size = ret; - r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size); - *max_mem_alloc_size = max_global_size / 4; - } - return sizeof(uint64_t); - default: - fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); - return 0; - } -} - -static void r600_destroy_screen(struct pipe_screen* pscreen) -{ - struct r600_screen *rscreen = (struct r600_screen *)pscreen; - - if (rscreen == NULL) - return; - - if (!radeon_winsys_unref(rscreen->b.ws)) - return; - - r600_common_screen_cleanup(&rscreen->b); - -#if R600_TRACE_CS - if (rscreen->trace_bo) { - rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf); - pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL); - } -#endif - - rscreen->b.ws->destroy(rscreen->b.ws); - FREE(rscreen); -} - -static uint64_t r600_get_timestamp(struct pipe_screen *screen) -{ - struct r600_screen *rscreen = (struct r600_screen*)screen; - - return 1000000 * rscreen->b.ws->query_value(rscreen->b.ws, RADEON_TIMESTAMP) / - rscreen->b.info.r600_clock_crystal_freq; -} - -struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) -{ - struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen); - if (rscreen == NULL) { - return NULL; - } - - ws->query_info(ws, &rscreen->b.info); - - /* Set functions first. 
*/ - rscreen->b.b.context_create = r600_create_context; - rscreen->b.b.destroy = r600_destroy_screen; - rscreen->b.b.get_name = r600_get_name; - rscreen->b.b.get_vendor = r600_get_vendor; - rscreen->b.b.get_param = r600_get_param; - rscreen->b.b.get_shader_param = r600_get_shader_param; - rscreen->b.b.get_paramf = r600_get_paramf; - rscreen->b.b.get_compute_param = r600_get_compute_param; - rscreen->b.b.get_timestamp = r600_get_timestamp; - rscreen->b.b.is_format_supported = si_is_format_supported; - if (rscreen->b.info.has_uvd) { - rscreen->b.b.get_video_param = ruvd_get_video_param; - rscreen->b.b.is_video_format_supported = ruvd_is_format_supported; - } else { - rscreen->b.b.get_video_param = r600_get_video_param; - rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported; - } - r600_init_screen_resource_functions(&rscreen->b.b); - - if (!r600_common_screen_init(&rscreen->b, ws)) { - FREE(rscreen); - return NULL; - } - - rscreen->b.has_cp_dma = true; - rscreen->b.has_streamout = HAVE_LLVM >= 0x0304; - - if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) - rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; - -#if R600_TRACE_CS - rscreen->cs_count = 0; - if (rscreen->info.drm_minor >= 28) { - rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen, - PIPE_BIND_CUSTOM, - PIPE_USAGE_STAGING, - 4096); - if (rscreen->trace_bo) { - rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, - PIPE_TRANSFER_UNSYNCHRONIZED); - } - } -#endif - - /* Create the auxiliary context. This must be done last. 
*/ - rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL); - - return &rscreen->b.b; -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h deleted file mode 100644 index 56145bc4111..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Jerome Glisse - */ -#ifndef RADEONSI_PIPE_H -#define RADEONSI_PIPE_H - -#include "../radeon/r600_pipe_common.h" - -#include "pipe/p_screen.h" -#include "pipe/p_context.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_slab.h" -#include "r600.h" -#include "radeonsi_public.h" -#include "radeonsi_pm4.h" -#include "si_state.h" -#include "r600_resource.h" -#include "sid.h" - -#ifdef PIPE_ARCH_BIG_ENDIAN -#define R600_BIG_ENDIAN 1 -#else -#define R600_BIG_ENDIAN 0 -#endif - -#define R600_TRACE_CS 0 -#define R600_TRACE_CS_DWORDS 6 - -#define SI_MAX_DRAW_CS_DWORDS 18 - -struct si_pipe_compute; - -struct r600_screen { - struct r600_common_screen b; -#if R600_TRACE_CS - struct r600_resource *trace_bo; - uint32_t *trace_ptr; - unsigned cs_count; -#endif -}; - -struct si_pipe_sampler_view { - struct pipe_sampler_view base; - struct r600_resource *resource; - uint32_t state[8]; - uint32_t fmask_state[8]; -}; - -struct si_pipe_sampler_state { - uint32_t val[4]; - uint32_t border_color[4]; -}; - -struct si_cs_shader_state { - struct si_pipe_compute *program; -}; - -struct r600_textures_info { - struct si_sampler_views views; - struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS]; - unsigned n_views; - uint32_t depth_texture_mask; /* which textures are depth */ - uint32_t compressed_colortex_mask; - unsigned n_samplers; -}; - -#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0])) -#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1) - -struct r600_context { - struct r600_common_context b; - struct blitter_context *blitter; - void *custom_dsa_flush_depth_stencil[8]; - void *custom_dsa_flush_depth[8]; - void *custom_dsa_flush_stencil[8]; - void *custom_dsa_flush_inplace; - void *custom_blend_resolve; - void *custom_blend_decompress; - struct r600_screen *screen; - - union { - struct { - /* The order matters. 
*/ - struct r600_atom *const_buffers[SI_NUM_SHADERS]; - struct r600_atom *sampler_views[SI_NUM_SHADERS]; - struct r600_atom *streamout_buffers; - /* Caches must be flushed after resource descriptors are - * updated in memory. */ - struct r600_atom *cache_flush; - struct r600_atom *streamout_begin; - }; - struct r600_atom *array[0]; - } atoms; - - struct si_vertex_element *vertex_elements; - struct pipe_framebuffer_state framebuffer; - unsigned fb_log_samples; - unsigned fb_cb0_is_integer; - unsigned fb_compressed_cb_mask; - unsigned pa_sc_line_stipple; - unsigned pa_su_sc_mode_cntl; - /* for saving when using blitter */ - struct pipe_stencil_ref stencil_ref; - struct si_pipe_shader_selector *ps_shader; - struct si_pipe_shader_selector *vs_shader; - struct si_cs_shader_state cs_shader_state; - struct pipe_query *current_render_cond; - unsigned current_render_cond_mode; - boolean current_render_cond_cond; - struct pipe_query *saved_render_cond; - unsigned saved_render_cond_mode; - boolean saved_render_cond_cond; - /* shader information */ - unsigned sprite_coord_enable; - unsigned export_16bpc; - struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; - struct si_buffer_resources streamout_buffers; - struct r600_textures_info samplers[SI_NUM_SHADERS]; - struct r600_resource *border_color_table; - unsigned border_color_offset; - - unsigned default_ps_gprs, default_vs_gprs; - - /* Below are variables from the old r600_context. - */ - unsigned pm4_dirty_cdwords; - - /* The list of active queries. Only one query of each type can be active. */ - struct list_head active_nontimer_query_list; - unsigned num_cs_dw_nontimer_queries_suspend; - /* If queries have been suspended. */ - bool nontimer_queries_suspended; - - unsigned backend_mask; - unsigned max_db; /* for OQ */ - boolean predicate_drawing; - - /* Vertex and index buffers. 
*/ - bool vertex_buffers_dirty; - struct pipe_index_buffer index_buffer; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_buffers; - - /* With rasterizer discard, there doesn't have to be a pixel shader. - * In that case, we bind this one: */ - void *dummy_pixel_shader; - struct r600_atom cache_flush; - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ - - /* SI state handling */ - union si_state queued; - union si_state emitted; -}; - -/* r600_blit.c */ -void si_init_blit_functions(struct r600_context *rctx); -void si_flush_depth_textures(struct r600_context *rctx, - struct r600_textures_info *textures); -void r600_decompress_color_textures(struct r600_context *rctx, - struct r600_textures_info *textures); - -/* r600_buffer.c */ -void r600_upload_index_buffer(struct r600_context *rctx, - struct pipe_index_buffer *ib, unsigned count); - - -/* r600_pipe.c */ -void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, - unsigned flags); -const char *r600_get_llvm_processor_name(enum radeon_family family); - -/* r600_query.c */ -void r600_init_query_functions(struct r600_context *rctx); - -/* r600_resource.c */ -void r600_init_context_resource_functions(struct r600_context *r600); - -/* r600_translate.c */ -void r600_translate_index_buffer(struct r600_context *r600, - struct pipe_index_buffer *ib, - unsigned count); - -#if R600_TRACE_CS -void r600_trace_emit(struct r600_context *rctx); -#endif - -/* radeonsi_compute.c */ -void si_init_compute_functions(struct r600_context *rctx); - -/* radeonsi_uvd.c */ -struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ); - -struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe, - const struct pipe_video_buffer *tmpl); - -/* - * common helpers - */ -static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) -{ - return value * (1 << 
frac_bits); -} -#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) - -static INLINE unsigned si_map_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_Y: - return V_008F0C_SQ_SEL_Y; - case UTIL_FORMAT_SWIZZLE_Z: - return V_008F0C_SQ_SEL_Z; - case UTIL_FORMAT_SWIZZLE_W: - return V_008F0C_SQ_SEL_W; - case UTIL_FORMAT_SWIZZLE_0: - return V_008F0C_SQ_SEL_0; - case UTIL_FORMAT_SWIZZLE_1: - return V_008F0C_SQ_SEL_1; - default: /* UTIL_FORMAT_SWIZZLE_X */ - return V_008F0C_SQ_SEL_X; - } -} - -static inline unsigned r600_tex_aniso_filter(unsigned filter) -{ - if (filter <= 1) return 0; - if (filter <= 2) return 1; - if (filter <= 4) return 2; - if (filter <= 8) return 3; - /* else */ return 4; -} - -/* 12.4 fixed-point */ -static INLINE unsigned r600_pack_float_12p4(float x) -{ - return x <= 0 ? 0 : - x >= 4096 ? 0xffff : x * 16; -} - -#endif diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c b/src/gallium/drivers/radeonsi/radeonsi_pm4.c deleted file mode 100644 index eed0c4756d7..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright 2012 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Christian König - */ - -#include "../radeon/r600_cs.h" -#include "util/u_memory.h" -#include "radeonsi_pipe.h" -#include "radeonsi_pm4.h" -#include "sid.h" - -#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) - -void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) -{ - state->last_opcode = opcode; - state->last_pm4 = state->ndw++; -} - -void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) -{ - state->pm4[state->ndw++] = dw; -} - -void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) -{ - unsigned count; - count = state->ndw - state->last_pm4 - 2; - state->pm4[state->last_pm4] = - PKT3(state->last_opcode, count, predicate) - | PKT3_SHADER_TYPE_S(state->compute_pkt); - - assert(state->ndw <= SI_PM4_MAX_DW); -} - -void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) -{ - unsigned opcode; - - if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { - opcode = PKT3_SET_CONFIG_REG; - reg -= SI_CONFIG_REG_OFFSET; - - } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { - opcode = PKT3_SET_SH_REG; - reg -= SI_SH_REG_OFFSET; - - } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { - opcode = PKT3_SET_CONTEXT_REG; - reg -= SI_CONTEXT_REG_OFFSET; - - } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { - opcode = PKT3_SET_UCONFIG_REG; - reg -= CIK_UCONFIG_REG_OFFSET; - - } else { - R600_ERR("Invalid register offset %08x!\n", reg); 
- return; - } - - reg >>= 2; - - if (opcode != state->last_opcode || reg != (state->last_reg + 1)) { - si_pm4_cmd_begin(state, opcode); - si_pm4_cmd_add(state, reg); - } - - state->last_reg = reg; - si_pm4_cmd_add(state, val); - si_pm4_cmd_end(state, false); -} - -void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, - enum radeon_bo_usage usage) -{ - unsigned idx = state->nbo++; - assert(idx < SI_PM4_MAX_BO); - - r600_resource_reference(&state->bo[idx], bo); - state->bo_usage[idx] = usage; -} - -void si_pm4_sh_data_begin(struct si_pm4_state *state) -{ - si_pm4_cmd_begin(state, PKT3_NOP); -} - -void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw) -{ - si_pm4_cmd_add(state, dw); -} - -void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx) -{ - unsigned offs = state->last_pm4 + 1; - unsigned reg = base + idx * 4; - - /* Bail if no data was added */ - if (state->ndw == offs) { - state->ndw--; - return; - } - - si_pm4_cmd_end(state, false); - - si_pm4_cmd_begin(state, PKT3_SET_SH_REG_OFFSET); - si_pm4_cmd_add(state, (reg - SI_SH_REG_OFFSET) >> 2); - state->relocs[state->nrelocs++] = state->ndw; - si_pm4_cmd_add(state, offs << 2); - si_pm4_cmd_add(state, 0); - si_pm4_cmd_end(state, false); -} - -void si_pm4_inval_shader_cache(struct si_pm4_state *state) -{ - state->cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); - state->cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); -} - -void si_pm4_inval_texture_cache(struct si_pm4_state *state) -{ - state->cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); - state->cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); -} - -void si_pm4_free_state(struct r600_context *rctx, - struct si_pm4_state *state, - unsigned idx) -{ - if (state == NULL) - return; - - if (idx != ~0 && rctx->emitted.array[idx] == state) { - rctx->emitted.array[idx] = NULL; - } - - for (int i = 0; i < state->nbo; ++i) { - r600_resource_reference(&state->bo[i], NULL); - } - FREE(state); -} - -struct si_pm4_state * 
si_pm4_alloc_state(struct r600_context *rctx) -{ - struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - - if (pm4 == NULL) - return NULL; - - pm4->chip_class = rctx->b.chip_class; - - return pm4; -} - -uint32_t si_pm4_sync_flags(struct r600_context *rctx) -{ - uint32_t cp_coher_cntl = 0; - - for (int i = 0; i < NUMBER_OF_STATES; ++i) { - struct si_pm4_state *state = rctx->queued.array[i]; - - if (!state || rctx->emitted.array[i] == state) - continue; - - cp_coher_cntl |= state->cp_coher_cntl; - } - return cp_coher_cntl; -} - -unsigned si_pm4_dirty_dw(struct r600_context *rctx) -{ - unsigned count = 0; - - for (int i = 0; i < NUMBER_OF_STATES; ++i) { - struct si_pm4_state *state = rctx->queued.array[i]; - - if (!state || rctx->emitted.array[i] == state) - continue; - - count += state->ndw; -#if R600_TRACE_CS - /* for tracing each states */ - if (rctx->screen->trace_bo) { - count += R600_TRACE_CS_DWORDS; - } -#endif - } - - return count; -} - -void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state) -{ - struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - for (int i = 0; i < state->nbo; ++i) { - r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, state->bo[i], - state->bo_usage[i]); - } - - memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4); - - for (int i = 0; i < state->nrelocs; ++i) { - cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2; - } - - cs->cdw += state->ndw; - -#if R600_TRACE_CS - if (rctx->screen->trace_bo) { - r600_trace_emit(rctx); - } -#endif -} - -void si_pm4_emit_dirty(struct r600_context *rctx) -{ - for (int i = 0; i < NUMBER_OF_STATES; ++i) { - struct si_pm4_state *state = rctx->queued.array[i]; - - if (!state || rctx->emitted.array[i] == state) - continue; - - assert(state != rctx->queued.named.init); - si_pm4_emit(rctx, state); - rctx->emitted.array[i] = state; - } -} - -void si_pm4_reset_emitted(struct r600_context *rctx) -{ - memset(&rctx->emitted, 0, sizeof(rctx->emitted)); -} diff --git 
a/src/gallium/drivers/radeonsi/radeonsi_pm4.h b/src/gallium/drivers/radeonsi/radeonsi_pm4.h deleted file mode 100644 index 2e32a19e212..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2012 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Christian König - */ - -#ifndef RADEONSI_PM4_H -#define RADEONSI_PM4_H - -#include "../../winsys/radeon/drm/radeon_winsys.h" - -#define SI_PM4_MAX_DW 256 -#define SI_PM4_MAX_BO 32 -#define SI_PM4_MAX_RELOCS 4 - -// forward defines -struct r600_context; -enum chip_class; - -struct si_pm4_state -{ - /* family specific handling */ - enum chip_class chip_class; - /* PKT3_SET_*_REG handling */ - unsigned last_opcode; - unsigned last_reg; - unsigned last_pm4; - - /* flush flags for SURFACE_SYNC */ - uint32_t cp_coher_cntl; - - /* commands for the DE */ - unsigned ndw; - uint32_t pm4[SI_PM4_MAX_DW]; - - /* BO's referenced by this state */ - unsigned nbo; - struct r600_resource *bo[SI_PM4_MAX_BO]; - enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; - - /* relocs for shader data */ - unsigned nrelocs; - unsigned relocs[SI_PM4_MAX_RELOCS]; - - bool compute_pkt; -}; - -void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode); -void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw); -void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate); - -void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); -void si_pm4_add_bo(struct si_pm4_state *state, - struct r600_resource *bo, - enum radeon_bo_usage usage); - -void si_pm4_sh_data_begin(struct si_pm4_state *state); -void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw); -void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx); - -void si_pm4_inval_shader_cache(struct si_pm4_state *state); -void si_pm4_inval_texture_cache(struct si_pm4_state *state); - -void si_pm4_free_state(struct r600_context *rctx, - struct si_pm4_state *state, - unsigned idx); -struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx); - -uint32_t si_pm4_sync_flags(struct r600_context *rctx); -unsigned si_pm4_dirty_dw(struct r600_context *rctx); -void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state); -void si_pm4_emit_dirty(struct 
r600_context *rctx); -void si_pm4_reset_emitted(struct r600_context *rctx); - -#endif diff --git a/src/gallium/drivers/radeonsi/radeonsi_public.h b/src/gallium/drivers/radeonsi/radeonsi_public.h deleted file mode 100644 index 5dcec0fc93b..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_public.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2010 Jerome Glisse - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef RADEONSI_PUBLIC_H -#define RADEONSI_PUBLIC_H - -struct radeon_winsys; - -struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws); - -#endif diff --git a/src/gallium/drivers/radeonsi/radeonsi_resource.h b/src/gallium/drivers/radeonsi/radeonsi_resource.h deleted file mode 100644 index 7794beb9c57..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_resource.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2012 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Christian König - */ - -#ifndef RADEONSI_RESOURCE_H -#define RADEONSI_RESOURCE_H - -#include "../radeon/r600_pipe_common.h" -#include "util/u_transfer.h" -#include "util/u_inlines.h" - -static INLINE struct r600_resource * -r600_resource_create_custom(struct pipe_screen *screen, - unsigned usage, unsigned size) -{ - assert(size); - return r600_resource(pipe_buffer_create(screen, - PIPE_BIND_CUSTOM, usage, size)); -} - -#endif diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c deleted file mode 100644 index 9c1fb13272a..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ /dev/null @@ -1,2058 +0,0 @@ - -/* - * Copyright 2012 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Tom Stellard - * Michel Dänzer - * Christian König - */ - -#include "gallivm/lp_bld_tgsi_action.h" -#include "gallivm/lp_bld_const.h" -#include "gallivm/lp_bld_gather.h" -#include "gallivm/lp_bld_intr.h" -#include "gallivm/lp_bld_logic.h" -#include "gallivm/lp_bld_tgsi.h" -#include "gallivm/lp_bld_arit.h" -#include "gallivm/lp_bld_flow.h" -#include "radeon_llvm.h" -#include "radeon_llvm_emit.h" -#include "util/u_memory.h" -#include "tgsi/tgsi_info.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_scan.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" - -#include "radeonsi_pipe.h" -#include "radeonsi_shader.h" -#include "si_state.h" -#include "sid.h" - -#include -#include -#include - -struct si_shader_context -{ - struct radeon_llvm_context radeon_bld; - struct tgsi_parse_context parse; - struct tgsi_token * tokens; - struct si_pipe_shader *shader; - unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ - int param_streamout_config; - int param_streamout_write_index; - int param_streamout_offset[4]; - int param_vertex_id; - int param_instance_id; - LLVMValueRef const_md; - LLVMValueRef const_resource[NUM_CONST_BUFFERS]; -#if HAVE_LLVM >= 0x0304 - LLVMValueRef ddxy_lds; -#endif - LLVMValueRef *constants[NUM_CONST_BUFFERS]; - LLVMValueRef *resources; - LLVMValueRef *samplers; - LLVMValueRef so_buffers[4]; -}; - -static struct si_shader_context * si_shader_context( - struct lp_build_tgsi_context * bld_base) -{ - return (struct si_shader_context *)bld_base; -} - - -#define PERSPECTIVE_BASE 0 -#define LINEAR_BASE 9 - -#define SAMPLE_OFFSET 0 -#define CENTER_OFFSET 2 -#define CENTROID_OFSET 4 - -#define USE_SGPR_MAX_SUFFIX_LEN 5 -#define CONST_ADDR_SPACE 2 -#define LOCAL_ADDR_SPACE 3 -#define USER_SGPR_ADDR_SPACE 8 - -/** - * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad - * - * @param offset The offset parameter specifies the number of - * elements to offset, not the number of bytes or dwords. 
An element is the - * the type pointed to by the base_ptr parameter (e.g. int is the element of - * an int* pointer) - * - * When LLVM lowers the load instruction, it will convert the element offset - * into a dword offset automatically. - * - */ -static LLVMValueRef build_indexed_load( - struct si_shader_context * si_shader_ctx, - LLVMValueRef base_ptr, - LLVMValueRef offset) -{ - struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; - - LLVMValueRef indices[2] = { - LLVMConstInt(LLVMInt64TypeInContext(base->gallivm->context), 0, false), - offset - }; - LLVMValueRef computed_ptr = LLVMBuildGEP( - base->gallivm->builder, base_ptr, indices, 2, ""); - - LLVMValueRef result = LLVMBuildLoad(base->gallivm->builder, computed_ptr, ""); - LLVMSetMetadata(result, 1, si_shader_ctx->const_md); - return result; -} - -static LLVMValueRef get_instance_index_for_fetch( - struct radeon_llvm_context * radeon_bld, - unsigned divisor) -{ - struct si_shader_context *si_shader_ctx = - si_shader_context(&radeon_bld->soa.bld_base); - struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; - - LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, - si_shader_ctx->param_instance_id); - result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( - radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); - - if (divisor > 1) - result = LLVMBuildUDiv(gallivm->builder, result, - lp_build_const_int32(gallivm, divisor), ""); - - return result; -} - -static void declare_input_vs( - struct si_shader_context * si_shader_ctx, - unsigned input_index, - const struct tgsi_full_declaration *decl) -{ - struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; - unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index]; - - unsigned chan; - - LLVMValueRef t_list_ptr; - LLVMValueRef t_offset; - LLVMValueRef t_list; - LLVMValueRef attribute_offset; - LLVMValueRef buffer_index; - LLVMValueRef args[3]; - LLVMTypeRef vec4_type; 
- LLVMValueRef input; - - /* Load the T list */ - t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER); - - t_offset = lp_build_const_int32(base->gallivm, input_index); - - t_list = build_indexed_load(si_shader_ctx, t_list_ptr, t_offset); - - /* Build the attribute offset */ - attribute_offset = lp_build_const_int32(base->gallivm, 0); - - if (divisor) { - /* Build index from instance ID, start instance and divisor */ - si_shader_ctx->shader->shader.uses_instanceid = true; - buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor); - } else { - /* Load the buffer index, which is always stored in VGPR0 - * for Vertex Shaders */ - buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - si_shader_ctx->param_vertex_id); - } - - vec4_type = LLVMVectorType(base->elem_type, 4); - args[0] = t_list; - args[1] = attribute_offset; - args[2] = buffer_index; - input = build_intrinsic(base->gallivm->builder, - "llvm.SI.vs.load.input", vec4_type, args, 3, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - - /* Break up the vec4 into individual components */ - for (chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); - /* XXX: Use a helper function for this. There is one in - * tgsi_llvm.c. 
*/ - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = - LLVMBuildExtractElement(base->gallivm->builder, - input, llvm_chan, ""); - } -} - -static void declare_input_fs( - struct si_shader_context * si_shader_ctx, - unsigned input_index, - const struct tgsi_full_declaration *decl) -{ - struct si_shader *shader = &si_shader_ctx->shader->shader; - struct lp_build_context * base = - &si_shader_ctx->radeon_bld.soa.bld_base.base; - struct lp_build_context *uint = - &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; - struct gallivm_state * gallivm = base->gallivm; - LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); - LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn; - - LLVMValueRef interp_param; - const char * intr_name; - - /* This value is: - * [15:0] NewPrimMask (Bit mask for each quad. It is set it the - * quad begins a new primitive. Bit 0 always needs - * to be unset) - * [32:16] ParamOffset - * - */ - LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK); - LLVMValueRef attr_number; - - unsigned chan; - - if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - unsigned soa_index = - radeon_llvm_reg_index_soa(input_index, chan); - si_shader_ctx->radeon_bld.inputs[soa_index] = - LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan); - - if (chan == 3) - /* RCP for fragcoord.w */ - si_shader_ctx->radeon_bld.inputs[soa_index] = - LLVMBuildFDiv(gallivm->builder, - lp_build_const_float(gallivm, 1.0f), - si_shader_ctx->radeon_bld.inputs[soa_index], - ""); - } - return; - } - - if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { - LLVMValueRef face, is_face_positive; - - face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE); - - is_face_positive = LLVMBuildFCmp(gallivm->builder, - LLVMRealUGT, face, - lp_build_const_float(gallivm, 0.0f), - ""); - - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] = - 
LLVMBuildSelect(gallivm->builder, - is_face_positive, - lp_build_const_float(gallivm, 1.0f), - lp_build_const_float(gallivm, 0.0f), - ""); - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] = - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] = - lp_build_const_float(gallivm, 0.0f); - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] = - lp_build_const_float(gallivm, 1.0f); - - return; - } - - shader->input[input_index].param_offset = shader->ninterp++; - attr_number = lp_build_const_int32(gallivm, - shader->input[input_index].param_offset); - - switch (decl->Interp.Interpolate) { - case TGSI_INTERPOLATE_COLOR: - if (si_shader_ctx->shader->key.ps.flatshade) { - interp_param = 0; - } else { - if (decl->Interp.Centroid) - interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID); - else - interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER); - } - break; - case TGSI_INTERPOLATE_CONSTANT: - interp_param = 0; - break; - case TGSI_INTERPOLATE_LINEAR: - if (decl->Interp.Centroid) - interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID); - else - interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER); - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - if (decl->Interp.Centroid) - interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID); - else - interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER); - break; - default: - fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); - return; - } - - intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; - - /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? 
*/ - if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && - si_shader_ctx->shader->key.ps.color_two_side) { - LLVMValueRef args[4]; - LLVMValueRef face, is_face_positive; - LLVMValueRef back_attr_number = - lp_build_const_int32(gallivm, - shader->input[input_index].param_offset + 1); - - face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE); - - is_face_positive = LLVMBuildFCmp(gallivm->builder, - LLVMRealUGT, face, - lp_build_const_float(gallivm, 0.0f), - ""); - - args[2] = params; - args[3] = interp_param; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); - LLVMValueRef front, back; - - args[0] = llvm_chan; - args[1] = attr_number; - front = build_intrinsic(base->gallivm->builder, intr_name, - input_type, args, args[3] ? 4 : 3, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - - args[1] = back_attr_number; - back = build_intrinsic(base->gallivm->builder, intr_name, - input_type, args, args[3] ? 4 : 3, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - - si_shader_ctx->radeon_bld.inputs[soa_index] = - LLVMBuildSelect(gallivm->builder, - is_face_positive, - front, - back, - ""); - } - - shader->ninterp++; - } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) { - LLVMValueRef args[4]; - - args[0] = uint->zero; - args[1] = attr_number; - args[2] = params; - args[3] = interp_param; - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] = - build_intrinsic(base->gallivm->builder, intr_name, - input_type, args, args[3] ? 
4 : 3, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] = - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] = - lp_build_const_float(gallivm, 0.0f); - si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] = - lp_build_const_float(gallivm, 1.0f); - } else { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef args[4]; - LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); - args[0] = llvm_chan; - args[1] = attr_number; - args[2] = params; - args[3] = interp_param; - si_shader_ctx->radeon_bld.inputs[soa_index] = - build_intrinsic(base->gallivm->builder, intr_name, - input_type, args, args[3] ? 4 : 3, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - } - } -} - -static void declare_input( - struct radeon_llvm_context * radeon_bld, - unsigned input_index, - const struct tgsi_full_declaration *decl) -{ - struct si_shader_context * si_shader_ctx = - si_shader_context(&radeon_bld->soa.bld_base); - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - declare_input_vs(si_shader_ctx, input_index, decl); - } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { - declare_input_fs(si_shader_ctx, input_index, decl); - } else { - fprintf(stderr, "Warning: Unsupported shader type,\n"); - } -} - -static void declare_system_value( - struct radeon_llvm_context * radeon_bld, - unsigned index, - const struct tgsi_full_declaration *decl) -{ - struct si_shader_context *si_shader_ctx = - si_shader_context(&radeon_bld->soa.bld_base); - LLVMValueRef value = 0; - - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_INSTANCEID: - value = LLVMGetParam(radeon_bld->main_fn, - si_shader_ctx->param_instance_id); - break; - - case TGSI_SEMANTIC_VERTEXID: - value = LLVMGetParam(radeon_bld->main_fn, - si_shader_ctx->param_vertex_id); - break; - - default: - 
assert(!"unknown system value"); - return; - } - - radeon_bld->system_values[index] = value; -} - -static LLVMValueRef fetch_constant( - struct lp_build_tgsi_context * bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct lp_build_context * base = &bld_base->base; - const struct tgsi_ind_register *ireg = ®->Indirect; - unsigned buf, idx; - - LLVMValueRef args[2]; - LLVMValueRef addr; - LLVMValueRef result; - - if (swizzle == LP_CHAN_ALL) { - unsigned chan; - LLVMValueRef values[4]; - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) - values[chan] = fetch_constant(bld_base, reg, type, chan); - - return lp_build_gather_values(bld_base->base.gallivm, values, 4); - } - - buf = reg->Register.Dimension ? reg->Dimension.Index : 0; - idx = reg->Register.Index * 4 + swizzle; - - if (!reg->Register.Indirect) - return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]); - - args[0] = si_shader_ctx->const_resource[buf]; - args[1] = lp_build_const_int32(base->gallivm, idx * 4); - addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; - addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); - addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); - args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]); - - result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type, - args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - - return bitcast(bld_base, type, result); -} - -/* Initialize arguments for the shader export intrinsic */ -static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, - struct tgsi_full_declaration *d, - unsigned index, - unsigned target, - LLVMValueRef *args) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct lp_build_context *uint = - &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; - struct 
lp_build_context *base = &bld_base->base; - unsigned compressed = 0; - unsigned chan; - - if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { - int cbuf = target - V_008DFC_SQ_EXP_MRT; - - if (cbuf >= 0 && cbuf < 8) { - compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; - - if (compressed) - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf); - else - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_32_ABGR << (4 * cbuf); - - si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf); - } - } - - if (compressed) { - /* Pixel shader needs to pack output values before export */ - for (chan = 0; chan < 2; chan++ ) { - LLVMValueRef *out_ptr = - si_shader_ctx->radeon_bld.soa.outputs[index]; - args[0] = LLVMBuildLoad(base->gallivm->builder, - out_ptr[2 * chan], ""); - args[1] = LLVMBuildLoad(base->gallivm->builder, - out_ptr[2 * chan + 1], ""); - args[chan + 5] = - build_intrinsic(base->gallivm->builder, - "llvm.SI.packf16", - LLVMInt32TypeInContext(base->gallivm->context), - args, 2, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - args[chan + 7] = args[chan + 5] = - LLVMBuildBitCast(base->gallivm->builder, - args[chan + 5], - LLVMFloatTypeInContext(base->gallivm->context), - ""); - } - - /* Set COMPR flag */ - args[4] = uint->one; - } else { - for (chan = 0; chan < 4; chan++ ) { - LLVMValueRef out_ptr = - si_shader_ctx->radeon_bld.soa.outputs[index][chan]; - /* +5 because the first output value will be - * the 6th argument to the intrinsic. */ - args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, - out_ptr, ""); - } - - /* Clear COMPR flag */ - args[4] = uint->zero; - } - - /* XXX: This controls which components of the output - * registers actually get exported. (e.g bit 0 means export - * X component, bit 1 means export Y component, etc.) I'm - * hard coding this to 0xf for now. In the future, we might - * want to do something else. 
*/ - args[0] = lp_build_const_int32(base->gallivm, 0xf); - - /* Specify whether the EXEC mask represents the valid mask */ - args[1] = uint->zero; - - /* Specify whether this is the last export */ - args[2] = uint->zero; - - /* Specify the target we are exporting */ - args[3] = lp_build_const_int32(base->gallivm, target); - - /* XXX: We probably need to keep track of the output - * values, so we know what we are passing to the next - * stage. */ -} - -static void si_alpha_test(struct lp_build_tgsi_context *bld_base, - unsigned index) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - - if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { - LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3]; - LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_ALPHA_REF); - - LLVMValueRef alpha_pass = - lp_build_cmp(&bld_base->base, - si_shader_ctx->shader->key.ps.alpha_func, - LLVMBuildLoad(gallivm->builder, out_ptr, ""), - alpha_ref); - LLVMValueRef arg = - lp_build_select(&bld_base->base, - alpha_pass, - lp_build_const_float(gallivm, 1.0f), - lp_build_const_float(gallivm, -1.0f)); - - build_intrinsic(gallivm->builder, - "llvm.AMDGPU.kill", - LLVMVoidTypeInContext(gallivm->context), - &arg, 1, 0); - } else { - build_intrinsic(gallivm->builder, - "llvm.AMDGPU.kilp", - LLVMVoidTypeInContext(gallivm->context), - NULL, 0, 0); - } -} - -static void si_alpha_to_one(struct lp_build_tgsi_context *bld_base, - unsigned index) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - - /* set alpha to one */ - LLVMBuildStore(bld_base->base.gallivm->builder, - bld_base->base.one, - si_shader_ctx->radeon_bld.soa.outputs[index][3]); -} - -static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, - LLVMValueRef (*pos)[9], unsigned index) -{ - struct si_shader_context *si_shader_ctx = 
si_shader_context(bld_base); - struct si_pipe_shader *shader = si_shader_ctx->shader; - struct lp_build_context *base = &bld_base->base; - struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; - unsigned reg_index; - unsigned chan; - unsigned const_chan; - LLVMValueRef out_elts[4]; - LLVMValueRef base_elt; - LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); - LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, NUM_PIPE_CONST_BUFFERS); - LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, constbuf_index); - - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan]; - out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); - } - - for (reg_index = 0; reg_index < 2; reg_index ++) { - LLVMValueRef *args = pos[2 + reg_index]; - - if (!(shader->key.vs.ucps_enabled & (1 << reg_index))) - continue; - - shader->shader.clip_dist_write |= 0xf << (4 * reg_index); - - args[5] = - args[6] = - args[7] = - args[8] = lp_build_const_float(base->gallivm, 0.0f); - - /* Compute dot products of position and user clip plane vectors */ - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) { - args[0] = const_resource; - args[1] = lp_build_const_int32(base->gallivm, - ((reg_index * 4 + chan) * 4 + - const_chan) * 4); - base_elt = build_intrinsic(base->gallivm->builder, - "llvm.SI.load.const", - base->elem_type, - args, 2, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - args[5 + chan] = - lp_build_add(base, args[5 + chan], - lp_build_mul(base, base_elt, - out_elts[const_chan])); - } - } - - args[0] = lp_build_const_int32(base->gallivm, 0xf); - args[1] = uint->zero; - args[2] = uint->zero; - args[3] = lp_build_const_int32(base->gallivm, - V_008DFC_SQ_EXP_POS + 2 + reg_index); - args[4] = uint->zero; - } -} - -static void si_dump_streamout(struct 
pipe_stream_output_info *so) -{ - unsigned i; - - if (so->num_outputs) - fprintf(stderr, "STREAMOUT\n"); - - for (i = 0; i < so->num_outputs; i++) { - unsigned mask = ((1 << so->output[i].num_components) - 1) << - so->output[i].start_component; - fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n", - i, so->output[i].output_buffer, - so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, - so->output[i].register_index, - mask & 1 ? "x" : "", - mask & 2 ? "y" : "", - mask & 4 ? "z" : "", - mask & 8 ? "w" : ""); - } -} - -/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. - * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), - * or v4i32 (num_channels=3,4). */ -static void build_tbuffer_store(struct si_shader_context *shader, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset, - unsigned dfmt, - unsigned nfmt, - unsigned offen, - unsigned idxen, - unsigned glc, - unsigned slc, - unsigned tfe) -{ - struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; - LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); - LLVMValueRef args[] = { - rsrc, - vdata, - LLVMConstInt(i32, num_channels, 0), - vaddr, - soffset, - LLVMConstInt(i32, inst_offset, 0), - LLVMConstInt(i32, dfmt, 0), - LLVMConstInt(i32, nfmt, 0), - LLVMConstInt(i32, offen, 0), - LLVMConstInt(i32, idxen, 0), - LLVMConstInt(i32, glc, 0), - LLVMConstInt(i32, slc, 0), - LLVMConstInt(i32, tfe, 0) - }; - - /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. 
*/ - unsigned func = CLAMP(num_channels, 1, 3) - 1; - const char *types[] = {"i32", "v2i32", "v4i32"}; - char name[256]; - snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); - - lp_build_intrinsic(gallivm->builder, name, - LLVMVoidTypeInContext(gallivm->context), - args, Elements(args)); -} - -static void build_streamout_store(struct si_shader_context *shader, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset) -{ - static unsigned dfmt[] = { - V_008F0C_BUF_DATA_FORMAT_32, - V_008F0C_BUF_DATA_FORMAT_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32, - V_008F0C_BUF_DATA_FORMAT_32_32_32_32 - }; - assert(num_channels >= 1 && num_channels <= 4); - - build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset, - inst_offset, dfmt[num_channels-1], - V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); -} - -/* On SI, the vertex shader is responsible for writing streamout data - * to buffers. */ -static void si_llvm_emit_streamout(struct si_shader_context *shader) -{ - struct pipe_stream_output_info *so = &shader->shader->selector->so; - struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; - LLVMBuilderRef builder = gallivm->builder; - int i, j; - struct lp_build_if_state if_ctx; - - LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); - - LLVMValueRef so_param = - LLVMGetParam(shader->radeon_bld.main_fn, - shader->param_streamout_config); - - /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ - LLVMValueRef so_vtx_count = - LLVMBuildAnd(builder, - LLVMBuildLShr(builder, so_param, - LLVMConstInt(i32, 16, 0), ""), - LLVMConstInt(i32, 127, 0), ""); - - LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32, - NULL, 0, LLVMReadNoneAttribute); - - /* can_emit = tid < so_vtx_count; */ - LLVMValueRef can_emit = - LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); - - /* Emit the streamout code conditionally. 
This actually avoids - * out-of-bounds buffer access. The hw tells us via the SGPR - * (so_vtx_count) which threads are allowed to emit streamout data. */ - lp_build_if(&if_ctx, gallivm, can_emit); - { - /* The buffer offset is computed as follows: - * ByteOffset = streamout_offset[buffer_id]*4 + - * (streamout_write_index + thread_id)*stride[buffer_id] + - * attrib_offset - */ - - LLVMValueRef so_write_index = - LLVMGetParam(shader->radeon_bld.main_fn, - shader->param_streamout_write_index); - - /* Compute (streamout_write_index + thread_id). */ - so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); - - /* Compute the write offset for each enabled buffer. */ - LLVMValueRef so_write_offset[4] = {}; - for (i = 0; i < 4; i++) { - if (!so->stride[i]) - continue; - - LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn, - shader->param_streamout_offset[i]); - so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), ""); - - so_write_offset[i] = LLVMBuildMul(builder, so_write_index, - LLVMConstInt(i32, so->stride[i]*4, 0), ""); - so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); - } - - LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs; - - /* Write streamout data. */ - for (i = 0; i < so->num_outputs; i++) { - unsigned buf_idx = so->output[i].output_buffer; - unsigned reg = so->output[i].register_index; - unsigned start = so->output[i].start_component; - unsigned num_comps = so->output[i].num_components; - LLVMValueRef out[4]; - - assert(num_comps && num_comps <= 4); - if (!num_comps || num_comps > 4) - continue; - - /* Load the output as int. */ - for (j = 0; j < num_comps; j++) { - out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], ""); - out[j] = LLVMBuildBitCast(builder, out[j], i32, ""); - } - - /* Pack the output. 
*/ - LLVMValueRef vdata = NULL; - - switch (num_comps) { - case 1: /* as i32 */ - vdata = out[0]; - break; - case 2: /* as v2i32 */ - case 3: /* as v4i32 (aligned to 4) */ - case 4: /* as v4i32 */ - vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps))); - for (j = 0; j < num_comps; j++) { - vdata = LLVMBuildInsertElement(builder, vdata, out[j], - LLVMConstInt(i32, j, 0), ""); - } - break; - } - - build_streamout_store(shader, shader->so_buffers[buf_idx], - vdata, num_comps, - so_write_offset[buf_idx], - LLVMConstInt(i32, 0, 0), - so->output[i].dst_offset*4); - } - } - lp_build_endif(&if_ctx); -} - - -static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) -{ - struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); - struct si_shader * shader = &si_shader_ctx->shader->shader; - struct lp_build_context * base = &bld_base->base; - struct lp_build_context * uint = - &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; - struct tgsi_parse_context *parse = &si_shader_ctx->parse; - LLVMValueRef args[9]; - LLVMValueRef last_args[9] = { 0 }; - LLVMValueRef pos_args[4][9] = { { 0 } }; - unsigned semantic_name; - unsigned param_count = 0; - int depth_index = -1, stencil_index = -1, psize_index = -1, edgeflag_index = -1; - int layer_index = -1; - int i; - - if (si_shader_ctx->shader->selector->so.num_outputs) { - si_llvm_emit_streamout(si_shader_ctx); - } - - while (!tgsi_parse_end_of_tokens(parse)) { - struct tgsi_full_declaration *d = - &parse->FullToken.FullDeclaration; - unsigned target; - unsigned index; - - tgsi_parse_token(parse); - - if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY && - parse->FullToken.FullProperty.Property.PropertyName == - TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) - shader->fs_write_all = TRUE; - - if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) - continue; - - switch (d->Declaration.File) { - case TGSI_FILE_INPUT: - i = shader->ninput++; - assert(i < 
Elements(shader->input)); - shader->input[i].name = d->Semantic.Name; - shader->input[i].sid = d->Semantic.Index; - shader->input[i].interpolate = d->Interp.Interpolate; - shader->input[i].centroid = d->Interp.Centroid; - continue; - - case TGSI_FILE_OUTPUT: - i = shader->noutput++; - assert(i < Elements(shader->output)); - shader->output[i].name = d->Semantic.Name; - shader->output[i].sid = d->Semantic.Index; - shader->output[i].interpolate = d->Interp.Interpolate; - break; - - default: - continue; - } - - semantic_name = d->Semantic.Name; -handle_semantic: - for (index = d->Range.First; index <= d->Range.Last; index++) { - /* Select the correct target */ - switch(semantic_name) { - case TGSI_SEMANTIC_PSIZE: - shader->vs_out_misc_write = true; - shader->vs_out_point_size = true; - psize_index = index; - continue; - case TGSI_SEMANTIC_EDGEFLAG: - shader->vs_out_misc_write = true; - shader->vs_out_edgeflag = true; - edgeflag_index = index; - continue; - case TGSI_SEMANTIC_LAYER: - shader->vs_out_misc_write = true; - shader->vs_out_layer = true; - layer_index = index; - continue; - case TGSI_SEMANTIC_POSITION: - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - target = V_008DFC_SQ_EXP_POS; - break; - } else { - depth_index = index; - continue; - } - case TGSI_SEMANTIC_STENCIL: - stencil_index = index; - continue; - case TGSI_SEMANTIC_COLOR: - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - case TGSI_SEMANTIC_BCOLOR: - target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->output[i].param_offset = param_count; - param_count++; - } else { - target = V_008DFC_SQ_EXP_MRT + shader->output[i].sid; - if (si_shader_ctx->shader->key.ps.alpha_to_one) { - si_alpha_to_one(bld_base, index); - } - if (shader->output[i].sid == 0 && - si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) - si_alpha_test(bld_base, index); - } - break; - case TGSI_SEMANTIC_CLIPDIST: - if (!(si_shader_ctx->shader->key.vs.ucps_enabled & - (1 << d->Semantic.Index))) - continue; - 
shader->clip_dist_write |= - d->Declaration.UsageMask << (d->Semantic.Index << 2); - target = V_008DFC_SQ_EXP_POS + 2 + d->Semantic.Index; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - si_llvm_emit_clipvertex(bld_base, pos_args, index); - continue; - case TGSI_SEMANTIC_FOG: - case TGSI_SEMANTIC_GENERIC: - target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->output[i].param_offset = param_count; - param_count++; - break; - default: - target = 0; - fprintf(stderr, - "Warning: SI unhandled output type:%d\n", - semantic_name); - } - - si_llvm_init_export_args(bld_base, d, index, target, args); - - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && - target >= V_008DFC_SQ_EXP_POS && - target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], - args, sizeof(args)); - } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT && - semantic_name == TGSI_SEMANTIC_COLOR) { - /* If there is an export instruction waiting to be emitted, do so now. */ - if (last_args[0]) { - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - last_args, 9); - } - - /* This instruction will be emitted at the end of the shader. */ - memcpy(last_args, args, sizeof(args)); - - /* Handle FS_COLOR0_WRITES_ALL_CBUFS. 
*/ - if (shader->fs_write_all && shader->output[i].sid == 0 && - si_shader_ctx->shader->key.ps.nr_cbufs > 1) { - for (int c = 1; c < si_shader_ctx->shader->key.ps.nr_cbufs; c++) { - si_llvm_init_export_args(bld_base, d, index, - V_008DFC_SQ_EXP_MRT + c, args); - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9); - } - } - } else { - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9); - } - } - - if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { - semantic_name = TGSI_SEMANTIC_GENERIC; - goto handle_semantic; - } - } - - if (depth_index >= 0 || stencil_index >= 0) { - LLVMValueRef out_ptr; - unsigned mask = 0; - - /* Specify the target we are exporting */ - args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); - - if (depth_index >= 0) { - out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2]; - args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); - mask |= 0x1; - - if (stencil_index < 0) { - args[6] = - args[7] = - args[8] = args[5]; - } - } - - if (stencil_index >= 0) { - out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1]; - args[7] = - args[8] = - args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); - /* Only setting the stencil component bit (0x2) here - * breaks some stencil piglit tests - */ - mask |= 0x3; - - if (depth_index < 0) - args[5] = args[6]; - } - - /* Specify which components to enable */ - args[0] = lp_build_const_int32(base->gallivm, mask); - - args[1] = - args[2] = - args[4] = uint->zero; - - if (last_args[0]) - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9); - else - memcpy(last_args, args, sizeof(args)); - } - - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - unsigned pos_idx = 0; - - /* We need to add the position output manually if it's missing. 
*/ - if (!pos_args[0][0]) { - pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */ - pos_args[0][1] = uint->zero; /* EXEC mask */ - pos_args[0][2] = uint->zero; /* last export? */ - pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS); - pos_args[0][4] = uint->zero; /* COMPR flag */ - pos_args[0][5] = base->zero; /* X */ - pos_args[0][6] = base->zero; /* Y */ - pos_args[0][7] = base->zero; /* Z */ - pos_args[0][8] = base->one; /* W */ - } - - /* Write the misc vector (point size, edgeflag, layer, viewport). */ - if (shader->vs_out_misc_write) { - pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */ - shader->vs_out_point_size | - (shader->vs_out_edgeflag << 1) | - (shader->vs_out_layer << 2)); - pos_args[1][1] = uint->zero; /* EXEC mask */ - pos_args[1][2] = uint->zero; /* last export? */ - pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1); - pos_args[1][4] = uint->zero; /* COMPR flag */ - pos_args[1][5] = base->zero; /* X */ - pos_args[1][6] = base->zero; /* Y */ - pos_args[1][7] = base->zero; /* Z */ - pos_args[1][8] = base->zero; /* W */ - - if (shader->vs_out_point_size) { - pos_args[1][5] = LLVMBuildLoad(base->gallivm->builder, - si_shader_ctx->radeon_bld.soa.outputs[psize_index][0], ""); - } - - if (shader->vs_out_edgeflag) { - LLVMValueRef output = LLVMBuildLoad(base->gallivm->builder, - si_shader_ctx->radeon_bld.soa.outputs[edgeflag_index][0], ""); - - /* The output is a float, but the hw expects an integer - * with the first bit containing the edge flag. */ - output = LLVMBuildFPToUI(base->gallivm->builder, output, - bld_base->uint_bld.elem_type, ""); - - output = lp_build_min(&bld_base->int_bld, output, bld_base->int_bld.one); - - /* The LLVM intrinsic expects a float. 
*/ - pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder, output, - base->elem_type, ""); - } - - if (shader->vs_out_layer) { - pos_args[1][7] = LLVMBuildLoad(base->gallivm->builder, - si_shader_ctx->radeon_bld.soa.outputs[layer_index][0], ""); - } - } - - for (i = 0; i < 4; i++) - if (pos_args[i][0]) - shader->nr_pos_exports++; - - for (i = 0; i < 4; i++) { - if (!pos_args[i][0]) - continue; - - /* Specify the target we are exporting */ - pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); - - if (pos_idx == shader->nr_pos_exports) - /* Specify that this is the last export */ - pos_args[i][2] = uint->one; - - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - pos_args[i], 9); - } - } else { - if (!last_args[0]) { - /* Specify which components to enable */ - last_args[0] = lp_build_const_int32(base->gallivm, 0x0); - - /* Specify the target we are exporting */ - last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); - - /* Set COMPR flag to zero to export data as 32-bit */ - last_args[4] = uint->zero; - - /* dummy bits */ - last_args[5]= uint->zero; - last_args[6]= uint->zero; - last_args[7]= uint->zero; - last_args[8]= uint->zero; - - si_shader_ctx->shader->spi_shader_col_format |= - V_028714_SPI_SHADER_32_ABGR; - si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf); - } - - /* Specify whether the EXEC mask represents the valid mask */ - last_args[1] = uint->one; - - /* Specify that this is the last export */ - last_args[2] = lp_build_const_int32(base->gallivm, 1); - - lp_build_intrinsic(base->gallivm->builder, - "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - last_args, 9); - } -} - -static const struct lp_build_tgsi_action txf_action; - -static void build_tex_intrinsic(const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data); - -static 
void tex_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - const struct tgsi_full_instruction * inst = emit_data->inst; - unsigned opcode = inst->Instruction.Opcode; - unsigned target = inst->Texture.Texture; - LLVMValueRef coords[4]; - LLVMValueRef address[16]; - int ref_pos; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos); - unsigned count = 0; - unsigned chan; - unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; - unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index; - - if (target == TGSI_TEXTURE_BUFFER) { - LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128); - LLVMTypeRef v2i128 = LLVMVectorType(i128, 2); - LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context); - LLVMTypeRef v16i8 = LLVMVectorType(i8, 16); - - /* Truncate v32i8 to v16i8. 
*/ - LLVMValueRef res = si_shader_ctx->resources[sampler_index]; - res = LLVMBuildBitCast(gallivm->builder, res, v2i128, ""); - res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.zero, ""); - res = LLVMBuildBitCast(gallivm->builder, res, v16i8, ""); - - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); - emit_data->args[0] = res; - emit_data->args[1] = bld_base->uint_bld.zero; - emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); - emit_data->arg_count = 3; - return; - } - - /* Fetch and project texture coordinates */ - coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); - for (chan = 0; chan < 3; chan++ ) { - coords[chan] = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, - chan); - if (opcode == TGSI_OPCODE_TXP) - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, - coords[chan], - coords[3]); - } - - if (opcode == TGSI_OPCODE_TXP) - coords[3] = bld_base->base.one; - - /* Pack LOD bias value */ - if (opcode == TGSI_OPCODE_TXB) - address[count++] = coords[3]; - - if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) - radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords); - - /* Pack depth comparison value */ - switch (target) { - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - assert(ref_pos >= 0); - address[count++] = coords[ref_pos]; - break; - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0); - } - - /* Pack user derivatives */ - if (opcode == TGSI_OPCODE_TXD) { - for (chan = 0; chan < 2; chan++) { - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, chan); - if (num_coords > 1) - address[count++] = lp_build_emit_fetch(bld_base, inst, 2, chan); - } - } - - /* Pack texture coordinates */ - address[count++] = coords[0]; - if 
(num_coords > 1) - address[count++] = coords[1]; - if (num_coords > 2) - address[count++] = coords[2]; - - /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) - address[count++] = coords[3]; - - if (count > 16) { - assert(!"Cannot handle more than 16 texture address parameters"); - count = 16; - } - - for (chan = 0; chan < count; chan++ ) { - address[chan] = LLVMBuildBitCast(gallivm->builder, - address[chan], - LLVMInt32TypeInContext(gallivm->context), - ""); - } - - /* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. - * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * sample_index = (fmask >> (sample_index * 4)) & 0xF; - */ - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_emit_data txf_emit_data = *emit_data; - LLVMValueRef txf_address[4]; - unsigned txf_count = count; - - memcpy(txf_address, address, sizeof(txf_address)); - - if (target == TGSI_TEXTURE_2D_MSAA) { - txf_address[2] = bld_base->uint_bld.zero; - } - txf_address[3] = bld_base->uint_bld.zero; - - /* Pad to a power-of-two size. */ - while (txf_count < util_next_power_of_two(txf_count)) - txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); - - /* Read FMASK using TXF. 
*/ - txf_emit_data.chan = 0; - txf_emit_data.dst_type = LLVMVectorType( - LLVMInt32TypeInContext(bld_base->base.gallivm->context), 4); - txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count); - txf_emit_data.args[1] = si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index]; - txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm, - target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY); - txf_emit_data.arg_count = 3; - - build_tex_intrinsic(&txf_action, bld_base, &txf_emit_data); - - /* Initialize some constants. */ - LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0); - LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0); - - /* Apply the formula. */ - LLVMValueRef fmask = - LLVMBuildExtractElement(gallivm->builder, - txf_emit_data.output[0], - uint_bld->zero, ""); - - unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; - - LLVMValueRef sample_index4 = - LLVMBuildMul(gallivm->builder, address[sample_chan], four, ""); - - LLVMValueRef shifted_fmask = - LLVMBuildLShr(gallivm->builder, fmask, sample_index4, ""); - - LLVMValueRef final_sample = - LLVMBuildAnd(gallivm->builder, shifted_fmask, F, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid), - */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(gallivm->builder, - si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index], - LLVMVectorType(uint_bld->elem_type, 8), ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(gallivm->builder, fmask_desc, - uint_bld->one, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(gallivm->builder, LLVMIntNE, - fmask_word1, uint_bld->zero, ""); - - /* Replace the MSAA sample index. 
*/ - address[sample_chan] = - LLVMBuildSelect(gallivm->builder, word1_is_nonzero, - final_sample, address[sample_chan], ""); - } - - /* Resource */ - emit_data->args[1] = si_shader_ctx->resources[sampler_index]; - - if (opcode == TGSI_OPCODE_TXF) { - /* add tex offsets */ - if (inst->Texture.NumOffsets) { - struct lp_build_context *uint_bld = &bld_base->uint_bld; - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - const struct tgsi_texture_offset * off = inst->TexOffsets; - - assert(inst->Texture.NumOffsets == 1); - - switch (target) { - case TGSI_TEXTURE_3D: - address[2] = lp_build_add(uint_bld, address[2], - bld->immediates[off->Index][off->SwizzleZ]); - /* fall through */ - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - address[1] = - lp_build_add(uint_bld, address[1], - bld->immediates[off->Index][off->SwizzleY]); - /* fall through */ - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - address[0] = - lp_build_add(uint_bld, address[0], - bld->immediates[off->Index][off->SwizzleX]); - break; - /* texture offsets do not apply to other texture targets */ - } - } - - emit_data->dst_type = LLVMVectorType( - LLVMInt32TypeInContext(bld_base->base.gallivm->context), - 4); - - emit_data->arg_count = 3; - } else { - /* Sampler */ - emit_data->args[2] = si_shader_ctx->samplers[sampler_index]; - - emit_data->dst_type = LLVMVectorType( - LLVMFloatTypeInContext(bld_base->base.gallivm->context), - 4); - - emit_data->arg_count = 4; - } - - /* Dimensions */ - emit_data->args[emit_data->arg_count - 1] = - lp_build_const_int32(bld_base->base.gallivm, target); - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); - - emit_data->args[0] = 
lp_build_gather_values(gallivm, address, count); -} - -static void build_tex_intrinsic(const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct lp_build_context * base = &bld_base->base; - char intr_name[127]; - - if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { - emit_data->output[emit_data->chan] = build_intrinsic( - base->gallivm->builder, - "llvm.SI.vs.load.input", emit_data->dst_type, - emit_data->args, emit_data->arg_count, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - return; - } - - sprintf(intr_name, "%sv%ui32", action->intr_name, - LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0]))); - - emit_data->output[emit_data->chan] = build_intrinsic( - base->gallivm->builder, intr_name, emit_data->dst_type, - emit_data->args, emit_data->arg_count, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); -} - -static void txq_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction *inst = emit_data->inst; - struct gallivm_state *gallivm = bld_base->base.gallivm; - - if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { - LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); - LLVMTypeRef v8i32 = LLVMVectorType(i32, 8); - - /* Read the size from the buffer descriptor directly. 
*/ - LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index]; - size = LLVMBuildBitCast(gallivm->builder, size, v8i32, ""); - size = LLVMBuildExtractElement(gallivm->builder, size, - lp_build_const_int32(gallivm, 2), ""); - emit_data->args[0] = size; - return; - } - - /* Mip level */ - emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); - - /* Resource */ - emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index]; - - /* Dimensions */ - emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm, - inst->Texture.Texture); - - emit_data->arg_count = 3; - - emit_data->dst_type = LLVMVectorType( - LLVMInt32TypeInContext(bld_base->base.gallivm->context), - 4); -} - -static void build_txq_intrinsic(const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { - /* Just return the buffer size. */ - emit_data->output[emit_data->chan] = emit_data->args[0]; - return; - } - - build_tgsi_intrinsic_nomem(action, bld_base, emit_data); -} - -#if HAVE_LLVM >= 0x0304 - -static void si_llvm_emit_ddxy( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; - struct lp_build_context * base = &bld_base->base; - const struct tgsi_full_instruction *inst = emit_data->inst; - unsigned opcode = inst->Instruction.Opcode; - LLVMValueRef indices[2]; - LLVMValueRef store_ptr, load_ptr0, load_ptr1; - LLVMValueRef tl, trbl, result[4]; - LLVMTypeRef i32; - unsigned swizzle[4]; - unsigned c; - - i32 = LLVMInt32TypeInContext(gallivm->context); - - indices[0] = bld_base->uint_bld.zero; - indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32, - NULL, 0, LLVMReadNoneAttribute); - store_ptr = 
LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, - indices, 2, ""); - - indices[1] = LLVMBuildAnd(gallivm->builder, indices[1], - lp_build_const_int32(gallivm, 0xfffffffc), ""); - load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, - indices, 2, ""); - - indices[1] = LLVMBuildAdd(gallivm->builder, indices[1], - lp_build_const_int32(gallivm, - opcode == TGSI_OPCODE_DDX ? 1 : 2), - ""); - load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, - indices, 2, ""); - - for (c = 0; c < 4; ++c) { - unsigned i; - - swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c); - for (i = 0; i < c; ++i) { - if (swizzle[i] == swizzle[c]) { - result[c] = result[i]; - break; - } - } - if (i != c) - continue; - - LLVMBuildStore(gallivm->builder, - LLVMBuildBitCast(gallivm->builder, - lp_build_emit_fetch(bld_base, inst, 0, c), - i32, ""), - store_ptr); - - tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); - tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, ""); - - trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); - trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, ""); - - result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, ""); - } - - emit_data->output[0] = lp_build_gather_values(gallivm, result, 4); -} - -#endif /* HAVE_LLVM >= 0x0304 */ - -static const struct lp_build_tgsi_action tex_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, - .intr_name = "llvm.SI.sample." -}; - -static const struct lp_build_tgsi_action txb_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, - .intr_name = "llvm.SI.sampleb." -}; - -#if HAVE_LLVM >= 0x0304 -static const struct lp_build_tgsi_action txd_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, - .intr_name = "llvm.SI.sampled." -}; -#endif - -static const struct lp_build_tgsi_action txf_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, - .intr_name = "llvm.SI.imageload." 
-}; - -static const struct lp_build_tgsi_action txl_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, - .intr_name = "llvm.SI.samplel." -}; - -static const struct lp_build_tgsi_action txq_action = { - .fetch_args = txq_fetch_args, - .emit = build_txq_intrinsic, - .intr_name = "llvm.SI.resinfo" -}; - -static void create_meta_data(struct si_shader_context *si_shader_ctx) -{ - struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm; - LLVMValueRef args[3]; - - args[0] = LLVMMDStringInContext(gallivm->context, "const", 5); - args[1] = 0; - args[2] = lp_build_const_int32(gallivm, 1); - - si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3); -} - -static void create_function(struct si_shader_context *si_shader_ctx) -{ - struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; - struct gallivm_state *gallivm = bld_base->base.gallivm; - LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32; - unsigned i, last_sgpr, num_params; - - i8 = LLVMInt8TypeInContext(gallivm->context); - i32 = LLVMInt32TypeInContext(gallivm->context); - f32 = LLVMFloatTypeInContext(gallivm->context); - v2i32 = LLVMVectorType(i32, 2); - v3i32 = LLVMVectorType(i32, 3); - - params[SI_PARAM_CONST] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE); - /* We assume at most 16 textures per program at the moment. 
- * This need probably need to be changed to support bindless textures */ - params[SI_PARAM_SAMPLER] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE); - params[SI_PARAM_RESOURCE] = LLVMPointerType( - LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_STATES), CONST_ADDR_SPACE); - - switch (si_shader_ctx->type) { - case TGSI_PROCESSOR_VERTEX: - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; - params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST]; - params[SI_PARAM_START_INSTANCE] = i32; - num_params = SI_PARAM_START_INSTANCE+1; - - /* The locations of the other parameters are assigned dynamically. */ - - /* Streamout SGPRs. */ - if (si_shader_ctx->shader->selector->so.num_outputs) { - params[si_shader_ctx->param_streamout_config = num_params++] = i32; - params[si_shader_ctx->param_streamout_write_index = num_params++] = i32; - } - /* A streamout buffer offset is loaded if the stride is non-zero. */ - for (i = 0; i < 4; i++) { - if (!si_shader_ctx->shader->selector->so.stride[i]) - continue; - - params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32; - } - - last_sgpr = num_params-1; - - /* VGPRs */ - params[si_shader_ctx->param_vertex_id = num_params++] = i32; - params[num_params++] = i32; /* unused*/ - params[num_params++] = i32; /* unused */ - params[si_shader_ctx->param_instance_id = num_params++] = i32; - break; - - case TGSI_PROCESSOR_FRAGMENT: - params[SI_PARAM_ALPHA_REF] = f32; - params[SI_PARAM_PRIM_MASK] = i32; - last_sgpr = SI_PARAM_PRIM_MASK; - params[SI_PARAM_PERSP_SAMPLE] = v2i32; - params[SI_PARAM_PERSP_CENTER] = v2i32; - params[SI_PARAM_PERSP_CENTROID] = v2i32; - params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; - params[SI_PARAM_LINEAR_SAMPLE] = v2i32; - params[SI_PARAM_LINEAR_CENTER] = v2i32; - params[SI_PARAM_LINEAR_CENTROID] = v2i32; - params[SI_PARAM_LINE_STIPPLE_TEX] = f32; - params[SI_PARAM_POS_X_FLOAT] = f32; - params[SI_PARAM_POS_Y_FLOAT] = f32; - params[SI_PARAM_POS_Z_FLOAT] = 
f32; - params[SI_PARAM_POS_W_FLOAT] = f32; - params[SI_PARAM_FRONT_FACE] = f32; - params[SI_PARAM_ANCILLARY] = f32; - params[SI_PARAM_SAMPLE_COVERAGE] = f32; - params[SI_PARAM_POS_FIXED_PT] = f32; - num_params = SI_PARAM_POS_FIXED_PT+1; - break; - - default: - assert(0 && "unimplemented shader"); - return; - } - - assert(num_params <= Elements(params)); - radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); - radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); - - for (i = 0; i <= last_sgpr; ++i) { - LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); - switch (i) { - default: - LLVMAddAttribute(P, LLVMInRegAttribute); - break; -#if HAVE_LLVM >= 0x0304 - /* We tell llvm that array inputs are passed by value to allow Sinking pass - * to move load. Inputs are constant so this is fine. */ - case SI_PARAM_CONST: - case SI_PARAM_SAMPLER: - case SI_PARAM_RESOURCE: - LLVMAddAttribute(P, LLVMByValAttribute); - break; -#endif - } - } - -#if HAVE_LLVM >= 0x0304 - if (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || - bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0) - si_shader_ctx->ddxy_lds = - LLVMAddGlobalInAddressSpace(gallivm->module, - LLVMArrayType(i32, 64), - "ddxy_lds", - LOCAL_ADDR_SPACE); -#endif -} - -static void preload_constants(struct si_shader_context *si_shader_ctx) -{ - struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; - struct gallivm_state * gallivm = bld_base->base.gallivm; - const struct tgsi_shader_info * info = bld_base->info; - unsigned buf; - LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); - - for (buf = 0; buf < NUM_CONST_BUFFERS; buf++) { - unsigned i, num_const = info->const_file_max[buf] + 1; - - if (num_const == 0) - continue; - - /* Allocate space for the constant values */ - si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef)); - - /* Load the resource descriptor */ - 
si_shader_ctx->const_resource[buf] = - build_indexed_load(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf)); - - /* Load the constants, we rely on the code sinking to do the rest */ - for (i = 0; i < num_const * 4; ++i) { - LLVMValueRef args[2] = { - si_shader_ctx->const_resource[buf], - lp_build_const_int32(gallivm, i * 4) - }; - si_shader_ctx->constants[buf][i] = - build_intrinsic(gallivm->builder, "llvm.SI.load.const", - bld_base->base.elem_type, args, 2, - LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - } - } -} - -static void preload_samplers(struct si_shader_context *si_shader_ctx) -{ - struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; - struct gallivm_state * gallivm = bld_base->base.gallivm; - const struct tgsi_shader_info * info = bld_base->info; - - unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1; - - LLVMValueRef res_ptr, samp_ptr; - LLVMValueRef offset; - - if (num_samplers == 0) - return; - - /* Allocate space for the values */ - si_shader_ctx->resources = CALLOC(NUM_SAMPLER_VIEWS, sizeof(LLVMValueRef)); - si_shader_ctx->samplers = CALLOC(num_samplers, sizeof(LLVMValueRef)); - - res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE); - samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER); - - /* Load the resources and samplers, we rely on the code sinking to do the rest */ - for (i = 0; i < num_samplers; ++i) { - /* Resource */ - offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->resources[i] = build_indexed_load(si_shader_ctx, res_ptr, offset); - - /* Sampler */ - offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->samplers[i] = build_indexed_load(si_shader_ctx, samp_ptr, offset); - - /* FMASK resource */ - if (info->is_msaa_sampler[i]) { - offset = lp_build_const_int32(gallivm, FMASK_TEX_OFFSET + i); - si_shader_ctx->resources[FMASK_TEX_OFFSET + i] = - build_indexed_load(si_shader_ctx, res_ptr, offset); - } - } -} - -static void 
preload_streamout_buffers(struct si_shader_context *si_shader_ctx) -{ - struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; - struct gallivm_state * gallivm = bld_base->base.gallivm; - unsigned i; - - if (!si_shader_ctx->shader->selector->so.num_outputs) - return; - - LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_SO_BUFFER); - - /* Load the resources, we rely on the code sinking to do the rest */ - for (i = 0; i < 4; ++i) { - if (si_shader_ctx->shader->selector->so.stride[i]) { - LLVMValueRef offset = lp_build_const_int32(gallivm, i); - - si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset); - } - } -} - -int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, - LLVMModuleRef mod) -{ - unsigned i; - uint32_t *ptr; - struct radeon_llvm_binary binary; - bool dump = r600_can_dump_shader(&rctx->screen->b, - shader->selector ? shader->selector->tokens : NULL); - memset(&binary, 0, sizeof(binary)); - radeon_llvm_compile(mod, &binary, - r600_get_llvm_processor_name(rctx->screen->b.family), dump); - if (dump && ! binary.disassembled) { - fprintf(stderr, "SI CODE:\n"); - for (i = 0; i < binary.code_size; i+=4 ) { - fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3], - binary.code[i + 2], binary.code[i + 1], - binary.code[i]); - } - } - - /* XXX: We may be able to emit some of these values directly rather than - * extracting fields to be emitted later. 
- */ - for (i = 0; i < binary.config_size; i+= 8) { - unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i)); - unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); - switch (reg) { - case R_00B028_SPI_SHADER_PGM_RSRC1_PS: - case R_00B128_SPI_SHADER_PGM_RSRC1_VS: - case R_00B228_SPI_SHADER_PGM_RSRC1_GS: - case R_00B848_COMPUTE_PGM_RSRC1: - shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8; - shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4; - break; - case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: - shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value); - break; - case R_00B84C_COMPUTE_PGM_RSRC2: - shader->lds_size = G_00B84C_LDS_SIZE(value); - break; - case R_0286CC_SPI_PS_INPUT_ENA: - shader->spi_ps_input_ena = value; - break; - default: - fprintf(stderr, "Warning: Compiler emitted unknown " - "config register: 0x%x\n", reg); - break; - } - } - - /* copy new shader */ - r600_resource_reference(&shader->bo, NULL); - shader->bo = r600_resource_create_custom(rctx->b.b.screen, PIPE_USAGE_IMMUTABLE, - binary.code_size); - if (shader->bo == NULL) { - return -ENOMEM; - } - - ptr = (uint32_t*)rctx->b.ws->buffer_map(shader->bo->cs_buf, rctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); - if (0 /*R600_BIG_ENDIAN*/) { - for (i = 0; i < binary.code_size / 4; ++i) { - ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4)); - } - } else { - memcpy(ptr, binary.code, binary.code_size); - } - rctx->b.ws->buffer_unmap(shader->bo->cs_buf); - - free(binary.code); - free(binary.config); - - return 0; -} - -int si_pipe_shader_create( - struct pipe_context *ctx, - struct si_pipe_shader *shader) -{ - struct r600_context *rctx = (struct r600_context*)ctx; - struct si_pipe_shader_selector *sel = shader->selector; - struct si_shader_context si_shader_ctx; - struct tgsi_shader_info shader_info; - struct lp_build_tgsi_context * bld_base; - LLVMModuleRef mod; - int r = 0; - bool dump = r600_can_dump_shader(&rctx->screen->b, shader->selector->tokens); - - 
assert(shader->shader.noutput == 0); - assert(shader->shader.ninterp == 0); - assert(shader->shader.ninput == 0); - - memset(&si_shader_ctx, 0, sizeof(si_shader_ctx)); - radeon_llvm_context_init(&si_shader_ctx.radeon_bld); - bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; - - tgsi_scan_shader(sel->tokens, &shader_info); - - shader->shader.uses_kill = shader_info.uses_kill; - shader->shader.uses_instanceid = shader_info.uses_instanceid; - bld_base->info = &shader_info; - bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; - bld_base->emit_epilogue = si_llvm_emit_epilogue; - - bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action; -#if HAVE_LLVM >= 0x0304 - bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action; -#endif - bld_base->op_actions[TGSI_OPCODE_TXF] = txf_action; - bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action; - bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action; - -#if HAVE_LLVM >= 0x0304 - bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; - bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; -#endif - - si_shader_ctx.radeon_bld.load_input = declare_input; - si_shader_ctx.radeon_bld.load_system_value = declare_system_value; - si_shader_ctx.tokens = sel->tokens; - tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); - si_shader_ctx.shader = shader; - si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; - - create_meta_data(&si_shader_ctx); - create_function(&si_shader_ctx); - preload_constants(&si_shader_ctx); - preload_samplers(&si_shader_ctx); - preload_streamout_buffers(&si_shader_ctx); - - /* Dump TGSI code before doing TGSI->LLVM conversion in case the - * conversion fails. 
*/ - if (dump) { - tgsi_dump(sel->tokens, 0); - si_dump_streamout(&sel->so); - } - - if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { - fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); - for (int i = 0; i < NUM_CONST_BUFFERS; i++) - FREE(si_shader_ctx.constants[i]); - FREE(si_shader_ctx.resources); - FREE(si_shader_ctx.samplers); - return -EINVAL; - } - - radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); - - mod = bld_base->base.gallivm->module; - r = si_compile_llvm(rctx, shader, mod); - - radeon_llvm_dispose(&si_shader_ctx.radeon_bld); - tgsi_parse_free(&si_shader_ctx.parse); - - for (int i = 0; i < NUM_CONST_BUFFERS; i++) - FREE(si_shader_ctx.constants[i]); - FREE(si_shader_ctx.resources); - FREE(si_shader_ctx.samplers); - - return r; -} - -void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) -{ - r600_resource_reference(&shader->bo, NULL); -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h deleted file mode 100644 index 5e5a27f8580..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2012 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Tom Stellard - * Michel Dänzer - * Christian König - */ - -#ifndef RADEONSI_SHADER_H -#define RADEONSI_SHADER_H - -#include /* LLVMModuleRef */ - -#define SI_SGPR_CONST 0 -#define SI_SGPR_SAMPLER 2 -#define SI_SGPR_RESOURCE 4 -#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */ -#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */ -#define SI_SGPR_START_INSTANCE 10 /* VS only */ -#define SI_SGPR_ALPHA_REF 6 /* PS only */ - -#define SI_VS_NUM_USER_SGPR 11 -#define SI_PS_NUM_USER_SGPR 7 - -/* LLVM function parameter indices */ -#define SI_PARAM_CONST 0 -#define SI_PARAM_SAMPLER 1 -#define SI_PARAM_RESOURCE 2 - -/* VS only parameters */ -#define SI_PARAM_VERTEX_BUFFER 3 -#define SI_PARAM_SO_BUFFER 4 -#define SI_PARAM_START_INSTANCE 5 -/* the other VS parameters are assigned dynamically */ - -/* PS only parameters */ -#define SI_PARAM_ALPHA_REF 3 -#define SI_PARAM_PRIM_MASK 4 -#define SI_PARAM_PERSP_SAMPLE 5 -#define SI_PARAM_PERSP_CENTER 6 -#define SI_PARAM_PERSP_CENTROID 7 -#define SI_PARAM_PERSP_PULL_MODEL 8 -#define SI_PARAM_LINEAR_SAMPLE 9 -#define SI_PARAM_LINEAR_CENTER 10 -#define SI_PARAM_LINEAR_CENTROID 11 -#define SI_PARAM_LINE_STIPPLE_TEX 12 -#define SI_PARAM_POS_X_FLOAT 13 -#define SI_PARAM_POS_Y_FLOAT 14 -#define SI_PARAM_POS_Z_FLOAT 15 -#define SI_PARAM_POS_W_FLOAT 16 -#define SI_PARAM_FRONT_FACE 17 -#define SI_PARAM_ANCILLARY 18 -#define SI_PARAM_SAMPLE_COVERAGE 19 -#define SI_PARAM_POS_FIXED_PT 20 - -struct si_shader_io { - unsigned name; - int sid; 
- unsigned param_offset; - unsigned interpolate; - bool centroid; -}; - -struct si_pipe_shader; - -struct si_pipe_shader_selector { - struct si_pipe_shader *current; - - struct tgsi_token *tokens; - struct pipe_stream_output_info so; - - unsigned num_shaders; - - /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ - unsigned type; - - /* 1 when the shader contains - * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0. - * Used to determine whether we need to include nr_cbufs in the key */ - unsigned fs_write_all; -}; - -struct si_shader { - unsigned ninput; - struct si_shader_io input[40]; - - unsigned noutput; - struct si_shader_io output[40]; - - unsigned ninterp; - bool uses_kill; - bool uses_instanceid; - bool fs_write_all; - bool vs_out_misc_write; - bool vs_out_point_size; - bool vs_out_edgeflag; - bool vs_out_layer; - unsigned nr_pos_exports; - unsigned clip_dist_write; -}; - -union si_shader_key { - struct { - unsigned export_16bpc:8; - unsigned nr_cbufs:4; - unsigned color_two_side:1; - unsigned alpha_func:3; - unsigned flatshade:1; - unsigned alpha_to_one:1; - } ps; - struct { - unsigned instance_divisors[PIPE_MAX_ATTRIBS]; - unsigned ucps_enabled:2; - } vs; -}; - -struct si_pipe_shader { - struct si_pipe_shader_selector *selector; - struct si_pipe_shader *next_variant; - struct si_shader shader; - struct si_pm4_state *pm4; - struct r600_resource *bo; - unsigned num_sgprs; - unsigned num_vgprs; - unsigned lds_size; - unsigned spi_ps_input_ena; - unsigned spi_shader_col_format; - unsigned cb_shader_mask; - bool cb0_is_integer; - unsigned sprite_coord_enable; - union si_shader_key key; -}; - -/* radeonsi_shader.c */ -int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); -int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); -int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, - LLVMModuleRef mod); -void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader 
*shader); - -#endif diff --git a/src/gallium/drivers/radeonsi/radeonsi_uvd.c b/src/gallium/drivers/radeonsi/radeonsi_uvd.c deleted file mode 100644 index 6ecb17c8051..00000000000 --- a/src/gallium/drivers/radeonsi/radeonsi_uvd.c +++ /dev/null @@ -1,153 +0,0 @@ -/************************************************************************** - * - * Copyright 2011 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* - * Authors: - * Christian König - * - */ - -#include -#include -#include -#include - -#include "pipe/p_video_codec.h" - -#include "util/u_memory.h" -#include "util/u_video.h" - -#include "vl/vl_defines.h" -#include "vl/vl_mpeg12_decoder.h" - -#include "radeonsi_pipe.h" -#include "radeon/radeon_uvd.h" -#include "sid.h" - -/** - * creates an video buffer with an UVD compatible memory layout - */ -struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe, - const struct pipe_video_buffer *tmpl) -{ - struct r600_context *ctx = (struct r600_context *)pipe; - struct r600_texture *resources[VL_NUM_COMPONENTS] = {}; - struct radeon_surface *surfaces[VL_NUM_COMPONENTS] = {}; - struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {}; - const enum pipe_format *resource_formats; - struct pipe_video_buffer template; - struct pipe_resource templ; - unsigned i, array_size; - - assert(pipe); - - /* first create the needed resources as "normal" textures */ - resource_formats = vl_video_buffer_formats(pipe->screen, tmpl->buffer_format); - if (!resource_formats) - return NULL; - - array_size = tmpl->interlaced ? 
2 : 1; - template = *tmpl; - template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH); - template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT); - - vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_STATIC, 0); - /* TODO: get tiling working */ - templ.bind = PIPE_BIND_LINEAR; - resources[0] = (struct r600_texture *) - pipe->screen->resource_create(pipe->screen, &templ); - if (!resources[0]) - goto error; - - if (resource_formats[1] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_STATIC, 1); - templ.bind = PIPE_BIND_LINEAR; - resources[1] = (struct r600_texture *) - pipe->screen->resource_create(pipe->screen, &templ); - if (!resources[1]) - goto error; - } - - if (resource_formats[2] != PIPE_FORMAT_NONE) { - vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_STATIC, 2); - templ.bind = PIPE_BIND_LINEAR; - resources[2] = (struct r600_texture *) - pipe->screen->resource_create(pipe->screen, &templ); - if (!resources[2]) - goto error; - } - - for (i = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!resources[i]) - continue; - - surfaces[i] = & resources[i]->surface; - pbs[i] = &resources[i]->resource.buf; - } - - ruvd_join_surfaces(ctx->b.ws, templ.bind, pbs, surfaces); - - for (i = 0; i < VL_NUM_COMPONENTS; ++i) { - if (!resources[i]) - continue; - - /* recreate the CS handle */ - resources[i]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle( - resources[i]->resource.buf); - } - - template.height *= array_size; - return vl_video_buffer_create_ex2(pipe, &template, (struct pipe_resource **)resources); - -error: - for (i = 0; i < VL_NUM_COMPONENTS; ++i) - pipe_resource_reference((struct pipe_resource **)&resources[i], NULL); - - return NULL; -} - -/* set the decoding target buffer offsets */ -static struct radeon_winsys_cs_handle* radeonsi_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf) -{ - struct 
r600_texture *luma = (struct r600_texture *)buf->resources[0]; - struct r600_texture *chroma = (struct r600_texture *)buf->resources[1]; - - msg->body.decode.dt_field_mode = buf->base.interlaced; - - ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface); - - return luma->resource.cs_buf; -} - -/** - * creates an UVD compatible decoder - */ -struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context, - const struct pipe_video_codec *templ) -{ - return ruvd_create_decoder(context, templ, radeonsi_uvd_set_dtb); -} diff --git a/src/gallium/drivers/radeonsi/si.h b/src/gallium/drivers/radeonsi/si.h new file mode 100644 index 00000000000..d955ae9f7de --- /dev/null +++ b/src/gallium/drivers/radeonsi/si.h @@ -0,0 +1,88 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jerome Glisse + */ +#ifndef SI_H +#define SI_H + +#include "../../winsys/radeon/drm/radeon_winsys.h" +#include "util/u_double_list.h" +#include "util/u_transfer.h" + +#include "si_resource.h" + +struct winsys_handle; + +/* R600/R700 STATES */ +struct r600_query { + union { + uint64_t u64; + boolean b; + struct pipe_query_data_so_statistics so; + } result; + /* The kind of query */ + unsigned type; + /* Offset of the first result for current query */ + unsigned results_start; + /* Offset of the next free result after current query data */ + unsigned results_end; + /* Size of the result in memory for both begin_query and end_query, + * this can be one or two numbers, or it could even be a size of a structure. */ + unsigned result_size; + /* The buffer where query results are stored. It's used as a ring, + * data blocks for current query are stored sequentially from + * results_start to results_end, with wrapping on the buffer end */ + struct r600_resource *buffer; + /* The number of dwords for begin_query or end_query. 
*/ + unsigned num_cs_dw; + /* linked list of queries */ + struct list_head list; +}; + +struct r600_context; +struct r600_screen; + +void si_get_backend_mask(struct r600_context *ctx); +void si_context_flush(struct r600_context *ctx, unsigned flags); +void si_begin_new_cs(struct r600_context *ctx); + +struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type); +void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query); +boolean r600_context_query_result(struct r600_context *ctx, + struct r600_query *query, + boolean wait, void *vresult); +void r600_query_begin(struct r600_context *ctx, struct r600_query *query); +void r600_query_end(struct r600_context *ctx, struct r600_query *query); +void r600_context_queries_suspend(struct r600_context *ctx); +void r600_context_queries_resume(struct r600_context *ctx); +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait); + +bool si_is_timer_query(unsigned type); +bool si_query_needs_begin(unsigned type); +void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in); + +int si_context_init(struct r600_context *ctx); + +#endif diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c new file mode 100644 index 00000000000..589d8f55638 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -0,0 +1,704 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include "util/u_surface.h" +#include "util/u_blitter.h" +#include "util/u_format.h" +#include "si_pipe.h" +#include "si_state.h" + +enum r600_blitter_op /* bitmask */ +{ + R600_SAVE_TEXTURES = 1, + R600_SAVE_FRAMEBUFFER = 2, + R600_DISABLE_RENDER_COND = 4, + + R600_CLEAR = 0, + + R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER, + + R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | + R600_DISABLE_RENDER_COND, + + R600_BLIT = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES | + R600_DISABLE_RENDER_COND, + + R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND, + + R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND +}; + +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + r600_context_queries_suspend(rctx); + + util_blitter_save_blend(rctx->blitter, rctx->queued.named.blend); + util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->queued.named.dsa); + util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref); + util_blitter_save_rasterizer(rctx->blitter, rctx->queued.named.rasterizer); + util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader); + util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); + util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements); + if 
(rctx->queued.named.viewport) { + util_blitter_save_viewport(rctx->blitter, &rctx->queued.named.viewport->viewport); + } + util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer); + util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets, + (struct pipe_stream_output_target**)rctx->b.streamout.targets); + + if (op & R600_SAVE_FRAMEBUFFER) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_SAVE_TEXTURES) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers, + (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers); + + util_blitter_save_fragment_sampler_views(rctx->blitter, + util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask & + ((1 << NUM_TEX_UNITS) - 1)), + rctx->samplers[PIPE_SHADER_FRAGMENT].views.views); + } + + if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) { + rctx->saved_render_cond = rctx->current_render_cond; + rctx->saved_render_cond_cond = rctx->current_render_cond_cond; + rctx->saved_render_cond_mode = rctx->current_render_cond_mode; + rctx->b.b.render_condition(&rctx->b.b, NULL, FALSE, 0); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + if (rctx->saved_render_cond) { + rctx->b.b.render_condition(&rctx->b.b, + rctx->saved_render_cond, + rctx->saved_render_cond_cond, + rctx->saved_render_cond_mode); + rctx->saved_render_cond = NULL; + } + r600_context_queries_resume(rctx); +} + +static unsigned u_max_sample(struct pipe_resource *r) +{ + return r->nr_samples ? 
r->nr_samples - 1 : 0; +} + +static void r600_blit_decompress_depth(struct pipe_context *ctx, + struct r600_texture *texture, + struct r600_texture *staging, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned first_sample, unsigned last_sample) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + unsigned layer, level, sample, checked_last_layer, max_layer, max_sample; + float depth = 1.0f; + const struct util_format_description *desc; + void **custom_dsa; + struct r600_texture *flushed_depth_texture = staging ? + staging : texture->flushed_depth_texture; + + if (!staging && !texture->dirty_level_mask) + return; + + max_sample = u_max_sample(&texture->resource.b.b); + + desc = util_format_description(flushed_depth_texture->resource.b.b.format); + switch (util_format_has_depth(desc) | util_format_has_stencil(desc) << 1) { + default: + assert(!"No depth or stencil to uncompress"); + return; + case 3: + custom_dsa = rctx->custom_dsa_flush_depth_stencil; + break; + case 2: + custom_dsa = rctx->custom_dsa_flush_stencil; + break; + case 1: + custom_dsa = rctx->custom_dsa_flush_depth; + break; + } + + for (level = first_level; level <= last_level; level++) { + if (!staging && !(texture->dirty_level_mask & (1 << level))) + continue; + + /* The smaller the mipmap level, the less layers there are + * as far as 3D textures are concerned. */ + max_layer = util_max_layer(&texture->resource.b.b, level); + checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; + + for (layer = first_layer; layer <= checked_last_layer; layer++) { + for (sample = first_sample; sample <= last_sample; sample++) { + struct pipe_surface *zsurf, *cbsurf, surf_tmpl; + + surf_tmpl.format = texture->resource.b.b.format; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = layer; + surf_tmpl.u.tex.last_layer = layer; + + zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl); + + surf_tmpl.format = flushed_depth_texture->resource.b.b.format; + cbsurf = ctx->create_surface(ctx, + (struct pipe_resource*)flushed_depth_texture, &surf_tmpl); + + r600_blitter_begin(ctx, R600_DECOMPRESS); + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, 1 << sample, + custom_dsa[sample], depth); + r600_blitter_end(ctx); + + pipe_surface_reference(&zsurf, NULL); + pipe_surface_reference(&cbsurf, NULL); + } + } + + /* The texture will always be dirty if some layers aren't flushed. + * I don't think this case can occur though. */ + if (!staging && + first_layer == 0 && last_layer == max_layer && + first_sample == 0 && last_sample == max_sample) { + texture->dirty_level_mask &= ~(1 << level); + } + } +} + +static void si_blit_decompress_depth_in_place(struct r600_context *rctx, + struct r600_texture *texture, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer) +{ + struct pipe_surface *zsurf, surf_tmpl = {{0}}; + unsigned layer, max_layer, checked_last_layer, level; + + surf_tmpl.format = texture->resource.b.b.format; + + for (level = first_level; level <= last_level; level++) { + if (!(texture->dirty_level_mask & (1 << level))) + continue; + + surf_tmpl.u.tex.level = level; + + /* The smaller the mipmap level, the less layers there are + * as far as 3D textures are concerned. */ + max_layer = util_max_layer(&texture->resource.b.b, level); + checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; + + for (layer = first_layer; layer <= checked_last_layer; layer++) { + surf_tmpl.u.tex.first_layer = layer; + surf_tmpl.u.tex.last_layer = layer; + + zsurf = rctx->b.b.create_surface(&rctx->b.b, &texture->resource.b.b, &surf_tmpl); + + r600_blitter_begin(&rctx->b.b, R600_DECOMPRESS); + util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0, + rctx->custom_dsa_flush_inplace, + 1.0f); + r600_blitter_end(&rctx->b.b); + + pipe_surface_reference(&zsurf, NULL); + } + + /* The texture will always be dirty if some layers aren't flushed. + * I don't think this case occurs often though. */ + if (first_layer == 0 && last_layer == max_layer) { + texture->dirty_level_mask &= ~(1 << level); + } + } +} + +void si_flush_depth_textures(struct r600_context *rctx, + struct r600_textures_info *textures) +{ + unsigned i; + + for (i = 0; i < textures->n_views; ++i) { + struct pipe_sampler_view *view; + struct r600_texture *tex; + + view = textures->views.views[i]; + if (!view) continue; + + tex = (struct r600_texture *)view->texture; + if (!tex->is_depth || tex->is_flushing_texture) + continue; + + si_blit_decompress_depth_in_place(rctx, tex, + view->u.tex.first_level, view->u.tex.last_level, + 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); + } +} + +static void r600_blit_decompress_color(struct pipe_context *ctx, + struct r600_texture *rtex, + unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + unsigned layer, level, checked_last_layer, max_layer; + + if (!rtex->dirty_level_mask) + return; + + for (level = first_level; level <= last_level; level++) { + if (!(rtex->dirty_level_mask & (1 << level))) + continue; + + /* The smaller the mipmap level, the less layers there are + * as far as 3D textures are concerned. */ + max_layer = util_max_layer(&rtex->resource.b.b, level); + checked_last_layer = last_layer < max_layer ? 
last_layer : max_layer; + + for (layer = first_layer; layer <= checked_last_layer; layer++) { + struct pipe_surface *cbsurf, surf_tmpl; + + surf_tmpl.format = rtex->resource.b.b.format; + surf_tmpl.u.tex.level = level; + surf_tmpl.u.tex.first_layer = layer; + surf_tmpl.u.tex.last_layer = layer; + cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); + + r600_blitter_begin(ctx, R600_DECOMPRESS); + util_blitter_custom_color(rctx->blitter, cbsurf, + rctx->custom_blend_decompress); + r600_blitter_end(ctx); + + pipe_surface_reference(&cbsurf, NULL); + } + + /* The texture will always be dirty if some layers aren't flushed. + * I don't think this case occurs often though. */ + if (first_layer == 0 && last_layer == max_layer) { + rtex->dirty_level_mask &= ~(1 << level); + } + } +} + +void r600_decompress_color_textures(struct r600_context *rctx, + struct r600_textures_info *textures) +{ + unsigned i; + unsigned mask = textures->compressed_colortex_mask; + + while (mask) { + struct pipe_sampler_view *view; + struct r600_texture *tex; + + i = u_bit_scan(&mask); + + view = textures->views.views[i]; + assert(view); + + tex = (struct r600_texture *)view->texture; + assert(tex->cmask.size || tex->fmask.size); + + r600_blit_decompress_color(&rctx->b.b, tex, + view->u.tex.first_level, view->u.tex.last_level, + 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level)); + } +} + +static void r600_clear(struct pipe_context *ctx, unsigned buffers, + const union pipe_color_union *color, + double depth, unsigned stencil) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct pipe_framebuffer_state *fb = &rctx->framebuffer; + + r600_blitter_begin(ctx, R600_CLEAR); + util_blitter_clear(rctx->blitter, fb->width, fb->height, + util_framebuffer_get_num_layers(fb), + buffers, color, depth, stencil); + r600_blitter_end(ctx); +} + +static void r600_clear_render_target(struct pipe_context *ctx, + struct pipe_surface *dst, + const union pipe_color_union *color, 
+ unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + util_blitter_clear_render_target(rctx->blitter, dst, color, + dstx, dsty, width, height); + r600_blitter_end(ctx); +} + +static void r600_clear_depth_stencil(struct pipe_context *ctx, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); + util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, + dstx, dsty, width, height); + r600_blitter_end(ctx); +} + +/* Helper for decompressing a portion of a color or depth resource before + * blitting if any decompression is needed. + * The driver doesn't decompress resources automatically while u_blitter is + * rendering. */ +static void r600_decompress_subresource(struct pipe_context *ctx, + struct pipe_resource *tex, + unsigned level, + unsigned first_layer, unsigned last_layer) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_texture *rtex = (struct r600_texture*)tex; + + if (rtex->is_depth && !rtex->is_flushing_texture) { + si_blit_decompress_depth_in_place(rctx, rtex, + level, level, + first_layer, last_layer); + } else if (rtex->fmask.size || rtex->cmask.size) { + r600_blit_decompress_color(ctx, rtex, level, level, + first_layer, last_layer); + } +} + +struct texture_orig_info { + unsigned format; + unsigned width0; + unsigned height0; + unsigned npix_x; + unsigned npix_y; + unsigned npix0_x; + unsigned npix0_y; +}; + +static void r600_compressed_to_blittable(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_texture *rtex = (struct r600_texture*)tex; + unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format); + int new_format; 
+ int new_height, new_width; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + orig->npix0_x = rtex->surface.level[0].npix_x; + orig->npix0_y = rtex->surface.level[0].npix_y; + orig->npix_x = rtex->surface.level[level].npix_x; + orig->npix_y = rtex->surface.level[level].npix_y; + + if (pixsize == 8) + new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */ + else + new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */ + + new_width = util_format_get_nblocksx(tex->format, orig->width0); + new_height = util_format_get_nblocksy(tex->format, orig->height0); + + tex->width0 = new_width; + tex->height0 = new_height; + tex->format = new_format; + rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x); + rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y); + rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x); + rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y); + + /* By dividing the dimensions by 4, we effectively decrement + * last_level by 2, therefore the last 2 mipmap levels disappear and + * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2, + * 1x1) have equal slice sizes, which is an important assumption + * for this to work. + * + * In order to make the last 2 mipmap levels blittable, we have to + * add the slice size of the last mipmap level to the texture + * address, so that even though the hw thinks it reads last_level-2, + * it will actually read last_level-1, and if we add the slice size*2, + * it will read last_level. That's how this workaround works. 
+ */ + if (level > rtex->resource.b.b.last_level-2) + rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2); +} + +static void r600_change_format(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig, + enum pipe_format format) +{ + struct r600_texture *rtex = (struct r600_texture*)tex; + + orig->format = tex->format; + orig->width0 = tex->width0; + orig->height0 = tex->height0; + orig->npix0_x = rtex->surface.level[0].npix_x; + orig->npix0_y = rtex->surface.level[0].npix_y; + orig->npix_x = rtex->surface.level[level].npix_x; + orig->npix_y = rtex->surface.level[level].npix_y; + + tex->format = format; +} + +static void r600_reset_blittable_to_orig(struct pipe_resource *tex, + unsigned level, + struct texture_orig_info *orig) +{ + struct r600_texture *rtex = (struct r600_texture*)tex; + + tex->format = orig->format; + tex->width0 = orig->width0; + tex->height0 = orig->height0; + rtex->surface.level[0].npix_x = orig->npix0_x; + rtex->surface.level[0].npix_y = orig->npix0_y; + rtex->surface.level[level].npix_x = orig->npix_x; + rtex->surface.level[level].npix_y = orig->npix_y; + rtex->mipmap_shift = 0; +} + +static void r600_resource_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct texture_orig_info orig_info[2]; + struct pipe_box sbox; + const struct pipe_box *psbox = src_box; + boolean restore_orig[2]; + + /* Fallback for buffers. */ + if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { + si_copy_buffer(rctx, dst, src, dstx, src_box->x, src_box->width); + return; + } + + memset(orig_info, 0, sizeof(orig_info)); + + /* The driver doesn't decompress resources automatically while + * u_blitter is rendering. 
*/ + r600_decompress_subresource(ctx, src, src_level, + src_box->z, src_box->z + src_box->depth - 1); + + restore_orig[0] = restore_orig[1] = FALSE; + + if (util_format_is_compressed(src->format) && + util_format_is_compressed(dst->format)) { + r600_compressed_to_blittable(src, src_level, &orig_info[0]); + restore_orig[0] = TRUE; + sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x); + sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y); + sbox.z = src_box->z; + sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width); + sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height); + sbox.depth = src_box->depth; + psbox=&sbox; + + r600_compressed_to_blittable(dst, dst_level, &orig_info[1]); + restore_orig[1] = TRUE; + /* translate the dst box as well */ + dstx = util_format_get_nblocksx(orig_info[1].format, dstx); + dsty = util_format_get_nblocksy(orig_info[1].format, dsty); + } else if (!util_blitter_is_copy_supported(rctx->blitter, dst, src)) { + unsigned blocksize = util_format_get_blocksize(src->format); + + switch (blocksize) { + case 1: + r600_change_format(src, src_level, &orig_info[0], + PIPE_FORMAT_R8_UNORM); + r600_change_format(dst, dst_level, &orig_info[1], + PIPE_FORMAT_R8_UNORM); + break; + case 2: + r600_change_format(src, src_level, &orig_info[0], + PIPE_FORMAT_R8G8_UNORM); + r600_change_format(dst, dst_level, &orig_info[1], + PIPE_FORMAT_R8G8_UNORM); + break; + case 4: + r600_change_format(src, src_level, &orig_info[0], + PIPE_FORMAT_R8G8B8A8_UNORM); + r600_change_format(dst, dst_level, &orig_info[1], + PIPE_FORMAT_R8G8B8A8_UNORM); + break; + case 8: + r600_change_format(src, src_level, &orig_info[0], + PIPE_FORMAT_R16G16B16A16_UINT); + r600_change_format(dst, dst_level, &orig_info[1], + PIPE_FORMAT_R16G16B16A16_UINT); + break; + case 16: + r600_change_format(src, src_level, &orig_info[0], + PIPE_FORMAT_R32G32B32A32_UINT); + r600_change_format(dst, dst_level, &orig_info[1], + 
PIPE_FORMAT_R32G32B32A32_UINT); + break; + default: + fprintf(stderr, "Unhandled format %s with blocksize %u\n", + util_format_short_name(src->format), blocksize); + assert(0); + } + restore_orig[0] = TRUE; + restore_orig[1] = TRUE; + } + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz, + src, src_level, psbox); + r600_blitter_end(ctx); + + if (restore_orig[0]) + r600_reset_blittable_to_orig(src, src_level, &orig_info[0]); + + if (restore_orig[1]) + r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]); +} + +/* For MSAA integer resolving to work, we change the format to NORM using this function. */ +static enum pipe_format int_to_norm_format(enum pipe_format format) +{ + switch (format) { +#define REPLACE_FORMAT_SIGN(format,sign) \ + case PIPE_FORMAT_##format##_##sign##INT: \ + return PIPE_FORMAT_##format##_##sign##NORM +#define REPLACE_FORMAT(format) \ + REPLACE_FORMAT_SIGN(format, U); \ + REPLACE_FORMAT_SIGN(format, S) + + REPLACE_FORMAT_SIGN(B10G10R10A2, U); + REPLACE_FORMAT(R8); + REPLACE_FORMAT(R8G8); + REPLACE_FORMAT(R8G8B8X8); + REPLACE_FORMAT(R8G8B8A8); + REPLACE_FORMAT(A8); + REPLACE_FORMAT(I8); + REPLACE_FORMAT(L8); + REPLACE_FORMAT(L8A8); + REPLACE_FORMAT(R16); + REPLACE_FORMAT(R16G16); + REPLACE_FORMAT(R16G16B16X16); + REPLACE_FORMAT(R16G16B16A16); + REPLACE_FORMAT(A16); + REPLACE_FORMAT(I16); + REPLACE_FORMAT(L16); + REPLACE_FORMAT(L16A16); + +#undef REPLACE_FORMAT +#undef REPLACE_FORMAT_SIGN + default: + return format; + } +} + +static bool do_hardware_msaa_resolve(struct pipe_context *ctx, + const struct pipe_blit_info *info) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct r600_texture *dst = (struct r600_texture*)info->dst.resource; + unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level); + unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level); + enum pipe_format format = int_to_norm_format(info->dst.format); + 
unsigned sample_mask = ~0; + + if (info->src.resource->nr_samples > 1 && + info->dst.resource->nr_samples <= 1 && + util_max_layer(info->src.resource, 0) == 0 && + util_max_layer(info->dst.resource, info->dst.level) == 0 && + info->dst.format == info->src.format && + !util_format_is_pure_integer(format) && + !util_format_is_depth_or_stencil(format) && + !info->scissor_enable && + (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA && + dst_width == info->src.resource->width0 && + dst_height == info->src.resource->height0 && + info->dst.box.x == 0 && + info->dst.box.y == 0 && + info->dst.box.width == dst_width && + info->dst.box.height == dst_height && + info->dst.box.depth == 1 && + info->src.box.x == 0 && + info->src.box.y == 0 && + info->src.box.width == dst_width && + info->src.box.height == dst_height && + info->src.box.depth == 1 && + dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D && + !(dst->surface.flags & RADEON_SURF_SCANOUT)) { + r600_blitter_begin(ctx, R600_COLOR_RESOLVE); + util_blitter_custom_resolve_color(rctx->blitter, + info->dst.resource, info->dst.level, + info->dst.box.z, + info->src.resource, info->src.box.z, + sample_mask, rctx->custom_blend_resolve, + format); + r600_blitter_end(ctx); + return true; + } + return false; +} + +static void si_blit(struct pipe_context *ctx, + const struct pipe_blit_info *info) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + + if (do_hardware_msaa_resolve(ctx, info)) { + return; + } + + assert(util_blitter_is_blit_supported(rctx->blitter, info)); + + /* The driver doesn't decompress resources automatically while + * u_blitter is rendering. 
*/ + r600_decompress_subresource(ctx, info->src.resource, info->src.level, + info->src.box.z, + info->src.box.z + info->src.box.depth - 1); + + r600_blitter_begin(ctx, R600_BLIT); + util_blitter_blit(rctx->blitter, info); + r600_blitter_end(ctx); +} + +static void si_flush_resource(struct pipe_context *ctx, + struct pipe_resource *resource) +{ +} + +void si_init_blit_functions(struct r600_context *rctx) +{ + rctx->b.b.clear = r600_clear; + rctx->b.b.clear_render_target = r600_clear_render_target; + rctx->b.b.clear_depth_stencil = r600_clear_depth_stencil; + rctx->b.b.resource_copy_region = r600_resource_copy_region; + rctx->b.b.blit = si_blit; + rctx->b.b.flush_resource = si_flush_resource; + rctx->b.blit_decompress_depth = r600_blit_decompress_depth; +} diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c new file mode 100644 index 00000000000..ca77f2ba356 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_buffer.c @@ -0,0 +1,70 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + * Corbin Simpson + */ + +#include "pipe/p_screen.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" + +#include "si.h" +#include "si_pipe.h" + +void r600_upload_index_buffer(struct r600_context *rctx, + struct pipe_index_buffer *ib, unsigned count) +{ + u_upload_data(rctx->b.uploader, 0, count * ib->index_size, + ib->user_buffer, &ib->offset, &ib->buffer); +} + +void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer, + const uint8_t *ptr, unsigned size, + uint32_t *const_offset) +{ + if (R600_BIG_ENDIAN) { + uint32_t *tmpPtr; + unsigned i; + + if (!(tmpPtr = malloc(size))) { + R600_ERR("Failed to allocate BE swap buffer.\n"); + return; + } + + for (i = 0; i < size / 4; ++i) { + tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]); + } + + u_upload_data(rctx->b.uploader, 0, size, tmpPtr, const_offset, + (struct pipe_resource**)rbuffer); + + free(tmpPtr); + } else { + u_upload_data(rctx->b.uploader, 0, size, ptr, const_offset, + (struct pipe_resource**)rbuffer); + } +} diff --git a/src/gallium/drivers/radeonsi/si_commands.c b/src/gallium/drivers/radeonsi/si_commands.c index bf9592493c3..a020ac36868 100644 --- a/src/gallium/drivers/radeonsi/si_commands.c +++ b/src/gallium/drivers/radeonsi/si_commands.c @@ -24,9 +24,9 @@ * Christian König */ -#include "radeonsi_pipe.h" -#include "radeonsi_pm4.h" #include "sid.h" +#include "si_pipe.h" +#include "si_pm4.h" void si_cmd_context_control(struct si_pm4_state *pm4) { diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c new file mode 100644 index 
00000000000..40d75daa3f5 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -0,0 +1,299 @@ +#include "util/u_memory.h" + +#include "../radeon/r600_cs.h" +#include "si_pipe.h" +#include "si_shader.h" + +#include "radeon_llvm_util.h" + +#define MAX_GLOBAL_BUFFERS 20 + +struct si_pipe_compute { + struct r600_context *ctx; + + unsigned local_size; + unsigned private_size; + unsigned input_size; + unsigned num_kernels; + struct si_pipe_shader *kernels; + unsigned num_user_sgprs; + + struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; + + LLVMContextRef llvm_ctx; +}; + +static void *radeonsi_create_compute_state( + struct pipe_context *ctx, + const struct pipe_compute_state *cso) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct si_pipe_compute *program = + CALLOC_STRUCT(si_pipe_compute); + const struct pipe_llvm_program_header *header; + const unsigned char *code; + unsigned i; + + program->llvm_ctx = LLVMContextCreate(); + + header = cso->prog; + code = cso->prog + sizeof(struct pipe_llvm_program_header); + + program->ctx = rctx; + program->local_size = cso->req_local_mem; + program->private_size = cso->req_private_mem; + program->input_size = cso->req_input_mem; + + program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code, + header->num_bytes); + program->kernels = CALLOC(sizeof(struct si_pipe_shader), + program->num_kernels); + for (i = 0; i < program->num_kernels; i++) { + LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i, + code, header->num_bytes); + si_compile_llvm(rctx, &program->kernels[i], mod); + LLVMDisposeModule(mod); + } + + return program; +} + +static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + rctx->cs_shader_state.program = (struct si_pipe_compute*)state; +} + +static void radeonsi_set_global_binding( + struct pipe_context *ctx, unsigned first, unsigned n, + struct pipe_resource 
**resources, + uint32_t **handles) +{ + unsigned i; + struct r600_context *rctx = (struct r600_context*)ctx; + struct si_pipe_compute *program = rctx->cs_shader_state.program; + + if (!resources) { + for (i = first; i < first + n; i++) { + program->global_buffers[i] = NULL; + } + return; + } + + for (i = first; i < first + n; i++) { + uint64_t va; + program->global_buffers[i] = resources[i]; + va = r600_resource_va(ctx->screen, resources[i]); + memcpy(handles[i], &va, sizeof(va)); + } +} + +static void radeonsi_launch_grid( + struct pipe_context *ctx, + const uint *block_layout, const uint *grid_layout, + uint32_t pc, const void *input) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct si_pipe_compute *program = rctx->cs_shader_state.program; + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); + struct r600_resource *kernel_args_buffer = NULL; + unsigned kernel_args_size; + unsigned num_work_size_bytes = 36; + uint32_t kernel_args_offset = 0; + uint32_t *kernel_args; + uint64_t kernel_args_va; + uint64_t shader_va; + unsigned arg_user_sgpr_count = 2; + unsigned i; + struct si_pipe_shader *shader = &program->kernels[pc]; + unsigned lds_blocks; + + pm4->compute_pkt = true; + si_cmd_context_control(pm4); + + si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); + si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | + EVENT_INDEX(0x7) | + EVENT_WRITE_INV_L2); + si_pm4_cmd_end(pm4, false); + + si_pm4_inval_texture_cache(pm4); + si_pm4_inval_shader_cache(pm4); + si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); + + /* Upload the kernel arguments */ + + /* The extra num_work_size_bytes are for work group / work item size information */ + kernel_args_size = program->input_size + num_work_size_bytes; + kernel_args = MALLOC(kernel_args_size); + for (i = 0; i < 3; i++) { + kernel_args[i] = grid_layout[i]; + kernel_args[i + 3] = grid_layout[i] * block_layout[i]; + kernel_args[i + 6] = block_layout[i]; + } + + memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program->input_size); + + r600_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args, + kernel_args_size, &kernel_args_offset); + kernel_args_va = r600_resource_va(ctx->screen, + (struct pipe_resource*)kernel_args_buffer); + kernel_args_va += kernel_args_offset; + + si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ); + + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va); + si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0)); + + si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0); + si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0); + si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0); + + si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X, + S_00B81C_NUM_THREAD_FULL(block_layout[0])); + si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y, + S_00B820_NUM_THREAD_FULL(block_layout[1])); + si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z, + S_00B824_NUM_THREAD_FULL(block_layout[2])); + + /* Global buffers */ + for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { + struct r600_resource *buffer = + (struct r600_resource*)program->global_buffers[i]; + if (!buffer) { + continue; + } + si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE); + } + + /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID + * and is now per pipe, so it should be handled in the + * kernel if we want to use something other than the default value, + * which is now 0x22f. 
+ */ + if (rctx->b.chip_class <= SI) { + /* XXX: This should be: + * (number of compute units) * 4 * (waves per simd) - 1 */ + + si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID, + 0x190 /* Default value */); + } + + shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); + si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); + si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); + si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); + + si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, + /* We always use at least 3 VGPRS, these come from + * TIDIG_COMP_CNT. + * XXX: The compiler should account for this. + */ + S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4) + /* We always use at least 4 + arg_user_sgpr_count. The 4 extra + * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN + * XXX: The compiler should account for this. + */ + | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count, + shader->num_sgprs)) - 1) / 8)) + ; + + lds_blocks = shader->lds_size; + /* XXX: We are over allocating LDS. For SI, the shader reports LDS in + * blocks of 256 bytes, so if there are 4 bytes lds allocated in + * the shader and 4 bytes allocated by the state tracker, then + * we will set LDS_SIZE to 512 bytes rather than 256. 
+ */ + if (rctx->b.chip_class <= SI) { + lds_blocks += align(program->local_size, 256) >> 8; + } else { + lds_blocks += align(program->local_size, 512) >> 9; + } + + assert(lds_blocks <= 0xFF); + + si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, + S_00B84C_SCRATCH_EN(0) + | S_00B84C_USER_SGPR(arg_user_sgpr_count) + | S_00B84C_TGID_X_EN(1) + | S_00B84C_TGID_Y_EN(1) + | S_00B84C_TGID_Z_EN(1) + | S_00B84C_TG_SIZE_EN(1) + | S_00B84C_TIDIG_COMP_CNT(2) + | S_00B84C_LDS_SIZE(lds_blocks) + | S_00B84C_EXCP_EN(0)) + ; + si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0); + + si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, + S_00B858_SH0_CU_EN(0xffff /* Default value */) + | S_00B858_SH1_CU_EN(0xffff /* Default value */)) + ; + + si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, + S_00B85C_SH0_CU_EN(0xffff /* Default value */) + | S_00B85C_SH1_CU_EN(0xffff /* Default value */)) + ; + + si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); + si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */ + si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */ + si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */ + si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */ + si_pm4_cmd_end(pm4, false); + + si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); + si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4))); + si_pm4_cmd_end(pm4, false); + + si_pm4_inval_texture_cache(pm4); + si_pm4_inval_shader_cache(pm4); + si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); + + si_pm4_emit(rctx, pm4); + +#if 0 + fprintf(stderr, "cdw: %i\n", rctx->cs->cdw); + for (i = 0; i < rctx->cs->cdw; i++) { + fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]); + } +#endif + + FREE(pm4); + FREE(kernel_args); +} + + +static void si_delete_compute_state(struct pipe_context *ctx, void* state){ + struct si_pipe_compute *program = (struct si_pipe_compute *)state; + + if (!state) { + return; + } + + if (program->kernels) { + FREE(program->kernels); + } + + if 
(program->llvm_ctx){ + LLVMContextDispose(program->llvm_ctx); + } + + //And then free the program itself. + FREE(program); +} + +static void si_set_compute_resources(struct pipe_context * ctx_, + unsigned start, unsigned count, + struct pipe_surface ** surfaces) { } + +void si_init_compute_functions(struct r600_context *rctx) +{ + rctx->b.b.create_compute_state = radeonsi_create_compute_state; + rctx->b.b.delete_compute_state = si_delete_compute_state; + rctx->b.b.bind_compute_state = radeonsi_bind_compute_state; +/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */ + rctx->b.b.set_compute_resources = si_set_compute_resources; + rctx->b.b.set_global_binding = radeonsi_set_global_binding; + rctx->b.b.launch_grid = radeonsi_launch_grid; +} diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index c43b35e3983..9cb5da12ad8 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -24,9 +24,9 @@ * Marek Olšák */ #include "../radeon/r600_cs.h" -#include "radeonsi_pipe.h" -#include "radeonsi_resource.h" -#include "radeonsi_shader.h" +#include "si_pipe.h" +#include "si_resource.h" +#include "si_shader.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c new file mode 100644 index 00000000000..bb67baa52d2 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -0,0 +1,716 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Jerome Glisse + */ +#include "../radeon/r600_cs.h" +#include "sid.h" +#include "si_pm4.h" +#include "si_pipe.h" +#include "util/u_memory.h" +#include + +#define GROUP_FORCE_NEW_BLOCK 0 + +/* Get backends mask */ +void si_get_backend_mask(struct r600_context *ctx) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + struct r600_resource *buffer; + uint32_t *results; + unsigned num_backends = ctx->screen->b.info.r600_num_backends; + unsigned i, mask = 0; + + /* if backend_map query is supported by the kernel */ + if (ctx->screen->b.info.r600_backend_map_valid) { + unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes; + unsigned backend_map = ctx->screen->b.info.r600_backend_map; + unsigned item_width = 4, item_mask = 0x7; + + while(num_tile_pipes--) { + i = backend_map & item_mask; + mask |= (1<>= item_width; + } + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + } + + /* otherwise backup path for older kernels */ + + /* create buffer for event data */ + buffer = r600_resource_create_custom(&ctx->screen->b.b, + PIPE_USAGE_STAGING, + ctx->max_db*16); + if (!buffer) + goto err; + + /* initialize buffer with zeroes */ + results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + if (results) { + uint64_t va 
= 0; + + memset(results, 0, ctx->max_db * 4 * 4); + ctx->b.ws->buffer_unmap(buffer->cs_buf); + + /* emit EVENT_WRITE for ZPASS_DONE */ + va = r600_resource_va(&ctx->screen->b.b, (void *)buffer); + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = va >> 32; + + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE); + + /* analyze results */ + results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ); + if (results) { + for(i = 0; i < ctx->max_db; i++) { + /* at least highest bit will be set if backend is used */ + if (results[i*4 + 1]) + mask |= (1<b.ws->buffer_unmap(buffer->cs_buf); + } + } + + r600_resource_reference(&buffer, NULL); + + if (mask != 0) { + ctx->backend_mask = mask; + return; + } + +err: + /* fallback to old method - set num_backends lower bits to 1 */ + ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); + return; +} + +bool si_is_timer_query(unsigned type) +{ + return type == PIPE_QUERY_TIME_ELAPSED || + type == PIPE_QUERY_TIMESTAMP || + type == PIPE_QUERY_TIMESTAMP_DISJOINT; +} + +bool si_query_needs_begin(unsigned type) +{ + return type != PIPE_QUERY_TIMESTAMP; +} + +/* initialize */ +void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, + boolean count_draw_in) +{ + int i; + + /* The number of dwords we already used in the CS so far. */ + num_dw += ctx->b.rings.gfx.cs->cdw; + + for (i = 0; i < SI_NUM_ATOMS(ctx); i++) { + if (ctx->atoms.array[i]->dirty) { + num_dw += ctx->atoms.array[i]->num_dw; + } + } + + if (count_draw_in) { + /* The number of dwords all the dirty states would take. */ + num_dw += ctx->pm4_dirty_cdwords; + + /* The upper-bound of how much a draw command would take. */ + num_dw += SI_MAX_DRAW_CS_DWORDS; + } + + /* Count in queries_suspend. 
*/ + num_dw += ctx->num_cs_dw_nontimer_queries_suspend; + + /* Count in streamout_end at the end of CS. */ + if (ctx->b.streamout.begin_emitted) { + num_dw += ctx->b.streamout.num_dw_for_end; + } + + /* Count in render_condition(NULL) at the end of CS. */ + if (ctx->predicate_drawing) { + num_dw += 3; + } + + /* Count in framebuffer cache flushes at the end of CS. */ + num_dw += ctx->atoms.cache_flush->num_dw; + +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + num_dw += R600_TRACE_CS_DWORDS; + } +#endif + + /* Flush if there's not enough space. */ + if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { + radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC); + } +} + +void si_context_flush(struct r600_context *ctx, unsigned flags) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + + if (!cs->cdw) + return; + + /* suspend queries */ + ctx->nontimer_queries_suspended = false; + if (ctx->num_cs_dw_nontimer_queries_suspend) { + r600_context_queries_suspend(ctx); + ctx->nontimer_queries_suspended = true; + } + + ctx->b.streamout.suspended = false; + + if (ctx->b.streamout.begin_emitted) { + r600_emit_streamout_end(&ctx->b); + ctx->b.streamout.suspended = true; + } + + ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB | + R600_CONTEXT_FLUSH_AND_INV_CB_META | + R600_CONTEXT_FLUSH_AND_INV_DB | + R600_CONTEXT_FLUSH_AND_INV_DB_META | + R600_CONTEXT_INV_TEX_CACHE; + si_emit_cache_flush(&ctx->b, NULL); + + /* this is probably not needed anymore */ + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); + + /* force to keep tiling flags */ + flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + struct r600_screen *rscreen = ctx->screen; + unsigned i; + + for (i = 0; i < cs->cdw; i++) { + fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]); + } + rscreen->cs_count++; + } +#endif + + /* Flush the CS. 
*/ + ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0); + +#if R600_TRACE_CS + if (ctx->screen->trace_bo) { + struct r600_screen *rscreen = ctx->screen; + unsigned i; + + for (i = 0; i < 10; i++) { + usleep(5); + if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) { + break; + } + } + if (i == 10) { + fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n", + rscreen->trace_ptr[1], rscreen->trace_ptr[0]); + } else { + fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5); + } + } +#endif + + si_begin_new_cs(ctx); +} + +void si_begin_new_cs(struct r600_context *ctx) +{ + ctx->pm4_dirty_cdwords = 0; + + /* Flush read caches at the beginning of CS. */ + ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | + R600_CONTEXT_INV_CONST_CACHE | + R600_CONTEXT_INV_SHADER_CACHE; + + /* set all valid group as dirty so they get reemited on + * next draw command + */ + si_pm4_reset_emitted(ctx); + + /* The CS initialization should be emitted before everything else. 
*/ + si_pm4_emit(ctx, ctx->queued.named.init); + ctx->emitted.named.init = ctx->queued.named.init; + + if (ctx->b.streamout.suspended) { + ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask; + r600_streamout_buffers_dirty(&ctx->b); + } + + /* resume queries */ + if (ctx->nontimer_queries_suspended) { + r600_context_queries_resume(ctx); + } + + si_all_descriptors_begin_new_cs(ctx); +} + +static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index, + bool test_status_bit) +{ + uint32_t *current_result = (uint32_t*)map; + uint64_t start, end; + + start = (uint64_t)current_result[start_index] | + (uint64_t)current_result[start_index+1] << 32; + end = (uint64_t)current_result[end_index] | + (uint64_t)current_result[end_index+1] << 32; + + if (!test_status_bit || + ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) { + return end - start; + } + return 0; +} + +static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait) +{ + unsigned results_base = query->results_start; + char *map; + + map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, + PIPE_TRANSFER_READ | + (wait ? 
0 : PIPE_TRANSFER_DONTBLOCK)); + if (!map) + return FALSE; + + /* count all results across all data blocks */ + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + while (results_base != query->results_end) { + query->result.u64 += + r600_query_read_result(map + results_base, 0, 2, true); + results_base = (results_base + 16) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + while (results_base != query->results_end) { + query->result.b = query->result.b || + r600_query_read_result(map + results_base, 0, 2, true) != 0; + results_base = (results_base + 16) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_TIMESTAMP: + { + uint32_t *current_result = (uint32_t*)map; + query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32; + break; + } + case PIPE_QUERY_TIME_ELAPSED: + while (results_base != query->results_end) { + query->result.u64 += + r600_query_read_result(map + results_base, 0, 2, false); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + /* SAMPLE_STREAMOUTSTATS stores this structure: + * { + * u64 NumPrimitivesWritten; + * u64 PrimitiveStorageNeeded; + * } + * We only need NumPrimitivesWritten here. */ + while (results_base != query->results_end) { + query->result.u64 += + r600_query_read_result(map + results_base, 2, 6, true); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_PRIMITIVES_GENERATED: + /* Here we read PrimitiveStorageNeeded. 
*/ + while (results_base != query->results_end) { + query->result.u64 += + r600_query_read_result(map + results_base, 0, 4, true); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_SO_STATISTICS: + while (results_base != query->results_end) { + query->result.so.num_primitives_written += + r600_query_read_result(map + results_base, 2, 6, true); + query->result.so.primitives_storage_needed += + r600_query_read_result(map + results_base, 0, 4, true); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + } + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + while (results_base != query->results_end) { + query->result.b = query->result.b || + r600_query_read_result(map + results_base, 2, 6, true) != + r600_query_read_result(map + results_base, 0, 4, true); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + } + break; + default: + assert(0); + } + + query->results_start = query->results_end; + ctx->b.ws->buffer_unmap(query->buffer->cs_buf); + return TRUE; +} + +void r600_query_begin(struct r600_context *ctx, struct r600_query *query) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + unsigned new_results_end, i; + uint32_t *results; + uint64_t va; + + si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); + + new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; + + /* collect current results if query buffer is full */ + if (new_results_end == query->results_start) { + r600_query_result(ctx, query, TRUE); + } + + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + if (results) { + results = (uint32_t*)((char*)results + query->results_end); + memset(results, 0, query->result_size); + + /* Set top bits for unused backends */ + for (i = 0; i < ctx->max_db; i++) { + 
if (!(ctx->backend_mask & (1<b.ws->buffer_unmap(query->buffer->cs_buf); + } + break; + case PIPE_QUERY_TIME_ELAPSED: + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + results = (uint32_t*)((char*)results + query->results_end); + memset(results, 0, query->result_size); + ctx->b.ws->buffer_unmap(query->buffer->cs_buf); + break; + default: + assert(0); + } + + /* emit begin query */ + va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); + va += query->results_end; + + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + break; + case PIPE_QUERY_TIME_ELAPSED: + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); + cs->buf[cs->cdw++] = 0; + cs->buf[cs->cdw++] = 0; + break; + default: + assert(0); + } + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); + + if (!si_is_timer_query(query->type)) { + ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; + } +} + 
+void r600_query_end(struct r600_context *ctx, struct r600_query *query) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + uint64_t va; + unsigned new_results_end; + + /* The queries which need begin already called this in begin_query. */ + if (!si_query_needs_begin(query->type)) { + si_need_cs_space(ctx, query->num_cs_dw, TRUE); + + new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; + + /* collect current results if query buffer is full */ + if (new_results_end == query->results_start) { + r600_query_result(ctx, query, TRUE); + } + } + + va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); + /* emit end query */ + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + va += query->results_end + 8; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + va += query->results_end + query->result_size/2; + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; + break; + case PIPE_QUERY_TIME_ELAPSED: + va += query->results_end + query->result_size/2; + /* fall through */ + case PIPE_QUERY_TIMESTAMP: + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); + cs->buf[cs->cdw++] = va; + cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); + cs->buf[cs->cdw++] = 0; + cs->buf[cs->cdw++] = 0; + break; + default: + assert(0); + } + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, 
&ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE); + + query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; + + if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) { + ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; + } +} + +void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, + int flag_wait) +{ + struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs; + uint64_t va; + + if (operation == PREDICATION_OP_CLEAR) { + si_need_cs_space(ctx, 3, FALSE); + + cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); + cs->buf[cs->cdw++] = 0; + cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); + } else { + unsigned results_base = query->results_start; + unsigned count; + uint32_t op; + + /* find count of the query data blocks */ + count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0; + count /= query->result_size; + + si_need_cs_space(ctx, 5 * count, TRUE); + + op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | + (flag_wait ? 
PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); + va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer); + + /* emit predicate packets for all data blocks */ + while (results_base != query->results_end) { + cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); + cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; + cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, + query->buffer, RADEON_USAGE_READ); + results_base = (results_base + query->result_size) % query->buffer->b.b.width0; + + /* set CONTINUE bit for all packets except the first */ + op |= PREDICATION_CONTINUE; + } + } +} + +struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) +{ + struct r600_query *query; + unsigned buffer_size = 4096; + + query = CALLOC_STRUCT(r600_query); + if (query == NULL) + return NULL; + + query->type = query_type; + + switch (query_type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + query->result_size = 16 * ctx->max_db; + query->num_cs_dw = 6; + break; + case PIPE_QUERY_TIMESTAMP: + query->result_size = 8; + query->num_cs_dw = 8; + break; + case PIPE_QUERY_TIME_ELAPSED: + query->result_size = 16; + query->num_cs_dw = 8; + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ + query->result_size = 32; + query->num_cs_dw = 6; + break; + default: + assert(0); + FREE(query); + return NULL; + } + + /* adjust buffer size to simplify offsets wrapping math */ + buffer_size -= buffer_size % query->result_size; + + /* Queries are normally read by the CPU after + * being written by the gpu, hence staging is probably a good + * usage pattern. 
+ */ + query->buffer = r600_resource_create_custom(&ctx->screen->b.b, + PIPE_USAGE_STAGING, + buffer_size); + if (!query->buffer) { + FREE(query); + return NULL; + } + return query; +} + +void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) +{ + r600_resource_reference(&query->buffer, NULL); + free(query); +} + +boolean r600_context_query_result(struct r600_context *ctx, + struct r600_query *query, + boolean wait, void *vresult) +{ + boolean *result_b = (boolean*)vresult; + uint64_t *result_u64 = (uint64_t*)vresult; + struct pipe_query_data_so_statistics *result_so = + (struct pipe_query_data_so_statistics*)vresult; + + if (!r600_query_result(ctx, query, wait)) + return FALSE; + + switch (query->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + *result_u64 = query->result.u64; + break; + case PIPE_QUERY_OCCLUSION_PREDICATE: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + *result_b = query->result.b; + break; + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_TIME_ELAPSED: + *result_u64 = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq; + break; + case PIPE_QUERY_SO_STATISTICS: + *result_so = query->result.so; + break; + default: + assert(0); + } + return TRUE; +} + +void r600_context_queries_suspend(struct r600_context *ctx) +{ + struct r600_query *query; + + LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { + r600_query_end(ctx, query); + } + assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); +} + +void r600_context_queries_resume(struct r600_context *ctx) +{ + struct r600_query *query; + + assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); + + LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_query_list, list) { + r600_query_begin(ctx, query); + } +} + +#if R600_TRACE_CS +void r600_trace_emit(struct r600_context *rctx) +{ + struct r600_screen *rscreen = rctx->screen; + struct radeon_winsys_cs *cs = rctx->cs; + 
uint64_t va; + + va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo); + r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE); + cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0); + cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | + PKT3_WRITE_DATA_WR_CONFIRM | + PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME); + cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL; + cs->buf[cs->cdw++] = cs->cdw; + cs->buf[cs->cdw++] = rscreen->cs_count; +} +#endif diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c new file mode 100644 index 00000000000..62fd2a3f7a1 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -0,0 +1,677 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "util/u_blitter.h" +#include "util/u_double_list.h" +#include "util/u_format.h" +#include "util/u_transfer.h" +#include "util/u_surface.h" +#include "util/u_pack_color.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" +#include "util/u_upload_mgr.h" +#include "vl/vl_decoder.h" +#include "vl/vl_video_buffer.h" +#include "os/os_time.h" +#include "pipebuffer/pb_buffer.h" +#include "si_pipe.h" +#include "radeon/radeon_uvd.h" +#include "si.h" +#include "sid.h" +#include "si_resource.h" +#include "si_pipe.h" +#include "si_state.h" +#include "../radeon/r600_cs.h" + +/* + * pipe_context + */ +void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct pipe_query *render_cond = NULL; + boolean render_cond_cond = FALSE; + unsigned render_cond_mode = 0; + + if (fence) { + *fence = rctx->b.ws->cs_create_fence(rctx->b.rings.gfx.cs); + } + + /* Disable render condition. */ + if (rctx->current_render_cond) { + render_cond = rctx->current_render_cond; + render_cond_cond = rctx->current_render_cond_cond; + render_cond_mode = rctx->current_render_cond_mode; + ctx->render_condition(ctx, NULL, FALSE, 0); + } + + si_context_flush(rctx, flags); + + /* Re-enable render condition. */ + if (render_cond) { + ctx->render_condition(ctx, render_cond, render_cond_cond, render_cond_mode); + } +} + +static void r600_flush_from_st(struct pipe_context *ctx, + struct pipe_fence_handle **fence, + unsigned flags) +{ + radeonsi_flush(ctx, fence, + flags & PIPE_FLUSH_END_OF_FRAME ? 
RADEON_FLUSH_END_OF_FRAME : 0); +} + +static void r600_flush_from_winsys(void *ctx, unsigned flags) +{ + radeonsi_flush((struct pipe_context*)ctx, NULL, flags); +} + +static void r600_destroy_context(struct pipe_context *context) +{ + struct r600_context *rctx = (struct r600_context *)context; + + si_release_all_descriptors(rctx); + + pipe_resource_reference(&rctx->null_const_buf.buffer, NULL); + r600_resource_reference(&rctx->border_color_table, NULL); + + if (rctx->dummy_pixel_shader) { + rctx->b.b.delete_fs_state(&rctx->b.b, rctx->dummy_pixel_shader); + } + for (int i = 0; i < 8; i++) { + rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth_stencil[i]); + rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth[i]); + rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_stencil[i]); + } + rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_inplace); + rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_resolve); + rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_decompress); + util_unreference_framebuffer_state(&rctx->framebuffer); + + util_blitter_destroy(rctx->blitter); + + r600_common_context_cleanup(&rctx->b); + FREE(rctx); +} + +static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) +{ + struct r600_context *rctx = CALLOC_STRUCT(r600_context); + struct r600_screen* rscreen = (struct r600_screen *)screen; + int shader, i; + + if (rctx == NULL) + return NULL; + + if (!r600_common_context_init(&rctx->b, &rscreen->b)) + goto fail; + + rctx->b.b.screen = screen; + rctx->b.b.priv = priv; + rctx->b.b.destroy = r600_destroy_context; + rctx->b.b.flush = r600_flush_from_st; + + /* Easy accessing of screen/winsys. 
*/ + rctx->screen = rscreen; + + si_init_blit_functions(rctx); + r600_init_query_functions(rctx); + r600_init_context_resource_functions(rctx); + si_init_compute_functions(rctx); + + if (rscreen->b.info.has_uvd) { + rctx->b.b.create_video_codec = radeonsi_uvd_create_decoder; + rctx->b.b.create_video_buffer = radeonsi_video_buffer_create; + } else { + rctx->b.b.create_video_codec = vl_create_decoder; + rctx->b.b.create_video_buffer = vl_video_buffer_create; + } + + rctx->b.rings.gfx.cs = rctx->b.ws->cs_create(rctx->b.ws, RING_GFX, NULL); + rctx->b.rings.gfx.flush = r600_flush_from_winsys; + + si_init_all_descriptors(rctx); + + /* Initialize cache_flush. */ + rctx->cache_flush = si_atom_cache_flush; + rctx->atoms.cache_flush = &rctx->cache_flush; + + rctx->atoms.streamout_begin = &rctx->b.streamout.begin_atom; + + switch (rctx->b.chip_class) { + case SI: + case CIK: + si_init_state_functions(rctx); + LIST_INITHEAD(&rctx->active_nontimer_query_list); + rctx->max_db = 8; + si_init_config(rctx); + break; + default: + R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class); + goto fail; + } + + rctx->b.ws->cs_set_flush_callback(rctx->b.rings.gfx.cs, r600_flush_from_winsys, rctx); + + rctx->blitter = util_blitter_create(&rctx->b.b); + if (rctx->blitter == NULL) + goto fail; + + rctx->dummy_pixel_shader = + util_make_fragment_cloneinput_shader(&rctx->b.b, 0, + TGSI_SEMANTIC_GENERIC, + TGSI_INTERPOLATE_CONSTANT); + rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader); + + /* these must be last */ + si_begin_new_cs(rctx); + si_get_backend_mask(rctx); + + /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy + * with a NULL buffer). We need to use a dummy buffer instead. 
*/ + if (rctx->b.chip_class == CIK) { + rctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER, + PIPE_USAGE_STATIC, 16); + rctx->null_const_buf.buffer_size = rctx->null_const_buf.buffer->width0; + + for (shader = 0; shader < SI_NUM_SHADERS; shader++) { + for (i = 0; i < NUM_CONST_BUFFERS; i++) { + rctx->b.b.set_constant_buffer(&rctx->b.b, shader, i, + &rctx->null_const_buf); + } + } + + /* Clear the NULL constant buffer, because loads should return zeros. */ + rctx->b.clear_buffer(&rctx->b.b, rctx->null_const_buf.buffer, 0, + rctx->null_const_buf.buffer->width0, 0); + } + + return &rctx->b.b; +fail: + r600_destroy_context(&rctx->b.b); + return NULL; +} + +/* + * pipe_screen + */ +static const char* r600_get_vendor(struct pipe_screen* pscreen) +{ + return "X.Org"; +} + +const char *r600_get_llvm_processor_name(enum radeon_family family) +{ + switch (family) { + case CHIP_TAHITI: return "tahiti"; + case CHIP_PITCAIRN: return "pitcairn"; + case CHIP_VERDE: return "verde"; + case CHIP_OLAND: return "oland"; +#if HAVE_LLVM <= 0x0303 + default: return "SI"; +#else + case CHIP_HAINAN: return "hainan"; + case CHIP_BONAIRE: return "bonaire"; + case CHIP_KABINI: return "kabini"; + case CHIP_KAVERI: return "kaveri"; + case CHIP_HAWAII: return "hawaii"; + default: return ""; +#endif + } +} + +static const char *r600_get_family_name(enum radeon_family family) +{ + switch(family) { + case CHIP_TAHITI: return "AMD TAHITI"; + case CHIP_PITCAIRN: return "AMD PITCAIRN"; + case CHIP_VERDE: return "AMD CAPE VERDE"; + case CHIP_OLAND: return "AMD OLAND"; + case CHIP_HAINAN: return "AMD HAINAN"; + case CHIP_BONAIRE: return "AMD BONAIRE"; + case CHIP_KAVERI: return "AMD KAVERI"; + case CHIP_KABINI: return "AMD KABINI"; + case CHIP_HAWAII: return "AMD HAWAII"; + default: return "AMD unknown"; + } +} + +static const char* r600_get_name(struct pipe_screen* pscreen) +{ + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + + return 
r600_get_family_name(rscreen->b.family); +} + +static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) +{ + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + + switch (param) { + /* Supported features (boolean caps). */ + case PIPE_CAP_TWO_SIDED_STENCIL: + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + case PIPE_CAP_ANISOTROPIC_FILTER: + case PIPE_CAP_POINT_SPRITE: + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_TEXTURE_SHADOW_MAP: + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + case PIPE_CAP_TEXTURE_SWIZZLE: + case PIPE_CAP_DEPTH_CLIP_DISABLE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_PRIMITIVE_RESTART: + case PIPE_CAP_CONDITIONAL_RENDER: + case PIPE_CAP_TEXTURE_BARRIER: + case PIPE_CAP_INDEP_BLEND_ENABLE: + case PIPE_CAP_INDEP_BLEND_FUNC: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_START_INSTANCE: + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_COMPUTE: + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + case PIPE_CAP_TGSI_VS_LAYER: + return 1; + + case PIPE_CAP_TEXTURE_MULTISAMPLE: + /* 2D tiling on CIK is supported since DRM 2.35.0 */ + return HAVE_LLVM >= 0x0304 && (rscreen->b.chip_class < CIK || + rscreen->b.info.drm_minor >= 35); + + case PIPE_CAP_TGSI_TEXCOORD: + return 0; + + case 
PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 256; + + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 140; + + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return MIN2(rscreen->b.info.vram_size, 0xFFFFFFFF); + + /* Unsupported features. */ + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + case PIPE_CAP_SCALED_RESOLVE: + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_CLAMPED: + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + case PIPE_CAP_CUBE_MAP_ARRAY: + return 0; + + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600; + + /* Stream output. */ + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return rscreen->b.has_streamout ? 4 : 0; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return rscreen->b.has_streamout ? 1 : 0; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return rscreen->b.has_streamout ? 32*4 : 0; + + /* Texturing. */ + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 15; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 16384; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 32; + + /* Render targets. */ + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + + /* Timer queries, present when the clock frequency is non zero. 
*/ + case PIPE_CAP_QUERY_TIMESTAMP: + case PIPE_CAP_QUERY_TIME_ELAPSED: + return rscreen->b.info.r600_clock_crystal_freq != 0; + + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_LITTLE; + } + return 0; +} + +static float r600_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 16384.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + } + return 0.0f; +} + +static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) +{ + switch(shader) + { + case PIPE_SHADER_FRAGMENT: + case PIPE_SHADER_VERTEX: + break; + case PIPE_SHADER_GEOMETRY: + /* TODO: support and enable geometry programs */ + return 0; + case PIPE_SHADER_COMPUTE: + switch (param) { + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_LLVM; + default: + return 0; + } + default: + /* TODO: support tessellation */ + return 0; + } + + switch (param) { + case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: + case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS: + return 16384; + case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: + return 32; + case PIPE_SHADER_CAP_MAX_INPUTS: + return 32; + case PIPE_SHADER_CAP_MAX_TEMPS: + return 256; /* Max native temporaries. */ + case PIPE_SHADER_CAP_MAX_ADDRS: + /* FIXME Isn't this equal to TEMPS? 
*/ + return 1; /* Max native address registers */ + case PIPE_SHADER_CAP_MAX_CONSTS: + return 4096; /* actually only memory limits this */ + case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: + return NUM_PIPE_CONST_BUFFERS; + case PIPE_SHADER_CAP_MAX_PREDS: + return 0; /* FIXME */ + case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED: + return 1; + case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: + return 0; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; + case PIPE_SHADER_CAP_INTEGERS: + return 1; + case PIPE_SHADER_CAP_SUBROUTINES: + return 0; + case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: + case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: + return 16; + case PIPE_SHADER_CAP_PREFERRED_IR: + return PIPE_SHADER_IR_TGSI; + } + return 0; +} + +static int r600_get_video_param(struct pipe_screen *screen, + enum pipe_video_profile profile, + enum pipe_video_entrypoint entrypoint, + enum pipe_video_cap param) +{ + switch (param) { + case PIPE_VIDEO_CAP_SUPPORTED: + return vl_profile_supported(screen, profile, entrypoint); + case PIPE_VIDEO_CAP_NPOT_TEXTURES: + return 1; + case PIPE_VIDEO_CAP_MAX_WIDTH: + case PIPE_VIDEO_CAP_MAX_HEIGHT: + return vl_video_buffer_max_size(screen); + case PIPE_VIDEO_CAP_PREFERED_FORMAT: + return PIPE_FORMAT_NV12; + case PIPE_VIDEO_CAP_MAX_LEVEL: + return vl_level_supported(screen, profile); + default: + return 0; + } +} + +static int r600_get_compute_param(struct pipe_screen *screen, + enum pipe_compute_cap param, + void *ret) +{ + struct r600_screen *rscreen = (struct r600_screen *)screen; + //TODO: select these params by asic + switch (param) { + case PIPE_COMPUTE_CAP_IR_TARGET: { + const char *gpu = r600_get_llvm_processor_name(rscreen->b.family); + if (ret) { + sprintf(ret, "%s-r600--", gpu); + } + return (8 + strlen(gpu)) * sizeof(char); + } + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + if (ret) { + uint64_t * grid_dimension = ret; + 
grid_dimension[0] = 3; + } + return 1 * sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + if (ret) { + uint64_t * grid_size = ret; + grid_size[0] = 65535; + grid_size[1] = 65535; + grid_size[2] = 1; + } + return 3 * sizeof(uint64_t) ; + + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + if (ret) { + uint64_t * block_size = ret; + block_size[0] = 256; + block_size[1] = 256; + block_size[2] = 256; + } + return 3 * sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + if (ret) { + uint64_t * max_threads_per_block = ret; + *max_threads_per_block = 256; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + if (ret) { + uint64_t *max_global_size = ret; + /* XXX: Not sure what to put here. */ + *max_global_size = 2000000000; + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + if (ret) { + uint64_t *max_local_size = ret; + /* Value reported by the closed source driver. */ + *max_local_size = 32768; + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + if (ret) { + uint64_t *max_input_size = ret; + /* Value reported by the closed source driver. 
*/ + *max_input_size = 1024; + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + if (ret) { + uint64_t max_global_size; + uint64_t *max_mem_alloc_size = ret; + r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size); + *max_mem_alloc_size = max_global_size / 4; + } + return sizeof(uint64_t); + default: + fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); + return 0; + } +} + +static void r600_destroy_screen(struct pipe_screen* pscreen) +{ + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + + if (rscreen == NULL) + return; + + if (!radeon_winsys_unref(rscreen->b.ws)) + return; + + r600_common_screen_cleanup(&rscreen->b); + +#if R600_TRACE_CS + if (rscreen->trace_bo) { + rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf); + pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL); + } +#endif + + rscreen->b.ws->destroy(rscreen->b.ws); + FREE(rscreen); +} + +static uint64_t r600_get_timestamp(struct pipe_screen *screen) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + + return 1000000 * rscreen->b.ws->query_value(rscreen->b.ws, RADEON_TIMESTAMP) / + rscreen->b.info.r600_clock_crystal_freq; +} + +struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) +{ + struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen); + if (rscreen == NULL) { + return NULL; + } + + ws->query_info(ws, &rscreen->b.info); + + /* Set functions first. 
*/ + rscreen->b.b.context_create = r600_create_context; + rscreen->b.b.destroy = r600_destroy_screen; + rscreen->b.b.get_name = r600_get_name; + rscreen->b.b.get_vendor = r600_get_vendor; + rscreen->b.b.get_param = r600_get_param; + rscreen->b.b.get_shader_param = r600_get_shader_param; + rscreen->b.b.get_paramf = r600_get_paramf; + rscreen->b.b.get_compute_param = r600_get_compute_param; + rscreen->b.b.get_timestamp = r600_get_timestamp; + rscreen->b.b.is_format_supported = si_is_format_supported; + if (rscreen->b.info.has_uvd) { + rscreen->b.b.get_video_param = ruvd_get_video_param; + rscreen->b.b.is_video_format_supported = ruvd_is_format_supported; + } else { + rscreen->b.b.get_video_param = r600_get_video_param; + rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported; + } + r600_init_screen_resource_functions(&rscreen->b.b); + + if (!r600_common_screen_init(&rscreen->b, ws)) { + FREE(rscreen); + return NULL; + } + + rscreen->b.has_cp_dma = true; + rscreen->b.has_streamout = HAVE_LLVM >= 0x0304; + + if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) + rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; + +#if R600_TRACE_CS + rscreen->cs_count = 0; + if (rscreen->info.drm_minor >= 28) { + rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen, + PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, + 4096); + if (rscreen->trace_bo) { + rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL, + PIPE_TRANSFER_UNSYNCHRONIZED); + } + } +#endif + + /* Create the auxiliary context. This must be done last. 
*/ + rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL); + + return &rscreen->b.b; +} diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h new file mode 100644 index 00000000000..36fb875f88f --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -0,0 +1,263 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jerome Glisse + */ +#ifndef SI_PIPE_H +#define SI_PIPE_H + +#include "../radeon/r600_pipe_common.h" + +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "util/u_format.h" +#include "util/u_math.h" +#include "util/u_slab.h" +#include "si.h" +#include "sid.h" +#include "si_public.h" +#include "si_pm4.h" +#include "si_resource.h" +#include "si_state.h" + +#ifdef PIPE_ARCH_BIG_ENDIAN +#define R600_BIG_ENDIAN 1 +#else +#define R600_BIG_ENDIAN 0 +#endif + +#define R600_TRACE_CS 0 +#define R600_TRACE_CS_DWORDS 6 + +#define SI_MAX_DRAW_CS_DWORDS 18 + +struct si_pipe_compute; + +struct r600_screen { + struct r600_common_screen b; +#if R600_TRACE_CS + struct r600_resource *trace_bo; + uint32_t *trace_ptr; + unsigned cs_count; +#endif +}; + +struct si_pipe_sampler_view { + struct pipe_sampler_view base; + struct r600_resource *resource; + uint32_t state[8]; + uint32_t fmask_state[8]; +}; + +struct si_pipe_sampler_state { + uint32_t val[4]; + uint32_t border_color[4]; +}; + +struct si_cs_shader_state { + struct si_pipe_compute *program; +}; + +struct r600_textures_info { + struct si_sampler_views views; + struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS]; + unsigned n_views; + uint32_t depth_texture_mask; /* which textures are depth */ + uint32_t compressed_colortex_mask; + unsigned n_samplers; +}; + +#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0])) +#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1) + +struct r600_context { + struct r600_common_context b; + struct blitter_context *blitter; + void *custom_dsa_flush_depth_stencil[8]; + void *custom_dsa_flush_depth[8]; + void *custom_dsa_flush_stencil[8]; + void *custom_dsa_flush_inplace; + void *custom_blend_resolve; + void *custom_blend_decompress; + struct r600_screen *screen; + + union { + struct { + /* The order matters. 
*/ + struct r600_atom *const_buffers[SI_NUM_SHADERS]; + struct r600_atom *sampler_views[SI_NUM_SHADERS]; + struct r600_atom *streamout_buffers; + /* Caches must be flushed after resource descriptors are + * updated in memory. */ + struct r600_atom *cache_flush; + struct r600_atom *streamout_begin; + }; + struct r600_atom *array[0]; + } atoms; + + struct si_vertex_element *vertex_elements; + struct pipe_framebuffer_state framebuffer; + unsigned fb_log_samples; + unsigned fb_cb0_is_integer; + unsigned fb_compressed_cb_mask; + unsigned pa_sc_line_stipple; + unsigned pa_su_sc_mode_cntl; + /* for saving when using blitter */ + struct pipe_stencil_ref stencil_ref; + struct si_pipe_shader_selector *ps_shader; + struct si_pipe_shader_selector *vs_shader; + struct si_cs_shader_state cs_shader_state; + struct pipe_query *current_render_cond; + unsigned current_render_cond_mode; + boolean current_render_cond_cond; + struct pipe_query *saved_render_cond; + unsigned saved_render_cond_mode; + boolean saved_render_cond_cond; + /* shader information */ + unsigned sprite_coord_enable; + unsigned export_16bpc; + struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; + struct si_buffer_resources streamout_buffers; + struct r600_textures_info samplers[SI_NUM_SHADERS]; + struct r600_resource *border_color_table; + unsigned border_color_offset; + + unsigned default_ps_gprs, default_vs_gprs; + + /* Below are variables from the old r600_context. + */ + unsigned pm4_dirty_cdwords; + + /* The list of active queries. Only one query of each type can be active. */ + struct list_head active_nontimer_query_list; + unsigned num_cs_dw_nontimer_queries_suspend; + /* If queries have been suspended. */ + bool nontimer_queries_suspended; + + unsigned backend_mask; + unsigned max_db; /* for OQ */ + boolean predicate_drawing; + + /* Vertex and index buffers. 
*/ + bool vertex_buffers_dirty; + struct pipe_index_buffer index_buffer; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + /* With rasterizer discard, there doesn't have to be a pixel shader. + * In that case, we bind this one: */ + void *dummy_pixel_shader; + struct r600_atom cache_flush; + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + + /* SI state handling */ + union si_state queued; + union si_state emitted; +}; + +/* si_blit.c */ +void si_init_blit_functions(struct r600_context *rctx); +void si_flush_depth_textures(struct r600_context *rctx, + struct r600_textures_info *textures); +void r600_decompress_color_textures(struct r600_context *rctx, + struct r600_textures_info *textures); + +/* si_buffer.c */ +void r600_upload_index_buffer(struct r600_context *rctx, + struct pipe_index_buffer *ib, unsigned count); + + +/* si_pipe.c */ +void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, + unsigned flags); +const char *r600_get_llvm_processor_name(enum radeon_family family); + +/* si_query.c */ +void r600_init_query_functions(struct r600_context *rctx); + +/* si_resource.c */ +void r600_init_context_resource_functions(struct r600_context *r600); + +/* si_translate.c */ +void r600_translate_index_buffer(struct r600_context *r600, + struct pipe_index_buffer *ib, + unsigned count); + +#if R600_TRACE_CS +void r600_trace_emit(struct r600_context *rctx); +#endif + +/* si_compute.c */ +void si_init_compute_functions(struct r600_context *rctx); + +/* si_uvd.c */ +struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ); + +struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *tmpl); + +/* + * common helpers + */ +static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} +#define 
ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +static INLINE unsigned si_map_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_Y: + return V_008F0C_SQ_SEL_Y; + case UTIL_FORMAT_SWIZZLE_Z: + return V_008F0C_SQ_SEL_Z; + case UTIL_FORMAT_SWIZZLE_W: + return V_008F0C_SQ_SEL_W; + case UTIL_FORMAT_SWIZZLE_0: + return V_008F0C_SQ_SEL_0; + case UTIL_FORMAT_SWIZZLE_1: + return V_008F0C_SQ_SEL_1; + default: /* UTIL_FORMAT_SWIZZLE_X */ + return V_008F0C_SQ_SEL_X; + } +} + +static inline unsigned r600_tex_aniso_filter(unsigned filter) +{ + if (filter <= 1) return 0; + if (filter <= 2) return 1; + if (filter <= 4) return 2; + if (filter <= 8) return 3; + /* else */ return 4; +} + +/* 12.4 fixed-point */ +static INLINE unsigned r600_pack_float_12p4(float x) +{ + return x <= 0 ? 0 : + x >= 4096 ? 0xffff : x * 16; +} + +#endif diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c new file mode 100644 index 00000000000..511c32d8866 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -0,0 +1,254 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Christian König + */ + +#include "../radeon/r600_cs.h" +#include "util/u_memory.h" +#include "si_pipe.h" +#include "si_pm4.h" +#include "sid.h" + +#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *)) + +void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) +{ + state->last_opcode = opcode; + state->last_pm4 = state->ndw++; +} + +void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) +{ + state->pm4[state->ndw++] = dw; +} + +void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) +{ + unsigned count; + count = state->ndw - state->last_pm4 - 2; + state->pm4[state->last_pm4] = + PKT3(state->last_opcode, count, predicate) + | PKT3_SHADER_TYPE_S(state->compute_pkt); + + assert(state->ndw <= SI_PM4_MAX_DW); +} + +void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) +{ + unsigned opcode; + + if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { + opcode = PKT3_SET_CONFIG_REG; + reg -= SI_CONFIG_REG_OFFSET; + + } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { + opcode = PKT3_SET_SH_REG; + reg -= SI_SH_REG_OFFSET; + + } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { + opcode = PKT3_SET_CONTEXT_REG; + reg -= SI_CONTEXT_REG_OFFSET; + + } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { + opcode = PKT3_SET_UCONFIG_REG; + reg -= CIK_UCONFIG_REG_OFFSET; + + } else { + R600_ERR("Invalid register offset %08x!\n", reg); + return; + } + + reg >>= 2; + + if (opcode != state->last_opcode || reg != (state->last_reg + 1)) { + si_pm4_cmd_begin(state, opcode); + si_pm4_cmd_add(state, reg); + } + + state->last_reg = reg; + si_pm4_cmd_add(state, val); + 
si_pm4_cmd_end(state, false); +} + +void si_pm4_add_bo(struct si_pm4_state *state, + struct r600_resource *bo, + enum radeon_bo_usage usage) +{ + unsigned idx = state->nbo++; + assert(idx < SI_PM4_MAX_BO); + + r600_resource_reference(&state->bo[idx], bo); + state->bo_usage[idx] = usage; +} + +void si_pm4_sh_data_begin(struct si_pm4_state *state) +{ + si_pm4_cmd_begin(state, PKT3_NOP); +} + +void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw) +{ + si_pm4_cmd_add(state, dw); +} + +void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx) +{ + unsigned offs = state->last_pm4 + 1; + unsigned reg = base + idx * 4; + + /* Bail if no data was added */ + if (state->ndw == offs) { + state->ndw--; + return; + } + + si_pm4_cmd_end(state, false); + + si_pm4_cmd_begin(state, PKT3_SET_SH_REG_OFFSET); + si_pm4_cmd_add(state, (reg - SI_SH_REG_OFFSET) >> 2); + state->relocs[state->nrelocs++] = state->ndw; + si_pm4_cmd_add(state, offs << 2); + si_pm4_cmd_add(state, 0); + si_pm4_cmd_end(state, false); +} + +void si_pm4_inval_shader_cache(struct si_pm4_state *state) +{ + state->cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); + state->cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); +} + +void si_pm4_inval_texture_cache(struct si_pm4_state *state) +{ + state->cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); + state->cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); +} + +void si_pm4_free_state(struct r600_context *rctx, + struct si_pm4_state *state, + unsigned idx) +{ + if (state == NULL) + return; + + if (idx != ~0 && rctx->emitted.array[idx] == state) { + rctx->emitted.array[idx] = NULL; + } + + for (int i = 0; i < state->nbo; ++i) { + r600_resource_reference(&state->bo[i], NULL); + } + FREE(state); +} + +struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx) +{ + struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); + + if (pm4 == NULL) + return NULL; + + pm4->chip_class = rctx->b.chip_class; + + return pm4; +} + +uint32_t 
si_pm4_sync_flags(struct r600_context *rctx) +{ + uint32_t cp_coher_cntl = 0; + + for (int i = 0; i < NUMBER_OF_STATES; ++i) { + struct si_pm4_state *state = rctx->queued.array[i]; + + if (!state || rctx->emitted.array[i] == state) + continue; + + cp_coher_cntl |= state->cp_coher_cntl; + } + return cp_coher_cntl; +} + +unsigned si_pm4_dirty_dw(struct r600_context *rctx) +{ + unsigned count = 0; + + for (int i = 0; i < NUMBER_OF_STATES; ++i) { + struct si_pm4_state *state = rctx->queued.array[i]; + + if (!state || rctx->emitted.array[i] == state) + continue; + + count += state->ndw; +#if R600_TRACE_CS + /* for tracing each states */ + if (rctx->screen->trace_bo) { + count += R600_TRACE_CS_DWORDS; + } +#endif + } + + return count; +} + +void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state) +{ + struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + for (int i = 0; i < state->nbo; ++i) { + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, state->bo[i], + state->bo_usage[i]); + } + + memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4); + + for (int i = 0; i < state->nrelocs; ++i) { + cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2; + } + + cs->cdw += state->ndw; + +#if R600_TRACE_CS + if (rctx->screen->trace_bo) { + r600_trace_emit(rctx); + } +#endif +} + +void si_pm4_emit_dirty(struct r600_context *rctx) +{ + for (int i = 0; i < NUMBER_OF_STATES; ++i) { + struct si_pm4_state *state = rctx->queued.array[i]; + + if (!state || rctx->emitted.array[i] == state) + continue; + + assert(state != rctx->queued.named.init); + si_pm4_emit(rctx, state); + rctx->emitted.array[i] = state; + } +} + +void si_pm4_reset_emitted(struct r600_context *rctx) +{ + memset(&rctx->emitted, 0, sizeof(rctx->emitted)); +} diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h new file mode 100644 index 00000000000..fcdf470e6cf --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -0,0 +1,95 @@ +/* + * Copyright 2012 Advanced Micro 
Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Christian König + */ + +#ifndef SI_PM4_H +#define SI_PM4_H + +#include "../../winsys/radeon/drm/radeon_winsys.h" + +#define SI_PM4_MAX_DW 256 +#define SI_PM4_MAX_BO 32 +#define SI_PM4_MAX_RELOCS 4 + +// forward defines +struct r600_context; +enum chip_class; + +struct si_pm4_state +{ + /* family specific handling */ + enum chip_class chip_class; + /* PKT3_SET_*_REG handling */ + unsigned last_opcode; + unsigned last_reg; + unsigned last_pm4; + + /* flush flags for SURFACE_SYNC */ + uint32_t cp_coher_cntl; + + /* commands for the DE */ + unsigned ndw; + uint32_t pm4[SI_PM4_MAX_DW]; + + /* BO's referenced by this state */ + unsigned nbo; + struct r600_resource *bo[SI_PM4_MAX_BO]; + enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO]; + + /* relocs for shader data */ + unsigned nrelocs; + unsigned relocs[SI_PM4_MAX_RELOCS]; + + bool compute_pkt; +}; + +void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode); +void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw); +void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate); + +void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); +void si_pm4_add_bo(struct si_pm4_state *state, + struct r600_resource *bo, + enum radeon_bo_usage usage); + +void si_pm4_sh_data_begin(struct si_pm4_state *state); +void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw); +void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx); + +void si_pm4_inval_shader_cache(struct si_pm4_state *state); +void si_pm4_inval_texture_cache(struct si_pm4_state *state); + +void si_pm4_free_state(struct r600_context *rctx, + struct si_pm4_state *state, + unsigned idx); +struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx); + +uint32_t si_pm4_sync_flags(struct r600_context *rctx); +unsigned si_pm4_dirty_dw(struct r600_context *rctx); +void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state); +void si_pm4_emit_dirty(struct r600_context 
*rctx); +void si_pm4_reset_emitted(struct r600_context *rctx); + +#endif diff --git a/src/gallium/drivers/radeonsi/si_public.h b/src/gallium/drivers/radeonsi/si_public.h new file mode 100644 index 00000000000..7cf36c8407a --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_public.h @@ -0,0 +1,30 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef SI_PUBLIC_H +#define SI_PUBLIC_H + +struct radeon_winsys; + +struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws); + +#endif diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c new file mode 100644 index 00000000000..2f23d0881de --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -0,0 +1,147 @@ +/* + * Copyright 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "si_pipe.h" +#include "sid.h" + +static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + return (struct pipe_query*)r600_context_query_create(rctx, query_type); +} + +static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + r600_context_query_destroy(rctx, (struct r600_query *)query); +} + +static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + if (!si_query_needs_begin(rquery->type)) { + assert(0); + return; + } + + memset(&rquery->result, 0, sizeof(rquery->result)); + rquery->results_start = rquery->results_end; + r600_query_begin(rctx, (struct r600_query *)query); + + if (!si_is_timer_query(rquery->type)) { + LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_query_list); + } +} + +static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + if (!si_query_needs_begin(rquery->type)) { + memset(&rquery->result, 0, sizeof(rquery->result)); + } + + r600_query_end(rctx, rquery); + + if (si_query_needs_begin(rquery->type) && !si_is_timer_query(rquery->type)) { + LIST_DELINIT(&rquery->list); + } +} + +static boolean r600_get_query_result(struct pipe_context *ctx, + struct pipe_query *query, + boolean wait, union pipe_query_result *vresult) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + + return r600_context_query_result(rctx, rquery, wait, vresult); +} + +static void r600_render_condition(struct pipe_context *ctx, + struct pipe_query *query, + boolean condition, + uint mode) +{ + struct r600_context *rctx = 
(struct r600_context *)ctx; + struct r600_query *rquery = (struct r600_query *)query; + int wait_flag = 0; + + /* If we already have nonzero result, render unconditionally */ + if (query != NULL && rquery->result.u64 != 0) { + if (rctx->current_render_cond) { + r600_render_condition(ctx, NULL, FALSE, 0); + } + return; + } + + rctx->current_render_cond = query; + rctx->current_render_cond_cond = condition; + rctx->current_render_cond_mode = mode; + + if (query == NULL) { + if (rctx->predicate_drawing) { + rctx->predicate_drawing = false; + r600_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, 1); + } + return; + } + + if (mode == PIPE_RENDER_COND_WAIT || + mode == PIPE_RENDER_COND_BY_REGION_WAIT) { + wait_flag = 1; + } + + rctx->predicate_drawing = true; + + switch (rquery->type) { + case PIPE_QUERY_OCCLUSION_COUNTER: + case PIPE_QUERY_OCCLUSION_PREDICATE: + r600_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag); + break; + case PIPE_QUERY_PRIMITIVES_EMITTED: + case PIPE_QUERY_PRIMITIVES_GENERATED: + case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + r600_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); + break; + default: + assert(0); + } +} + +void r600_init_query_functions(struct r600_context *rctx) +{ + rctx->b.b.create_query = r600_create_query; + rctx->b.b.destroy_query = r600_destroy_query; + rctx->b.b.begin_query = r600_begin_query; + rctx->b.b.end_query = r600_end_query; + rctx->b.b.get_query_result = r600_get_query_result; + + if (rctx->screen->b.info.r600_num_backends > 0) + rctx->b.b.render_condition = r600_render_condition; +} diff --git a/src/gallium/drivers/radeonsi/si_resource.c b/src/gallium/drivers/radeonsi/si_resource.c new file mode 100644 index 00000000000..1d1e9d8c9b3 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_resource.c @@ -0,0 +1,61 @@ +/* + * Copyright 2010 Marek Olšák target == PIPE_BUFFER) { + return r600_buffer_create(screen, templ, 4096); + } else { + return 
r600_texture_create(screen, templ); + } +} + +static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen, + const struct pipe_resource *templ, + struct winsys_handle *whandle) +{ + if (templ->target == PIPE_BUFFER) { + return NULL; + } else { + return r600_texture_from_handle(screen, templ, whandle); + } +} + +void r600_init_screen_resource_functions(struct pipe_screen *screen) +{ + screen->resource_create = r600_resource_create; + screen->resource_from_handle = r600_resource_from_handle; + screen->resource_get_handle = u_resource_get_handle_vtbl; + screen->resource_destroy = u_resource_destroy_vtbl; +} + +void r600_init_context_resource_functions(struct r600_context *r600) +{ + r600->b.b.transfer_map = u_transfer_map_vtbl; + r600->b.b.transfer_flush_region = u_default_transfer_flush_region; + r600->b.b.transfer_unmap = u_transfer_unmap_vtbl; + r600->b.b.transfer_inline_write = u_default_transfer_inline_write; +} diff --git a/src/gallium/drivers/radeonsi/si_resource.h b/src/gallium/drivers/radeonsi/si_resource.h new file mode 100644 index 00000000000..5a9aba31125 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_resource.h @@ -0,0 +1,55 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Christian König + */ + +#ifndef SI_RESOURCE_H +#define SI_RESOURCE_H + +#include "../radeon/r600_pipe_common.h" +#include "util/u_transfer.h" +#include "util/u_inlines.h" + +static INLINE struct r600_resource * +r600_resource_create_custom(struct pipe_screen *screen, + unsigned usage, unsigned size) +{ + assert(size); + return r600_resource(pipe_buffer_create(screen, + PIPE_BIND_CUSTOM, usage, size)); +} + +struct r600_surface { + struct pipe_surface base; +}; + +void r600_init_screen_resource_functions(struct pipe_screen *screen); + +struct r600_context; + +void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer, + const uint8_t *ptr, unsigned size, + uint32_t *const_offset); + +#endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c new file mode 100644 index 00000000000..03a1c83f7e8 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -0,0 +1,2058 @@ + +/* + * Copyright 2012 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Tom Stellard + * Michel Dänzer + * Christian König + */ + +#include "gallivm/lp_bld_tgsi_action.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_gather.h" +#include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_tgsi.h" +#include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_flow.h" +#include "radeon_llvm.h" +#include "radeon_llvm_emit.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_dump.h" + +#include "si_pipe.h" +#include "si_shader.h" +#include "si_state.h" +#include "sid.h" + +#include +#include +#include + +struct si_shader_context +{ + struct radeon_llvm_context radeon_bld; + struct tgsi_parse_context parse; + struct tgsi_token * tokens; + struct si_pipe_shader *shader; + unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ + int param_streamout_config; + int param_streamout_write_index; + int param_streamout_offset[4]; + int param_vertex_id; + int param_instance_id; + LLVMValueRef const_md; + LLVMValueRef const_resource[NUM_CONST_BUFFERS]; +#if HAVE_LLVM >= 0x0304 + LLVMValueRef ddxy_lds; +#endif + LLVMValueRef *constants[NUM_CONST_BUFFERS]; + LLVMValueRef *resources; + LLVMValueRef *samplers; + LLVMValueRef so_buffers[4]; +}; + +static struct si_shader_context * si_shader_context( + struct lp_build_tgsi_context * bld_base) +{ + return (struct si_shader_context *)bld_base; +} + + +#define PERSPECTIVE_BASE 0 +#define LINEAR_BASE 9 + +#define SAMPLE_OFFSET 0 +#define CENTER_OFFSET 2 +#define CENTROID_OFSET 4 + +#define USE_SGPR_MAX_SUFFIX_LEN 5 +#define CONST_ADDR_SPACE 2 +#define LOCAL_ADDR_SPACE 3 +#define USER_SGPR_ADDR_SPACE 8 + +/** + * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad + * + * @param offset The offset parameter specifies the number of + * elements to offset, not the number of bytes or dwords. 
An element is the + * the type pointed to by the base_ptr parameter (e.g. int is the element of + * an int* pointer) + * + * When LLVM lowers the load instruction, it will convert the element offset + * into a dword offset automatically. + * + */ +static LLVMValueRef build_indexed_load( + struct si_shader_context * si_shader_ctx, + LLVMValueRef base_ptr, + LLVMValueRef offset) +{ + struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; + + LLVMValueRef indices[2] = { + LLVMConstInt(LLVMInt64TypeInContext(base->gallivm->context), 0, false), + offset + }; + LLVMValueRef computed_ptr = LLVMBuildGEP( + base->gallivm->builder, base_ptr, indices, 2, ""); + + LLVMValueRef result = LLVMBuildLoad(base->gallivm->builder, computed_ptr, ""); + LLVMSetMetadata(result, 1, si_shader_ctx->const_md); + return result; +} + +static LLVMValueRef get_instance_index_for_fetch( + struct radeon_llvm_context * radeon_bld, + unsigned divisor) +{ + struct si_shader_context *si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); + struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; + + LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_instance_id); + result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( + radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + + if (divisor > 1) + result = LLVMBuildUDiv(gallivm->builder, result, + lp_build_const_int32(gallivm, divisor), ""); + + return result; +} + +static void declare_input_vs( + struct si_shader_context * si_shader_ctx, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; + unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index]; + + unsigned chan; + + LLVMValueRef t_list_ptr; + LLVMValueRef t_offset; + LLVMValueRef t_list; + LLVMValueRef attribute_offset; + LLVMValueRef buffer_index; + LLVMValueRef args[3]; + LLVMTypeRef vec4_type; 
+ LLVMValueRef input; + + /* Load the T list */ + t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER); + + t_offset = lp_build_const_int32(base->gallivm, input_index); + + t_list = build_indexed_load(si_shader_ctx, t_list_ptr, t_offset); + + /* Build the attribute offset */ + attribute_offset = lp_build_const_int32(base->gallivm, 0); + + if (divisor) { + /* Build index from instance ID, start instance and divisor */ + si_shader_ctx->shader->shader.uses_instanceid = true; + buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor); + } else { + /* Load the buffer index, which is always stored in VGPR0 + * for Vertex Shaders */ + buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + si_shader_ctx->param_vertex_id); + } + + vec4_type = LLVMVectorType(base->elem_type, 4); + args[0] = t_list; + args[1] = attribute_offset; + args[2] = buffer_index; + input = build_intrinsic(base->gallivm->builder, + "llvm.SI.vs.load.input", vec4_type, args, 3, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + + /* Break up the vec4 into individual components */ + for (chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); + /* XXX: Use a helper function for this. There is one in + * tgsi_llvm.c. 
*/ + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = + LLVMBuildExtractElement(base->gallivm->builder, + input, llvm_chan, ""); + } +} + +static void declare_input_fs( + struct si_shader_context * si_shader_ctx, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + struct si_shader *shader = &si_shader_ctx->shader->shader; + struct lp_build_context * base = + &si_shader_ctx->radeon_bld.soa.bld_base.base; + struct lp_build_context *uint = + &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + struct gallivm_state * gallivm = base->gallivm; + LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn; + + LLVMValueRef interp_param; + const char * intr_name; + + /* This value is: + * [15:0] NewPrimMask (Bit mask for each quad. It is set it the + * quad begins a new primitive. Bit 0 always needs + * to be unset) + * [32:16] ParamOffset + * + */ + LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK); + LLVMValueRef attr_number; + + unsigned chan; + + if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + unsigned soa_index = + radeon_llvm_reg_index_soa(input_index, chan); + si_shader_ctx->radeon_bld.inputs[soa_index] = + LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan); + + if (chan == 3) + /* RCP for fragcoord.w */ + si_shader_ctx->radeon_bld.inputs[soa_index] = + LLVMBuildFDiv(gallivm->builder, + lp_build_const_float(gallivm, 1.0f), + si_shader_ctx->radeon_bld.inputs[soa_index], + ""); + } + return; + } + + if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { + LLVMValueRef face, is_face_positive; + + face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE); + + is_face_positive = LLVMBuildFCmp(gallivm->builder, + LLVMRealUGT, face, + lp_build_const_float(gallivm, 0.0f), + ""); + + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] = + 
LLVMBuildSelect(gallivm->builder, + is_face_positive, + lp_build_const_float(gallivm, 1.0f), + lp_build_const_float(gallivm, 0.0f), + ""); + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] = + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] = + lp_build_const_float(gallivm, 0.0f); + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] = + lp_build_const_float(gallivm, 1.0f); + + return; + } + + shader->input[input_index].param_offset = shader->ninterp++; + attr_number = lp_build_const_int32(gallivm, + shader->input[input_index].param_offset); + + switch (decl->Interp.Interpolate) { + case TGSI_INTERPOLATE_COLOR: + if (si_shader_ctx->shader->key.ps.flatshade) { + interp_param = 0; + } else { + if (decl->Interp.Centroid) + interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID); + else + interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER); + } + break; + case TGSI_INTERPOLATE_CONSTANT: + interp_param = 0; + break; + case TGSI_INTERPOLATE_LINEAR: + if (decl->Interp.Centroid) + interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID); + else + interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + if (decl->Interp.Centroid) + interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID); + else + interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER); + break; + default: + fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); + return; + } + + intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; + + /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? 
*/ + if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && + si_shader_ctx->shader->key.ps.color_two_side) { + LLVMValueRef args[4]; + LLVMValueRef face, is_face_positive; + LLVMValueRef back_attr_number = + lp_build_const_int32(gallivm, + shader->input[input_index].param_offset + 1); + + face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE); + + is_face_positive = LLVMBuildFCmp(gallivm->builder, + LLVMRealUGT, face, + lp_build_const_float(gallivm, 0.0f), + ""); + + args[2] = params; + args[3] = interp_param; + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); + unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); + LLVMValueRef front, back; + + args[0] = llvm_chan; + args[1] = attr_number; + front = build_intrinsic(base->gallivm->builder, intr_name, + input_type, args, args[3] ? 4 : 3, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + + args[1] = back_attr_number; + back = build_intrinsic(base->gallivm->builder, intr_name, + input_type, args, args[3] ? 4 : 3, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + + si_shader_ctx->radeon_bld.inputs[soa_index] = + LLVMBuildSelect(gallivm->builder, + is_face_positive, + front, + back, + ""); + } + + shader->ninterp++; + } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) { + LLVMValueRef args[4]; + + args[0] = uint->zero; + args[1] = attr_number; + args[2] = params; + args[3] = interp_param; + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] = + build_intrinsic(base->gallivm->builder, intr_name, + input_type, args, args[3] ? 
4 : 3, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] = + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] = + lp_build_const_float(gallivm, 0.0f); + si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] = + lp_build_const_float(gallivm, 1.0f); + } else { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + LLVMValueRef args[4]; + LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); + unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); + args[0] = llvm_chan; + args[1] = attr_number; + args[2] = params; + args[3] = interp_param; + si_shader_ctx->radeon_bld.inputs[soa_index] = + build_intrinsic(base->gallivm->builder, intr_name, + input_type, args, args[3] ? 4 : 3, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + } + } +} + +static void declare_input( + struct radeon_llvm_context * radeon_bld, + unsigned input_index, + const struct tgsi_full_declaration *decl) +{ + struct si_shader_context * si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + declare_input_vs(si_shader_ctx, input_index, decl); + } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + declare_input_fs(si_shader_ctx, input_index, decl); + } else { + fprintf(stderr, "Warning: Unsupported shader type,\n"); + } +} + +static void declare_system_value( + struct radeon_llvm_context * radeon_bld, + unsigned index, + const struct tgsi_full_declaration *decl) +{ + struct si_shader_context *si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); + LLVMValueRef value = 0; + + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_INSTANCEID: + value = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_instance_id); + break; + + case TGSI_SEMANTIC_VERTEXID: + value = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_vertex_id); + break; + + default: + 
assert(!"unknown system value"); + return; + } + + radeon_bld->system_values[index] = value; +} + +static LLVMValueRef fetch_constant( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct lp_build_context * base = &bld_base->base; + const struct tgsi_ind_register *ireg = ®->Indirect; + unsigned buf, idx; + + LLVMValueRef args[2]; + LLVMValueRef addr; + LLVMValueRef result; + + if (swizzle == LP_CHAN_ALL) { + unsigned chan; + LLVMValueRef values[4]; + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) + values[chan] = fetch_constant(bld_base, reg, type, chan); + + return lp_build_gather_values(bld_base->base.gallivm, values, 4); + } + + buf = reg->Register.Dimension ? reg->Dimension.Index : 0; + idx = reg->Register.Index * 4 + swizzle; + + if (!reg->Register.Indirect) + return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]); + + args[0] = si_shader_ctx->const_resource[buf]; + args[1] = lp_build_const_int32(base->gallivm, idx * 4); + addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; + addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); + addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); + args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]); + + result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type, + args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + + return bitcast(bld_base, type, result); +} + +/* Initialize arguments for the shader export intrinsic */ +static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, + struct tgsi_full_declaration *d, + unsigned index, + unsigned target, + LLVMValueRef *args) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct lp_build_context *uint = + &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + struct 
lp_build_context *base = &bld_base->base; + unsigned compressed = 0; + unsigned chan; + + if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + int cbuf = target - V_008DFC_SQ_EXP_MRT; + + if (cbuf >= 0 && cbuf < 8) { + compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; + + if (compressed) + si_shader_ctx->shader->spi_shader_col_format |= + V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf); + else + si_shader_ctx->shader->spi_shader_col_format |= + V_028714_SPI_SHADER_32_ABGR << (4 * cbuf); + + si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf); + } + } + + if (compressed) { + /* Pixel shader needs to pack output values before export */ + for (chan = 0; chan < 2; chan++ ) { + LLVMValueRef *out_ptr = + si_shader_ctx->radeon_bld.soa.outputs[index]; + args[0] = LLVMBuildLoad(base->gallivm->builder, + out_ptr[2 * chan], ""); + args[1] = LLVMBuildLoad(base->gallivm->builder, + out_ptr[2 * chan + 1], ""); + args[chan + 5] = + build_intrinsic(base->gallivm->builder, + "llvm.SI.packf16", + LLVMInt32TypeInContext(base->gallivm->context), + args, 2, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + args[chan + 7] = args[chan + 5] = + LLVMBuildBitCast(base->gallivm->builder, + args[chan + 5], + LLVMFloatTypeInContext(base->gallivm->context), + ""); + } + + /* Set COMPR flag */ + args[4] = uint->one; + } else { + for (chan = 0; chan < 4; chan++ ) { + LLVMValueRef out_ptr = + si_shader_ctx->radeon_bld.soa.outputs[index][chan]; + /* +5 because the first output value will be + * the 6th argument to the intrinsic. */ + args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, + out_ptr, ""); + } + + /* Clear COMPR flag */ + args[4] = uint->zero; + } + + /* XXX: This controls which components of the output + * registers actually get exported. (e.g bit 0 means export + * X component, bit 1 means export Y component, etc.) I'm + * hard coding this to 0xf for now. In the future, we might + * want to do something else. 
*/ + args[0] = lp_build_const_int32(base->gallivm, 0xf); + + /* Specify whether the EXEC mask represents the valid mask */ + args[1] = uint->zero; + + /* Specify whether this is the last export */ + args[2] = uint->zero; + + /* Specify the target we are exporting */ + args[3] = lp_build_const_int32(base->gallivm, target); + + /* XXX: We probably need to keep track of the output + * values, so we know what we are passing to the next + * stage. */ +} + +static void si_alpha_test(struct lp_build_tgsi_context *bld_base, + unsigned index) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + + if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { + LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3]; + LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_ALPHA_REF); + + LLVMValueRef alpha_pass = + lp_build_cmp(&bld_base->base, + si_shader_ctx->shader->key.ps.alpha_func, + LLVMBuildLoad(gallivm->builder, out_ptr, ""), + alpha_ref); + LLVMValueRef arg = + lp_build_select(&bld_base->base, + alpha_pass, + lp_build_const_float(gallivm, 1.0f), + lp_build_const_float(gallivm, -1.0f)); + + build_intrinsic(gallivm->builder, + "llvm.AMDGPU.kill", + LLVMVoidTypeInContext(gallivm->context), + &arg, 1, 0); + } else { + build_intrinsic(gallivm->builder, + "llvm.AMDGPU.kilp", + LLVMVoidTypeInContext(gallivm->context), + NULL, 0, 0); + } +} + +static void si_alpha_to_one(struct lp_build_tgsi_context *bld_base, + unsigned index) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + + /* set alpha to one */ + LLVMBuildStore(bld_base->base.gallivm->builder, + bld_base->base.one, + si_shader_ctx->radeon_bld.soa.outputs[index][3]); +} + +static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*pos)[9], unsigned index) +{ + struct si_shader_context *si_shader_ctx = 
si_shader_context(bld_base); + struct si_pipe_shader *shader = si_shader_ctx->shader; + struct lp_build_context *base = &bld_base->base; + struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + unsigned reg_index; + unsigned chan; + unsigned const_chan; + LLVMValueRef out_elts[4]; + LLVMValueRef base_elt; + LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, NUM_PIPE_CONST_BUFFERS); + LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, constbuf_index); + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan]; + out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); + } + + for (reg_index = 0; reg_index < 2; reg_index ++) { + LLVMValueRef *args = pos[2 + reg_index]; + + if (!(shader->key.vs.ucps_enabled & (1 << reg_index))) + continue; + + shader->shader.clip_dist_write |= 0xf << (4 * reg_index); + + args[5] = + args[6] = + args[7] = + args[8] = lp_build_const_float(base->gallivm, 0.0f); + + /* Compute dot products of position and user clip plane vectors */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) { + args[0] = const_resource; + args[1] = lp_build_const_int32(base->gallivm, + ((reg_index * 4 + chan) * 4 + + const_chan) * 4); + base_elt = build_intrinsic(base->gallivm->builder, + "llvm.SI.load.const", + base->elem_type, + args, 2, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + args[5 + chan] = + lp_build_add(base, args[5 + chan], + lp_build_mul(base, base_elt, + out_elts[const_chan])); + } + } + + args[0] = lp_build_const_int32(base->gallivm, 0xf); + args[1] = uint->zero; + args[2] = uint->zero; + args[3] = lp_build_const_int32(base->gallivm, + V_008DFC_SQ_EXP_POS + 2 + reg_index); + args[4] = uint->zero; + } +} + +static void si_dump_streamout(struct 
pipe_stream_output_info *so) +{ + unsigned i; + + if (so->num_outputs) + fprintf(stderr, "STREAMOUT\n"); + + for (i = 0; i < so->num_outputs; i++) { + unsigned mask = ((1 << so->output[i].num_components) - 1) << + so->output[i].start_component; + fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n", + i, so->output[i].output_buffer, + so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, + so->output[i].register_index, + mask & 1 ? "x" : "", + mask & 2 ? "y" : "", + mask & 4 ? "z" : "", + mask & 8 ? "w" : ""); + } +} + +/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. + * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), + * or v4i32 (num_channels=3,4). */ +static void build_tbuffer_store(struct si_shader_context *shader, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset, + unsigned dfmt, + unsigned nfmt, + unsigned offen, + unsigned idxen, + unsigned glc, + unsigned slc, + unsigned tfe) +{ + struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef args[] = { + rsrc, + vdata, + LLVMConstInt(i32, num_channels, 0), + vaddr, + soffset, + LLVMConstInt(i32, inst_offset, 0), + LLVMConstInt(i32, dfmt, 0), + LLVMConstInt(i32, nfmt, 0), + LLVMConstInt(i32, offen, 0), + LLVMConstInt(i32, idxen, 0), + LLVMConstInt(i32, glc, 0), + LLVMConstInt(i32, slc, 0), + LLVMConstInt(i32, tfe, 0) + }; + + /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. 
*/ + unsigned func = CLAMP(num_channels, 1, 3) - 1; + const char *types[] = {"i32", "v2i32", "v4i32"}; + char name[256]; + snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); + + lp_build_intrinsic(gallivm->builder, name, + LLVMVoidTypeInContext(gallivm->context), + args, Elements(args)); +} + +static void build_streamout_store(struct si_shader_context *shader, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset) +{ + static unsigned dfmt[] = { + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_DATA_FORMAT_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32_32 + }; + assert(num_channels >= 1 && num_channels <= 4); + + build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset, + inst_offset, dfmt[num_channels-1], + V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); +} + +/* On SI, the vertex shader is responsible for writing streamout data + * to buffers. */ +static void si_llvm_emit_streamout(struct si_shader_context *shader) +{ + struct pipe_stream_output_info *so = &shader->shader->selector->so; + struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; + LLVMBuilderRef builder = gallivm->builder; + int i, j; + struct lp_build_if_state if_ctx; + + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + + LLVMValueRef so_param = + LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_config); + + /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ + LLVMValueRef so_vtx_count = + LLVMBuildAnd(builder, + LLVMBuildLShr(builder, so_param, + LLVMConstInt(i32, 16, 0), ""), + LLVMConstInt(i32, 127, 0), ""); + + LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32, + NULL, 0, LLVMReadNoneAttribute); + + /* can_emit = tid < so_vtx_count; */ + LLVMValueRef can_emit = + LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); + + /* Emit the streamout code conditionally. 
This actually avoids + * out-of-bounds buffer access. The hw tells us via the SGPR + * (so_vtx_count) which threads are allowed to emit streamout data. */ + lp_build_if(&if_ctx, gallivm, can_emit); + { + /* The buffer offset is computed as follows: + * ByteOffset = streamout_offset[buffer_id]*4 + + * (streamout_write_index + thread_id)*stride[buffer_id] + + * attrib_offset + */ + + LLVMValueRef so_write_index = + LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_write_index); + + /* Compute (streamout_write_index + thread_id). */ + so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); + + /* Compute the write offset for each enabled buffer. */ + LLVMValueRef so_write_offset[4] = {}; + for (i = 0; i < 4; i++) { + if (!so->stride[i]) + continue; + + LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_offset[i]); + so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), ""); + + so_write_offset[i] = LLVMBuildMul(builder, so_write_index, + LLVMConstInt(i32, so->stride[i]*4, 0), ""); + so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); + } + + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs; + + /* Write streamout data. */ + for (i = 0; i < so->num_outputs; i++) { + unsigned buf_idx = so->output[i].output_buffer; + unsigned reg = so->output[i].register_index; + unsigned start = so->output[i].start_component; + unsigned num_comps = so->output[i].num_components; + LLVMValueRef out[4]; + + assert(num_comps && num_comps <= 4); + if (!num_comps || num_comps > 4) + continue; + + /* Load the output as int. */ + for (j = 0; j < num_comps; j++) { + out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], ""); + out[j] = LLVMBuildBitCast(builder, out[j], i32, ""); + } + + /* Pack the output. 
*/ + LLVMValueRef vdata = NULL; + + switch (num_comps) { + case 1: /* as i32 */ + vdata = out[0]; + break; + case 2: /* as v2i32 */ + case 3: /* as v4i32 (aligned to 4) */ + case 4: /* as v4i32 */ + vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps))); + for (j = 0; j < num_comps; j++) { + vdata = LLVMBuildInsertElement(builder, vdata, out[j], + LLVMConstInt(i32, j, 0), ""); + } + break; + } + + build_streamout_store(shader, shader->so_buffers[buf_idx], + vdata, num_comps, + so_write_offset[buf_idx], + LLVMConstInt(i32, 0, 0), + so->output[i].dst_offset*4); + } + } + lp_build_endif(&if_ctx); +} + + +static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) +{ + struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); + struct si_shader * shader = &si_shader_ctx->shader->shader; + struct lp_build_context * base = &bld_base->base; + struct lp_build_context * uint = + &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + struct tgsi_parse_context *parse = &si_shader_ctx->parse; + LLVMValueRef args[9]; + LLVMValueRef last_args[9] = { 0 }; + LLVMValueRef pos_args[4][9] = { { 0 } }; + unsigned semantic_name; + unsigned param_count = 0; + int depth_index = -1, stencil_index = -1, psize_index = -1, edgeflag_index = -1; + int layer_index = -1; + int i; + + if (si_shader_ctx->shader->selector->so.num_outputs) { + si_llvm_emit_streamout(si_shader_ctx); + } + + while (!tgsi_parse_end_of_tokens(parse)) { + struct tgsi_full_declaration *d = + &parse->FullToken.FullDeclaration; + unsigned target; + unsigned index; + + tgsi_parse_token(parse); + + if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY && + parse->FullToken.FullProperty.Property.PropertyName == + TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) + shader->fs_write_all = TRUE; + + if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) + continue; + + switch (d->Declaration.File) { + case TGSI_FILE_INPUT: + i = shader->ninput++; + assert(i < 
Elements(shader->input)); + shader->input[i].name = d->Semantic.Name; + shader->input[i].sid = d->Semantic.Index; + shader->input[i].interpolate = d->Interp.Interpolate; + shader->input[i].centroid = d->Interp.Centroid; + continue; + + case TGSI_FILE_OUTPUT: + i = shader->noutput++; + assert(i < Elements(shader->output)); + shader->output[i].name = d->Semantic.Name; + shader->output[i].sid = d->Semantic.Index; + shader->output[i].interpolate = d->Interp.Interpolate; + break; + + default: + continue; + } + + semantic_name = d->Semantic.Name; +handle_semantic: + for (index = d->Range.First; index <= d->Range.Last; index++) { + /* Select the correct target */ + switch(semantic_name) { + case TGSI_SEMANTIC_PSIZE: + shader->vs_out_misc_write = true; + shader->vs_out_point_size = true; + psize_index = index; + continue; + case TGSI_SEMANTIC_EDGEFLAG: + shader->vs_out_misc_write = true; + shader->vs_out_edgeflag = true; + edgeflag_index = index; + continue; + case TGSI_SEMANTIC_LAYER: + shader->vs_out_misc_write = true; + shader->vs_out_layer = true; + layer_index = index; + continue; + case TGSI_SEMANTIC_POSITION: + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + target = V_008DFC_SQ_EXP_POS; + break; + } else { + depth_index = index; + continue; + } + case TGSI_SEMANTIC_STENCIL: + stencil_index = index; + continue; + case TGSI_SEMANTIC_COLOR: + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + case TGSI_SEMANTIC_BCOLOR: + target = V_008DFC_SQ_EXP_PARAM + param_count; + shader->output[i].param_offset = param_count; + param_count++; + } else { + target = V_008DFC_SQ_EXP_MRT + shader->output[i].sid; + if (si_shader_ctx->shader->key.ps.alpha_to_one) { + si_alpha_to_one(bld_base, index); + } + if (shader->output[i].sid == 0 && + si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + si_alpha_test(bld_base, index); + } + break; + case TGSI_SEMANTIC_CLIPDIST: + if (!(si_shader_ctx->shader->key.vs.ucps_enabled & + (1 << d->Semantic.Index))) + continue; + 
shader->clip_dist_write |= + d->Declaration.UsageMask << (d->Semantic.Index << 2); + target = V_008DFC_SQ_EXP_POS + 2 + d->Semantic.Index; + break; + case TGSI_SEMANTIC_CLIPVERTEX: + si_llvm_emit_clipvertex(bld_base, pos_args, index); + continue; + case TGSI_SEMANTIC_FOG: + case TGSI_SEMANTIC_GENERIC: + target = V_008DFC_SQ_EXP_PARAM + param_count; + shader->output[i].param_offset = param_count; + param_count++; + break; + default: + target = 0; + fprintf(stderr, + "Warning: SI unhandled output type:%d\n", + semantic_name); + } + + si_llvm_init_export_args(bld_base, d, index, target, args); + + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && + target >= V_008DFC_SQ_EXP_POS && + target <= (V_008DFC_SQ_EXP_POS + 3)) { + memcpy(pos_args[target - V_008DFC_SQ_EXP_POS], + args, sizeof(args)); + } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT && + semantic_name == TGSI_SEMANTIC_COLOR) { + /* If there is an export instruction waiting to be emitted, do so now. */ + if (last_args[0]) { + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + last_args, 9); + } + + /* This instruction will be emitted at the end of the shader. */ + memcpy(last_args, args, sizeof(args)); + + /* Handle FS_COLOR0_WRITES_ALL_CBUFS. 
*/ + if (shader->fs_write_all && shader->output[i].sid == 0 && + si_shader_ctx->shader->key.ps.nr_cbufs > 1) { + for (int c = 1; c < si_shader_ctx->shader->key.ps.nr_cbufs; c++) { + si_llvm_init_export_args(bld_base, d, index, + V_008DFC_SQ_EXP_MRT + c, args); + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9); + } + } + } else { + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9); + } + } + + if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { + semantic_name = TGSI_SEMANTIC_GENERIC; + goto handle_semantic; + } + } + + if (depth_index >= 0 || stencil_index >= 0) { + LLVMValueRef out_ptr; + unsigned mask = 0; + + /* Specify the target we are exporting */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ); + + if (depth_index >= 0) { + out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2]; + args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); + mask |= 0x1; + + if (stencil_index < 0) { + args[6] = + args[7] = + args[8] = args[5]; + } + } + + if (stencil_index >= 0) { + out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1]; + args[7] = + args[8] = + args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); + /* Only setting the stencil component bit (0x2) here + * breaks some stencil piglit tests + */ + mask |= 0x3; + + if (depth_index < 0) + args[5] = args[6]; + } + + /* Specify which components to enable */ + args[0] = lp_build_const_int32(base->gallivm, mask); + + args[1] = + args[2] = + args[4] = uint->zero; + + if (last_args[0]) + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9); + else + memcpy(last_args, args, sizeof(args)); + } + + if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { + unsigned pos_idx = 0; + + /* We need to add the position output manually if it's missing. 
*/ + if (!pos_args[0][0]) { + pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */ + pos_args[0][1] = uint->zero; /* EXEC mask */ + pos_args[0][2] = uint->zero; /* last export? */ + pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS); + pos_args[0][4] = uint->zero; /* COMPR flag */ + pos_args[0][5] = base->zero; /* X */ + pos_args[0][6] = base->zero; /* Y */ + pos_args[0][7] = base->zero; /* Z */ + pos_args[0][8] = base->one; /* W */ + } + + /* Write the misc vector (point size, edgeflag, layer, viewport). */ + if (shader->vs_out_misc_write) { + pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */ + shader->vs_out_point_size | + (shader->vs_out_edgeflag << 1) | + (shader->vs_out_layer << 2)); + pos_args[1][1] = uint->zero; /* EXEC mask */ + pos_args[1][2] = uint->zero; /* last export? */ + pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1); + pos_args[1][4] = uint->zero; /* COMPR flag */ + pos_args[1][5] = base->zero; /* X */ + pos_args[1][6] = base->zero; /* Y */ + pos_args[1][7] = base->zero; /* Z */ + pos_args[1][8] = base->zero; /* W */ + + if (shader->vs_out_point_size) { + pos_args[1][5] = LLVMBuildLoad(base->gallivm->builder, + si_shader_ctx->radeon_bld.soa.outputs[psize_index][0], ""); + } + + if (shader->vs_out_edgeflag) { + LLVMValueRef output = LLVMBuildLoad(base->gallivm->builder, + si_shader_ctx->radeon_bld.soa.outputs[edgeflag_index][0], ""); + + /* The output is a float, but the hw expects an integer + * with the first bit containing the edge flag. */ + output = LLVMBuildFPToUI(base->gallivm->builder, output, + bld_base->uint_bld.elem_type, ""); + + output = lp_build_min(&bld_base->int_bld, output, bld_base->int_bld.one); + + /* The LLVM intrinsic expects a float. 
*/ + pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder, output, + base->elem_type, ""); + } + + if (shader->vs_out_layer) { + pos_args[1][7] = LLVMBuildLoad(base->gallivm->builder, + si_shader_ctx->radeon_bld.soa.outputs[layer_index][0], ""); + } + } + + for (i = 0; i < 4; i++) + if (pos_args[i][0]) + shader->nr_pos_exports++; + + for (i = 0; i < 4; i++) { + if (!pos_args[i][0]) + continue; + + /* Specify the target we are exporting */ + pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); + + if (pos_idx == shader->nr_pos_exports) + /* Specify that this is the last export */ + pos_args[i][2] = uint->one; + + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + pos_args[i], 9); + } + } else { + if (!last_args[0]) { + /* Specify which components to enable */ + last_args[0] = lp_build_const_int32(base->gallivm, 0x0); + + /* Specify the target we are exporting */ + last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); + + /* Set COMPR flag to zero to export data as 32-bit */ + last_args[4] = uint->zero; + + /* dummy bits */ + last_args[5]= uint->zero; + last_args[6]= uint->zero; + last_args[7]= uint->zero; + last_args[8]= uint->zero; + + si_shader_ctx->shader->spi_shader_col_format |= + V_028714_SPI_SHADER_32_ABGR; + si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf); + } + + /* Specify whether the EXEC mask represents the valid mask */ + last_args[1] = uint->one; + + /* Specify that this is the last export */ + last_args[2] = lp_build_const_int32(base->gallivm, 1); + + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + last_args, 9); + } +} + +static const struct lp_build_tgsi_action txf_action; + +static void build_tex_intrinsic(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data); + +static 
void tex_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned opcode = inst->Instruction.Opcode; + unsigned target = inst->Texture.Texture; + LLVMValueRef coords[4]; + LLVMValueRef address[16]; + int ref_pos; + unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos); + unsigned count = 0; + unsigned chan; + unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; + unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index; + + if (target == TGSI_TEXTURE_BUFFER) { + LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128); + LLVMTypeRef v2i128 = LLVMVectorType(i128, 2); + LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context); + LLVMTypeRef v16i8 = LLVMVectorType(i8, 16); + + /* Truncate v32i8 to v16i8. 
*/ + LLVMValueRef res = si_shader_ctx->resources[sampler_index]; + res = LLVMBuildBitCast(gallivm->builder, res, v2i128, ""); + res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.zero, ""); + res = LLVMBuildBitCast(gallivm->builder, res, v16i8, ""); + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + emit_data->args[0] = res; + emit_data->args[1] = bld_base->uint_bld.zero; + emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); + emit_data->arg_count = 3; + return; + } + + /* Fetch and project texture coordinates */ + coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + for (chan = 0; chan < 3; chan++ ) { + coords[chan] = lp_build_emit_fetch(bld_base, + emit_data->inst, 0, + chan); + if (opcode == TGSI_OPCODE_TXP) + coords[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, + coords[chan], + coords[3]); + } + + if (opcode == TGSI_OPCODE_TXP) + coords[3] = bld_base->base.one; + + /* Pack LOD bias value */ + if (opcode == TGSI_OPCODE_TXB) + address[count++] = coords[3]; + + if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords); + + /* Pack depth comparison value */ + switch (target) { + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + assert(ref_pos >= 0); + address[count++] = coords[ref_pos]; + break; + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0); + } + + /* Pack user derivatives */ + if (opcode == TGSI_OPCODE_TXD) { + for (chan = 0; chan < 2; chan++) { + address[count++] = lp_build_emit_fetch(bld_base, inst, 1, chan); + if (num_coords > 1) + address[count++] = lp_build_emit_fetch(bld_base, inst, 2, chan); + } + } + + /* Pack texture coordinates */ + address[count++] = coords[0]; + if 
(num_coords > 1) + address[count++] = coords[1]; + if (num_coords > 2) + address[count++] = coords[2]; + + /* Pack LOD or sample index */ + if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) + address[count++] = coords[3]; + + if (count > 16) { + assert(!"Cannot handle more than 16 texture address parameters"); + count = 16; + } + + for (chan = 0; chan < count; chan++ ) { + address[chan] = LLVMBuildBitCast(gallivm->builder, + address[chan], + LLVMInt32TypeInContext(gallivm->context), + ""); + } + + /* Adjust the sample index according to FMASK. + * + * For uncompressed MSAA surfaces, FMASK should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * For example, 0x11111100 means there are only 2 samples stored and + * the second sample covers 3/4 of the pixel. When reading samples 0 + * and 1, return physical sample 0 (determined by the first two 0s + * in FMASK), otherwise return physical sample 1. + * + * The sample index should be adjusted as follows: + * sample_index = (fmask >> (sample_index * 4)) & 0xF; + */ + if (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) { + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_emit_data txf_emit_data = *emit_data; + LLVMValueRef txf_address[4]; + unsigned txf_count = count; + + memcpy(txf_address, address, sizeof(txf_address)); + + if (target == TGSI_TEXTURE_2D_MSAA) { + txf_address[2] = bld_base->uint_bld.zero; + } + txf_address[3] = bld_base->uint_bld.zero; + + /* Pad to a power-of-two size. */ + while (txf_count < util_next_power_of_two(txf_count)) + txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + + /* Read FMASK using TXF. 
*/ + txf_emit_data.chan = 0; + txf_emit_data.dst_type = LLVMVectorType( + LLVMInt32TypeInContext(bld_base->base.gallivm->context), 4); + txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count); + txf_emit_data.args[1] = si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index]; + txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm, + target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY); + txf_emit_data.arg_count = 3; + + build_tex_intrinsic(&txf_action, bld_base, &txf_emit_data); + + /* Initialize some constants. */ + LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0); + LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0); + + /* Apply the formula. */ + LLVMValueRef fmask = + LLVMBuildExtractElement(gallivm->builder, + txf_emit_data.output[0], + uint_bld->zero, ""); + + unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; + + LLVMValueRef sample_index4 = + LLVMBuildMul(gallivm->builder, address[sample_chan], four, ""); + + LLVMValueRef shifted_fmask = + LLVMBuildLShr(gallivm->builder, fmask, sample_index4, ""); + + LLVMValueRef final_sample = + LLVMBuildAnd(gallivm->builder, shifted_fmask, F, ""); + + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK + * resource descriptor is 0 (invalid), + */ + LLVMValueRef fmask_desc = + LLVMBuildBitCast(gallivm->builder, + si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index], + LLVMVectorType(uint_bld->elem_type, 8), ""); + + LLVMValueRef fmask_word1 = + LLVMBuildExtractElement(gallivm->builder, fmask_desc, + uint_bld->one, ""); + + LLVMValueRef word1_is_nonzero = + LLVMBuildICmp(gallivm->builder, LLVMIntNE, + fmask_word1, uint_bld->zero, ""); + + /* Replace the MSAA sample index. 
*/ + address[sample_chan] = + LLVMBuildSelect(gallivm->builder, word1_is_nonzero, + final_sample, address[sample_chan], ""); + } + + /* Resource */ + emit_data->args[1] = si_shader_ctx->resources[sampler_index]; + + if (opcode == TGSI_OPCODE_TXF) { + /* add tex offsets */ + if (inst->Texture.NumOffsets) { + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + const struct tgsi_texture_offset * off = inst->TexOffsets; + + assert(inst->Texture.NumOffsets == 1); + + switch (target) { + case TGSI_TEXTURE_3D: + address[2] = lp_build_add(uint_bld, address[2], + bld->immediates[off->Index][off->SwizzleZ]); + /* fall through */ + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + address[1] = + lp_build_add(uint_bld, address[1], + bld->immediates[off->Index][off->SwizzleY]); + /* fall through */ + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + address[0] = + lp_build_add(uint_bld, address[0], + bld->immediates[off->Index][off->SwizzleX]); + break; + /* texture offsets do not apply to other texture targets */ + } + } + + emit_data->dst_type = LLVMVectorType( + LLVMInt32TypeInContext(bld_base->base.gallivm->context), + 4); + + emit_data->arg_count = 3; + } else { + /* Sampler */ + emit_data->args[2] = si_shader_ctx->samplers[sampler_index]; + + emit_data->dst_type = LLVMVectorType( + LLVMFloatTypeInContext(bld_base->base.gallivm->context), + 4); + + emit_data->arg_count = 4; + } + + /* Dimensions */ + emit_data->args[emit_data->arg_count - 1] = + lp_build_const_int32(bld_base->base.gallivm, target); + + /* Pad to power of two vector */ + while (count < util_next_power_of_two(count)) + address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + + emit_data->args[0] = 
lp_build_gather_values(gallivm, address, count); +} + +static void build_tex_intrinsic(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_context * base = &bld_base->base; + char intr_name[127]; + + if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + emit_data->output[emit_data->chan] = build_intrinsic( + base->gallivm->builder, + "llvm.SI.vs.load.input", emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + return; + } + + sprintf(intr_name, "%sv%ui32", action->intr_name, + LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0]))); + + emit_data->output[emit_data->chan] = build_intrinsic( + base->gallivm->builder, intr_name, emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); +} + +static void txq_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + const struct tgsi_full_instruction *inst = emit_data->inst; + struct gallivm_state *gallivm = bld_base->base.gallivm; + + if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef v8i32 = LLVMVectorType(i32, 8); + + /* Read the size from the buffer descriptor directly. 
*/ + LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index]; + size = LLVMBuildBitCast(gallivm->builder, size, v8i32, ""); + size = LLVMBuildExtractElement(gallivm->builder, size, + lp_build_const_int32(gallivm, 2), ""); + emit_data->args[0] = size; + return; + } + + /* Mip level */ + emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + + /* Resource */ + emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index]; + + /* Dimensions */ + emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm, + inst->Texture.Texture); + + emit_data->arg_count = 3; + + emit_data->dst_type = LLVMVectorType( + LLVMInt32TypeInContext(bld_base->base.gallivm->context), + 4); +} + +static void build_txq_intrinsic(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + /* Just return the buffer size. */ + emit_data->output[emit_data->chan] = emit_data->args[0]; + return; + } + + build_tgsi_intrinsic_nomem(action, bld_base, emit_data); +} + +#if HAVE_LLVM >= 0x0304 + +static void si_llvm_emit_ddxy( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_context * base = &bld_base->base; + const struct tgsi_full_instruction *inst = emit_data->inst; + unsigned opcode = inst->Instruction.Opcode; + LLVMValueRef indices[2]; + LLVMValueRef store_ptr, load_ptr0, load_ptr1; + LLVMValueRef tl, trbl, result[4]; + LLVMTypeRef i32; + unsigned swizzle[4]; + unsigned c; + + i32 = LLVMInt32TypeInContext(gallivm->context); + + indices[0] = bld_base->uint_bld.zero; + indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32, + NULL, 0, LLVMReadNoneAttribute); + store_ptr = 
LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, + indices, 2, ""); + + indices[1] = LLVMBuildAnd(gallivm->builder, indices[1], + lp_build_const_int32(gallivm, 0xfffffffc), ""); + load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, + indices, 2, ""); + + indices[1] = LLVMBuildAdd(gallivm->builder, indices[1], + lp_build_const_int32(gallivm, + opcode == TGSI_OPCODE_DDX ? 1 : 2), + ""); + load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds, + indices, 2, ""); + + for (c = 0; c < 4; ++c) { + unsigned i; + + swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c); + for (i = 0; i < c; ++i) { + if (swizzle[i] == swizzle[c]) { + result[c] = result[i]; + break; + } + } + if (i != c) + continue; + + LLVMBuildStore(gallivm->builder, + LLVMBuildBitCast(gallivm->builder, + lp_build_emit_fetch(bld_base, inst, 0, c), + i32, ""), + store_ptr); + + tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); + tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, ""); + + trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); + trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, ""); + + result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, ""); + } + + emit_data->output[0] = lp_build_gather_values(gallivm, result, 4); +} + +#endif /* HAVE_LLVM >= 0x0304 */ + +static const struct lp_build_tgsi_action tex_action = { + .fetch_args = tex_fetch_args, + .emit = build_tex_intrinsic, + .intr_name = "llvm.SI.sample." +}; + +static const struct lp_build_tgsi_action txb_action = { + .fetch_args = tex_fetch_args, + .emit = build_tex_intrinsic, + .intr_name = "llvm.SI.sampleb." +}; + +#if HAVE_LLVM >= 0x0304 +static const struct lp_build_tgsi_action txd_action = { + .fetch_args = tex_fetch_args, + .emit = build_tex_intrinsic, + .intr_name = "llvm.SI.sampled." +}; +#endif + +static const struct lp_build_tgsi_action txf_action = { + .fetch_args = tex_fetch_args, + .emit = build_tex_intrinsic, + .intr_name = "llvm.SI.imageload." 
+}; + +static const struct lp_build_tgsi_action txl_action = { + .fetch_args = tex_fetch_args, + .emit = build_tex_intrinsic, + .intr_name = "llvm.SI.samplel." +}; + +static const struct lp_build_tgsi_action txq_action = { + .fetch_args = txq_fetch_args, + .emit = build_txq_intrinsic, + .intr_name = "llvm.SI.resinfo" +}; + +static void create_meta_data(struct si_shader_context *si_shader_ctx) +{ + struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm; + LLVMValueRef args[3]; + + args[0] = LLVMMDStringInContext(gallivm->context, "const", 5); + args[1] = 0; + args[2] = lp_build_const_int32(gallivm, 1); + + si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3); +} + +static void create_function(struct si_shader_context *si_shader_ctx) +{ + struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32; + unsigned i, last_sgpr, num_params; + + i8 = LLVMInt8TypeInContext(gallivm->context); + i32 = LLVMInt32TypeInContext(gallivm->context); + f32 = LLVMFloatTypeInContext(gallivm->context); + v2i32 = LLVMVectorType(i32, 2); + v3i32 = LLVMVectorType(i32, 3); + + params[SI_PARAM_CONST] = LLVMPointerType( + LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE); + /* We assume at most 16 textures per program at the moment. 
+ * This need probably need to be changed to support bindless textures */ + params[SI_PARAM_SAMPLER] = LLVMPointerType( + LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE); + params[SI_PARAM_RESOURCE] = LLVMPointerType( + LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_STATES), CONST_ADDR_SPACE); + + switch (si_shader_ctx->type) { + case TGSI_PROCESSOR_VERTEX: + params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; + params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST]; + params[SI_PARAM_START_INSTANCE] = i32; + num_params = SI_PARAM_START_INSTANCE+1; + + /* The locations of the other parameters are assigned dynamically. */ + + /* Streamout SGPRs. */ + if (si_shader_ctx->shader->selector->so.num_outputs) { + params[si_shader_ctx->param_streamout_config = num_params++] = i32; + params[si_shader_ctx->param_streamout_write_index = num_params++] = i32; + } + /* A streamout buffer offset is loaded if the stride is non-zero. */ + for (i = 0; i < 4; i++) { + if (!si_shader_ctx->shader->selector->so.stride[i]) + continue; + + params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32; + } + + last_sgpr = num_params-1; + + /* VGPRs */ + params[si_shader_ctx->param_vertex_id = num_params++] = i32; + params[num_params++] = i32; /* unused*/ + params[num_params++] = i32; /* unused */ + params[si_shader_ctx->param_instance_id = num_params++] = i32; + break; + + case TGSI_PROCESSOR_FRAGMENT: + params[SI_PARAM_ALPHA_REF] = f32; + params[SI_PARAM_PRIM_MASK] = i32; + last_sgpr = SI_PARAM_PRIM_MASK; + params[SI_PARAM_PERSP_SAMPLE] = v2i32; + params[SI_PARAM_PERSP_CENTER] = v2i32; + params[SI_PARAM_PERSP_CENTROID] = v2i32; + params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; + params[SI_PARAM_LINEAR_SAMPLE] = v2i32; + params[SI_PARAM_LINEAR_CENTER] = v2i32; + params[SI_PARAM_LINEAR_CENTROID] = v2i32; + params[SI_PARAM_LINE_STIPPLE_TEX] = f32; + params[SI_PARAM_POS_X_FLOAT] = f32; + params[SI_PARAM_POS_Y_FLOAT] = f32; + params[SI_PARAM_POS_Z_FLOAT] = 
f32; + params[SI_PARAM_POS_W_FLOAT] = f32; + params[SI_PARAM_FRONT_FACE] = f32; + params[SI_PARAM_ANCILLARY] = f32; + params[SI_PARAM_SAMPLE_COVERAGE] = f32; + params[SI_PARAM_POS_FIXED_PT] = f32; + num_params = SI_PARAM_POS_FIXED_PT+1; + break; + + default: + assert(0 && "unimplemented shader"); + return; + } + + assert(num_params <= Elements(params)); + radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); + radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); + + for (i = 0; i <= last_sgpr; ++i) { + LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); + switch (i) { + default: + LLVMAddAttribute(P, LLVMInRegAttribute); + break; +#if HAVE_LLVM >= 0x0304 + /* We tell llvm that array inputs are passed by value to allow Sinking pass + * to move load. Inputs are constant so this is fine. */ + case SI_PARAM_CONST: + case SI_PARAM_SAMPLER: + case SI_PARAM_RESOURCE: + LLVMAddAttribute(P, LLVMByValAttribute); + break; +#endif + } + } + +#if HAVE_LLVM >= 0x0304 + if (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || + bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0) + si_shader_ctx->ddxy_lds = + LLVMAddGlobalInAddressSpace(gallivm->module, + LLVMArrayType(i32, 64), + "ddxy_lds", + LOCAL_ADDR_SPACE); +#endif +} + +static void preload_constants(struct si_shader_context *si_shader_ctx) +{ + struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; + struct gallivm_state * gallivm = bld_base->base.gallivm; + const struct tgsi_shader_info * info = bld_base->info; + unsigned buf; + LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + + for (buf = 0; buf < NUM_CONST_BUFFERS; buf++) { + unsigned i, num_const = info->const_file_max[buf] + 1; + + if (num_const == 0) + continue; + + /* Allocate space for the constant values */ + si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef)); + + /* Load the resource descriptor */ + 
si_shader_ctx->const_resource[buf] = + build_indexed_load(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf)); + + /* Load the constants, we rely on the code sinking to do the rest */ + for (i = 0; i < num_const * 4; ++i) { + LLVMValueRef args[2] = { + si_shader_ctx->const_resource[buf], + lp_build_const_int32(gallivm, i * 4) + }; + si_shader_ctx->constants[buf][i] = + build_intrinsic(gallivm->builder, "llvm.SI.load.const", + bld_base->base.elem_type, args, 2, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + } + } +} + +static void preload_samplers(struct si_shader_context *si_shader_ctx) +{ + struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; + struct gallivm_state * gallivm = bld_base->base.gallivm; + const struct tgsi_shader_info * info = bld_base->info; + + unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1; + + LLVMValueRef res_ptr, samp_ptr; + LLVMValueRef offset; + + if (num_samplers == 0) + return; + + /* Allocate space for the values */ + si_shader_ctx->resources = CALLOC(NUM_SAMPLER_VIEWS, sizeof(LLVMValueRef)); + si_shader_ctx->samplers = CALLOC(num_samplers, sizeof(LLVMValueRef)); + + res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE); + samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER); + + /* Load the resources and samplers, we rely on the code sinking to do the rest */ + for (i = 0; i < num_samplers; ++i) { + /* Resource */ + offset = lp_build_const_int32(gallivm, i); + si_shader_ctx->resources[i] = build_indexed_load(si_shader_ctx, res_ptr, offset); + + /* Sampler */ + offset = lp_build_const_int32(gallivm, i); + si_shader_ctx->samplers[i] = build_indexed_load(si_shader_ctx, samp_ptr, offset); + + /* FMASK resource */ + if (info->is_msaa_sampler[i]) { + offset = lp_build_const_int32(gallivm, FMASK_TEX_OFFSET + i); + si_shader_ctx->resources[FMASK_TEX_OFFSET + i] = + build_indexed_load(si_shader_ctx, res_ptr, offset); + } + } +} + +static void 
preload_streamout_buffers(struct si_shader_context *si_shader_ctx) +{ + struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; + struct gallivm_state * gallivm = bld_base->base.gallivm; + unsigned i; + + if (!si_shader_ctx->shader->selector->so.num_outputs) + return; + + LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_SO_BUFFER); + + /* Load the resources, we rely on the code sinking to do the rest */ + for (i = 0; i < 4; ++i) { + if (si_shader_ctx->shader->selector->so.stride[i]) { + LLVMValueRef offset = lp_build_const_int32(gallivm, i); + + si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset); + } + } +} + +int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, + LLVMModuleRef mod) +{ + unsigned i; + uint32_t *ptr; + struct radeon_llvm_binary binary; + bool dump = r600_can_dump_shader(&rctx->screen->b, + shader->selector ? shader->selector->tokens : NULL); + memset(&binary, 0, sizeof(binary)); + radeon_llvm_compile(mod, &binary, + r600_get_llvm_processor_name(rctx->screen->b.family), dump); + if (dump && ! binary.disassembled) { + fprintf(stderr, "SI CODE:\n"); + for (i = 0; i < binary.code_size; i+=4 ) { + fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3], + binary.code[i + 2], binary.code[i + 1], + binary.code[i]); + } + } + + /* XXX: We may be able to emit some of these values directly rather than + * extracting fields to be emitted later. 
+ */ + for (i = 0; i < binary.config_size; i+= 8) { + unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i)); + unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); + switch (reg) { + case R_00B028_SPI_SHADER_PGM_RSRC1_PS: + case R_00B128_SPI_SHADER_PGM_RSRC1_VS: + case R_00B228_SPI_SHADER_PGM_RSRC1_GS: + case R_00B848_COMPUTE_PGM_RSRC1: + shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8; + shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4; + break; + case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: + shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value); + break; + case R_00B84C_COMPUTE_PGM_RSRC2: + shader->lds_size = G_00B84C_LDS_SIZE(value); + break; + case R_0286CC_SPI_PS_INPUT_ENA: + shader->spi_ps_input_ena = value; + break; + default: + fprintf(stderr, "Warning: Compiler emitted unknown " + "config register: 0x%x\n", reg); + break; + } + } + + /* copy new shader */ + r600_resource_reference(&shader->bo, NULL); + shader->bo = r600_resource_create_custom(rctx->b.b.screen, PIPE_USAGE_IMMUTABLE, + binary.code_size); + if (shader->bo == NULL) { + return -ENOMEM; + } + + ptr = (uint32_t*)rctx->b.ws->buffer_map(shader->bo->cs_buf, rctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); + if (0 /*R600_BIG_ENDIAN*/) { + for (i = 0; i < binary.code_size / 4; ++i) { + ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4)); + } + } else { + memcpy(ptr, binary.code, binary.code_size); + } + rctx->b.ws->buffer_unmap(shader->bo->cs_buf); + + free(binary.code); + free(binary.config); + + return 0; +} + +int si_pipe_shader_create( + struct pipe_context *ctx, + struct si_pipe_shader *shader) +{ + struct r600_context *rctx = (struct r600_context*)ctx; + struct si_pipe_shader_selector *sel = shader->selector; + struct si_shader_context si_shader_ctx; + struct tgsi_shader_info shader_info; + struct lp_build_tgsi_context * bld_base; + LLVMModuleRef mod; + int r = 0; + bool dump = r600_can_dump_shader(&rctx->screen->b, shader->selector->tokens); + + 
assert(shader->shader.noutput == 0); + assert(shader->shader.ninterp == 0); + assert(shader->shader.ninput == 0); + + memset(&si_shader_ctx, 0, sizeof(si_shader_ctx)); + radeon_llvm_context_init(&si_shader_ctx.radeon_bld); + bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; + + tgsi_scan_shader(sel->tokens, &shader_info); + + shader->shader.uses_kill = shader_info.uses_kill; + shader->shader.uses_instanceid = shader_info.uses_instanceid; + bld_base->info = &shader_info; + bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; + bld_base->emit_epilogue = si_llvm_emit_epilogue; + + bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; + bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action; +#if HAVE_LLVM >= 0x0304 + bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action; +#endif + bld_base->op_actions[TGSI_OPCODE_TXF] = txf_action; + bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action; + bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; + bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action; + +#if HAVE_LLVM >= 0x0304 + bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; + bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; +#endif + + si_shader_ctx.radeon_bld.load_input = declare_input; + si_shader_ctx.radeon_bld.load_system_value = declare_system_value; + si_shader_ctx.tokens = sel->tokens; + tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); + si_shader_ctx.shader = shader; + si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; + + create_meta_data(&si_shader_ctx); + create_function(&si_shader_ctx); + preload_constants(&si_shader_ctx); + preload_samplers(&si_shader_ctx); + preload_streamout_buffers(&si_shader_ctx); + + /* Dump TGSI code before doing TGSI->LLVM conversion in case the + * conversion fails. 
*/ + if (dump) { + tgsi_dump(sel->tokens, 0); + si_dump_streamout(&sel->so); + } + + if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { + fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); + for (int i = 0; i < NUM_CONST_BUFFERS; i++) + FREE(si_shader_ctx.constants[i]); + FREE(si_shader_ctx.resources); + FREE(si_shader_ctx.samplers); + return -EINVAL; + } + + radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); + + mod = bld_base->base.gallivm->module; + r = si_compile_llvm(rctx, shader, mod); + + radeon_llvm_dispose(&si_shader_ctx.radeon_bld); + tgsi_parse_free(&si_shader_ctx.parse); + + for (int i = 0; i < NUM_CONST_BUFFERS; i++) + FREE(si_shader_ctx.constants[i]); + FREE(si_shader_ctx.resources); + FREE(si_shader_ctx.samplers); + + return r; +} + +void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) +{ + r600_resource_reference(&shader->bo, NULL); +} diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h new file mode 100644 index 00000000000..2a15a9fa2bd --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -0,0 +1,161 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Tom Stellard + * Michel Dänzer + * Christian König + */ + +#ifndef SI_SHADER_H +#define SI_SHADER_H + +#include <llvm-c/Core.h> /* LLVMModuleRef */ + +#define SI_SGPR_CONST 0 +#define SI_SGPR_SAMPLER 2 +#define SI_SGPR_RESOURCE 4 +#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */ +#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */ +#define SI_SGPR_START_INSTANCE 10 /* VS only */ +#define SI_SGPR_ALPHA_REF 6 /* PS only */ + +#define SI_VS_NUM_USER_SGPR 11 +#define SI_PS_NUM_USER_SGPR 7 + +/* LLVM function parameter indices */ +#define SI_PARAM_CONST 0 +#define SI_PARAM_SAMPLER 1 +#define SI_PARAM_RESOURCE 2 + +/* VS only parameters */ +#define SI_PARAM_VERTEX_BUFFER 3 +#define SI_PARAM_SO_BUFFER 4 +#define SI_PARAM_START_INSTANCE 5 +/* the other VS parameters are assigned dynamically */ + +/* PS only parameters */ +#define SI_PARAM_ALPHA_REF 3 +#define SI_PARAM_PRIM_MASK 4 +#define SI_PARAM_PERSP_SAMPLE 5 +#define SI_PARAM_PERSP_CENTER 6 +#define SI_PARAM_PERSP_CENTROID 7 +#define SI_PARAM_PERSP_PULL_MODEL 8 +#define SI_PARAM_LINEAR_SAMPLE 9 +#define SI_PARAM_LINEAR_CENTER 10 +#define SI_PARAM_LINEAR_CENTROID 11 +#define SI_PARAM_LINE_STIPPLE_TEX 12 +#define SI_PARAM_POS_X_FLOAT 13 +#define SI_PARAM_POS_Y_FLOAT 14 +#define SI_PARAM_POS_Z_FLOAT 15 +#define SI_PARAM_POS_W_FLOAT 16 +#define SI_PARAM_FRONT_FACE 17 +#define SI_PARAM_ANCILLARY 18 +#define SI_PARAM_SAMPLE_COVERAGE 19 +#define SI_PARAM_POS_FIXED_PT 20 + +struct si_shader_io { + unsigned name; + int sid; + unsigned 
param_offset; + unsigned interpolate; + bool centroid; +}; + +struct si_pipe_shader; + +struct si_pipe_shader_selector { + struct si_pipe_shader *current; + + struct tgsi_token *tokens; + struct pipe_stream_output_info so; + + unsigned num_shaders; + + /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ + unsigned type; + + /* 1 when the shader contains + * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0. + * Used to determine whether we need to include nr_cbufs in the key */ + unsigned fs_write_all; +}; + +struct si_shader { + unsigned ninput; + struct si_shader_io input[40]; + + unsigned noutput; + struct si_shader_io output[40]; + + unsigned ninterp; + bool uses_kill; + bool uses_instanceid; + bool fs_write_all; + bool vs_out_misc_write; + bool vs_out_point_size; + bool vs_out_edgeflag; + bool vs_out_layer; + unsigned nr_pos_exports; + unsigned clip_dist_write; +}; + +union si_shader_key { + struct { + unsigned export_16bpc:8; + unsigned nr_cbufs:4; + unsigned color_two_side:1; + unsigned alpha_func:3; + unsigned flatshade:1; + unsigned alpha_to_one:1; + } ps; + struct { + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; + unsigned ucps_enabled:2; + } vs; +}; + +struct si_pipe_shader { + struct si_pipe_shader_selector *selector; + struct si_pipe_shader *next_variant; + struct si_shader shader; + struct si_pm4_state *pm4; + struct r600_resource *bo; + unsigned num_sgprs; + unsigned num_vgprs; + unsigned lds_size; + unsigned spi_ps_input_ena; + unsigned spi_shader_col_format; + unsigned cb_shader_mask; + bool cb0_is_integer; + unsigned sprite_coord_enable; + union si_shader_key key; +}; + +/* radeonsi_shader.c */ +int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); +int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); +int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, + LLVMModuleRef mod); +void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader); + 
+#endif diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ba734016905..10d016730d3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -34,8 +34,8 @@ #include "util/u_format_s3tc.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" -#include "radeonsi_pipe.h" -#include "radeonsi_shader.h" +#include "si_pipe.h" +#include "si_shader.h" #include "si_state.h" #include "../radeon/r600_cs.h" #include "sid.h" diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 6fac4f0442c..3002d56e536 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -27,7 +27,7 @@ #ifndef SI_STATE_H #define SI_STATE_H -#include "radeonsi_pm4.h" +#include "si_pm4.h" #include "../radeon/r600_pipe_common.h" struct si_state_blend { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f64b51a04ff..4d5cd0166de 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -28,8 +28,8 @@ #include "util/u_framebuffer.h" #include "util/u_blitter.h" #include "tgsi/tgsi_parse.h" -#include "radeonsi_pipe.h" -#include "radeonsi_shader.h" +#include "si_pipe.h" +#include "si_shader.h" #include "si_state.h" #include "../radeon/r600_cs.h" #include "sid.h" diff --git a/src/gallium/drivers/radeonsi/si_translate.c b/src/gallium/drivers/radeonsi/si_translate.c new file mode 100644 index 00000000000..158bdc8b0da --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_translate.c @@ -0,0 +1,53 @@ +/* + * Copyright 2010 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Dave Airlie + */ + +#include "util/u_index_modify.h" +#include "util/u_upload_mgr.h" +#include "si_pipe.h" + + +void r600_translate_index_buffer(struct r600_context *r600, + struct pipe_index_buffer *ib, + unsigned count) +{ + struct pipe_resource *out_buffer = NULL; + unsigned out_offset; + void *ptr; + + switch (ib->index_size) { + case 1: + u_upload_alloc(r600->b.uploader, 0, count * 2, + &out_offset, &out_buffer, &ptr); + + util_shorten_ubyte_elts_to_userptr( + &r600->b.b, ib, 0, ib->offset, count, ptr); + + pipe_resource_reference(&ib->buffer, NULL); + ib->buffer = out_buffer; + ib->offset = out_offset; + ib->index_size = 2; + break; + } +} diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c new file mode 100644 index 00000000000..847c8179992 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_uvd.c @@ -0,0 +1,153 @@ +/************************************************************************** + * + * Copyright 2011 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Authors: + * Christian König + * + */ + +#include <sys/types.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> + +#include "pipe/p_video_codec.h" + +#include "util/u_memory.h" +#include "util/u_video.h" + +#include "vl/vl_defines.h" +#include "vl/vl_mpeg12_decoder.h" + +#include "si_pipe.h" +#include "radeon/radeon_uvd.h" +#include "sid.h" + +/** + * creates an video buffer with an UVD compatible memory layout + */ +struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe, + const struct pipe_video_buffer *tmpl) +{ + struct r600_context *ctx = (struct r600_context *)pipe; + struct r600_texture *resources[VL_NUM_COMPONENTS] = {}; + struct radeon_surface *surfaces[VL_NUM_COMPONENTS] = {}; + struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {}; + const enum pipe_format *resource_formats; + struct pipe_video_buffer template; + struct pipe_resource templ; + unsigned i, array_size; + + assert(pipe); + + /* first create the needed resources as "normal" textures */ + resource_formats = vl_video_buffer_formats(pipe->screen, tmpl->buffer_format); + if (!resource_formats) + return NULL; + + array_size = tmpl->interlaced ? 
2 : 1; + template = *tmpl; + template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH); + template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT); + + vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_STATIC, 0); + /* TODO: get tiling working */ + templ.bind = PIPE_BIND_LINEAR; + resources[0] = (struct r600_texture *) + pipe->screen->resource_create(pipe->screen, &templ); + if (!resources[0]) + goto error; + + if (resource_formats[1] != PIPE_FORMAT_NONE) { + vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_STATIC, 1); + templ.bind = PIPE_BIND_LINEAR; + resources[1] = (struct r600_texture *) + pipe->screen->resource_create(pipe->screen, &templ); + if (!resources[1]) + goto error; + } + + if (resource_formats[2] != PIPE_FORMAT_NONE) { + vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_STATIC, 2); + templ.bind = PIPE_BIND_LINEAR; + resources[2] = (struct r600_texture *) + pipe->screen->resource_create(pipe->screen, &templ); + if (!resources[2]) + goto error; + } + + for (i = 0; i < VL_NUM_COMPONENTS; ++i) { + if (!resources[i]) + continue; + + surfaces[i] = & resources[i]->surface; + pbs[i] = &resources[i]->resource.buf; + } + + ruvd_join_surfaces(ctx->b.ws, templ.bind, pbs, surfaces); + + for (i = 0; i < VL_NUM_COMPONENTS; ++i) { + if (!resources[i]) + continue; + + /* recreate the CS handle */ + resources[i]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle( + resources[i]->resource.buf); + } + + template.height *= array_size; + return vl_video_buffer_create_ex2(pipe, &template, (struct pipe_resource **)resources); + +error: + for (i = 0; i < VL_NUM_COMPONENTS; ++i) + pipe_resource_reference((struct pipe_resource **)&resources[i], NULL); + + return NULL; +} + +/* set the decoding target buffer offsets */ +static struct radeon_winsys_cs_handle* radeonsi_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf) +{ + struct 
r600_texture *luma = (struct r600_texture *)buf->resources[0]; + struct r600_texture *chroma = (struct r600_texture *)buf->resources[1]; + + msg->body.decode.dt_field_mode = buf->base.interlaced; + + ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface); + + return luma->resource.cs_buf; +} + +/** + * creates an UVD compatible decoder + */ +struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context, + const struct pipe_video_codec *templ) +{ + return ruvd_create_decoder(context, templ, radeonsi_uvd_set_dtb); +} -- cgit v1.2.3