diff options
Diffstat (limited to 'src/gallium/drivers/v3d/v3dx_emit.c')
-rw-r--r-- | src/gallium/drivers/v3d/v3dx_emit.c | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c new file mode 100644 index 00000000000..e2aba356de4 --- /dev/null +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -0,0 +1,722 @@ +/* + * Copyright © 2014-2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "util/u_format.h" +#include "util/u_half.h" +#include "v3d_context.h" +#include "broadcom/common/v3d_macros.h" +#include "broadcom/cle/v3dx_pack.h" +#include "broadcom/compiler/v3d_compiler.h" + +static uint8_t +vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one) +{ + /* We may get a bad blendfactor when blending is disabled. */ + if (factor == 0) + return V3D_BLEND_FACTOR_ZERO; + + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return V3D_BLEND_FACTOR_ZERO; + case PIPE_BLENDFACTOR_ONE: + return V3D_BLEND_FACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return V3D_BLEND_FACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return V3D_BLEND_FACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return V3D_BLEND_FACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return V3D_BLEND_FACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return V3D_BLEND_FACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return V3D_BLEND_FACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ONE : + V3D_BLEND_FACTOR_DST_ALPHA); + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return (dst_alpha_one ? + V3D_BLEND_FACTOR_ZERO : + V3D_BLEND_FACTOR_INV_DST_ALPHA); + case PIPE_BLENDFACTOR_CONST_COLOR: + return V3D_BLEND_FACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return V3D_BLEND_FACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return V3D_BLEND_FACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return V3D_BLEND_FACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: + unreachable("Bad blend factor"); + } +} + +static inline uint16_t +swizzled_border_color(const struct v3d_device_info *devinfo, + struct pipe_sampler_state *sampler, + struct vc5_sampler_view *sview, + int chan) +{ + const struct util_format_description *desc = + util_format_description(sview->base.format); + uint8_t swiz = chan; + + /* If we're doing swizzling in the sampler, then only rearrange the + * border color for the mismatch between the VC5 texture format and + * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by + * the sampler's swizzle. + * + * For swizzling in the shader, we don't do any pre-swizzling of the + * border color. + */ + if (vc5_get_tex_return_size(devinfo, sview->base.format, + sampler->compare_mode) != 32) + swiz = desc->swizzle[swiz]; + + switch (swiz) { + case PIPE_SWIZZLE_0: + return util_float_to_half(0.0); + case PIPE_SWIZZLE_1: + return util_float_to_half(1.0); + default: + return util_float_to_half(sampler->border_color.f[swiz]); + } +} + +#if V3D_VERSION < 40 +static uint32_t +translate_swizzle(unsigned char pipe_swizzle) +{ + switch (pipe_swizzle) { + case PIPE_SWIZZLE_0: + return 0; + case PIPE_SWIZZLE_1: + return 1; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + return 2 + pipe_swizzle; + default: + unreachable("unknown swizzle"); + } +} + +static void +emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex, + int i) +{ + struct vc5_job *job = vc5->job; + struct pipe_sampler_state *psampler = stage_tex->samplers[i]; + struct vc5_sampler_state *sampler = vc5_sampler_state(psampler); + struct pipe_sampler_view *psview = stage_tex->textures[i]; + struct vc5_sampler_view *sview = vc5_sampler_view(psview); + struct pipe_resource *prsc = psview->texture; + struct vc5_resource *rsc = vc5_resource(prsc); + const struct v3d_device_info *devinfo = &vc5->screen->devinfo; + + stage_tex->texture_state[i].offset = + vc5_cl_ensure_space(&job->indirect, + cl_packet_length(TEXTURE_SHADER_STATE), + 32); + vc5_bo_set_reference(&stage_tex->texture_state[i].bo, + job->indirect.bo); + + uint32_t return_size = vc5_get_tex_return_size(devinfo, psview->format, + psampler->compare_mode); + + struct V3D33_TEXTURE_SHADER_STATE unpacked = { + /* XXX */ + .border_color_red = swizzled_border_color(devinfo, psampler, + sview, 0), + .border_color_green = swizzled_border_color(devinfo, psampler, + sview, 1), + .border_color_blue = swizzled_border_color(devinfo, psampler, + sview, 2), + .border_color_alpha = swizzled_border_color(devinfo, psampler, + sview, 3), + + /* In the normal texturing path, the LOD gets clamped between + * min/max, and the base_level field (set in the sampler view + * from first_level) only decides where the min/mag switch + * happens, so we need to use the LOD clamps to keep us + * between min and max. + * + * For txf, the LOD clamp is still used, despite GL not + * wanting that. We will need to have a separate + * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to + * support txf properly. + */ + .min_level_of_detail = MIN2(psview->u.tex.first_level + + MAX2(psampler->min_lod, 0), + psview->u.tex.last_level), + .max_level_of_detail = MIN2(psview->u.tex.first_level + + psampler->max_lod, + psview->u.tex.last_level), + + .texture_base_pointer = cl_address(rsc->bo, + rsc->slices[0].offset), + + .output_32_bit = return_size == 32, + }; + + /* Set up the sampler swizzle if we're doing 16-bit sampling. For + * 32-bit, we leave swizzling up to the shader compiler. + * + * Note: Contrary to the docs, the swizzle still applies even if the + * return size is 32. It's just that you probably want to swizzle in + * the shader, because you need the Y/Z/W channels to be defined. + */ + if (return_size == 32) { + unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X); + unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y); + unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z); + unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W); + } else { + unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]); + unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]); + unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]); + unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]); + } + + int min_img_filter = psampler->min_img_filter; + int min_mip_filter = psampler->min_mip_filter; + int mag_img_filter = psampler->mag_img_filter; + + if (return_size == 32) { + min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + mag_img_filter = PIPE_TEX_FILTER_NEAREST; + } + + bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; + switch (min_mip_filter) { + case PIPE_TEX_MIPFILTER_NONE: + unpacked.filter += min_nearest ? 2 : 0; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + unpacked.filter += min_nearest ? 4 : 8; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + unpacked.filter += min_nearest ? 4 : 8; + unpacked.filter += 2; + break; + } + + if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) + unpacked.filter++; + + if (psampler->max_anisotropy > 8) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; + else if (psampler->max_anisotropy > 4) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; + else if (psampler->max_anisotropy > 2) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; + else if (psampler->max_anisotropy) + unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; + + uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; + cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); + + for (int i = 0; i < ARRAY_SIZE(packed); i++) + packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; + + /* TMU indirect structs need to be 32b aligned. */ + vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); + cl_emit_prepacked(&job->indirect, &packed); +} + +static void +emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex) +{ + for (int i = 0; i < stage_tex->num_textures; i++) { + if (stage_tex->textures[i]) + emit_one_texture(vc5, stage_tex, i); + } +} +#endif /* V3D_VERSION < 40 */ + +static uint32_t +translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt) +{ + if (vc5->swap_color_rb & (1 << rt)) { + colormask = ((colormask & (2 | 8)) | + ((colormask & 1) << 2) | + ((colormask & 4) >> 2)); + } + + return (~colormask) & 0xf; +} + +static void +emit_rt_blend(struct vc5_context *vc5, struct vc5_job *job, + struct pipe_blend_state *blend, int rt) +{ + cl_emit(&job->bcl, BLEND_CONFIG, config) { + struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; + +#if V3D_VERSION >= 40 + config.render_target_mask = 1 << rt; +#else + assert(rt == 0); +#endif + + config.colour_blend_mode = rtblend->rgb_func; + config.colour_blend_dst_factor = + vc5_factor(rtblend->rgb_dst_factor, + vc5->blend_dst_alpha_one); + config.colour_blend_src_factor = + vc5_factor(rtblend->rgb_src_factor, + vc5->blend_dst_alpha_one); + + config.alpha_blend_mode = rtblend->alpha_func; + config.alpha_blend_dst_factor = + vc5_factor(rtblend->alpha_dst_factor, + vc5->blend_dst_alpha_one); + config.alpha_blend_src_factor = + vc5_factor(rtblend->alpha_src_factor, + vc5->blend_dst_alpha_one); + } +} + +void +v3dX(emit_state)(struct pipe_context *pctx) +{ + struct vc5_context *vc5 = vc5_context(pctx); + struct vc5_job *job = vc5->job; + bool rasterizer_discard = vc5->rasterizer->base.rasterizer_discard; + + if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT | + VC5_DIRTY_RASTERIZER)) { + float *vpscale = vc5->viewport.scale; + float *vptranslate = vc5->viewport.translate; + float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; + float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; + float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; + float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; + + /* Clip to the scissor if it's enabled, but still clip to the + * drawable regardless since that controls where the binner + * tries to put things. + * + * Additionally, always clip the rendering to the viewport, + * since the hardware does guardband clipping, meaning + * primitives would rasterize outside of the view volume. + */ + uint32_t minx, miny, maxx, maxy; + if (!vc5->rasterizer->base.scissor) { + minx = MAX2(vp_minx, 0); + miny = MAX2(vp_miny, 0); + maxx = MIN2(vp_maxx, job->draw_width); + maxy = MIN2(vp_maxy, job->draw_height); + } else { + minx = MAX2(vp_minx, vc5->scissor.minx); + miny = MAX2(vp_miny, vc5->scissor.miny); + maxx = MIN2(vp_maxx, vc5->scissor.maxx); + maxy = MIN2(vp_maxy, vc5->scissor.maxy); + } + + cl_emit(&job->bcl, CLIP_WINDOW, clip) { + clip.clip_window_left_pixel_coordinate = minx; + clip.clip_window_bottom_pixel_coordinate = miny; + clip.clip_window_width_in_pixels = maxx - minx; + clip.clip_window_height_in_pixels = maxy - miny; + +#if V3D_VERSION < 41 + /* The HW won't entirely clip out when scissor w/h is + * 0. Just treat it the same as rasterizer discard. + */ + if (clip.clip_window_width_in_pixels == 0 || + clip.clip_window_height_in_pixels == 0) { + rasterizer_discard = true; + clip.clip_window_width_in_pixels = 1; + clip.clip_window_height_in_pixels = 1; + } +#endif + } + + job->draw_min_x = MIN2(job->draw_min_x, minx); + job->draw_min_y = MIN2(job->draw_min_y, miny); + job->draw_max_x = MAX2(job->draw_max_x, maxx); + job->draw_max_y = MAX2(job->draw_max_y, maxy); + } + + if (vc5->dirty & (VC5_DIRTY_RASTERIZER | + VC5_DIRTY_ZSA | + VC5_DIRTY_BLEND | + VC5_DIRTY_COMPILED_FS)) { + cl_emit(&job->bcl, CONFIGURATION_BITS, config) { + config.enable_forward_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_FRONT); + config.enable_reverse_facing_primitive = + !rasterizer_discard && + !(vc5->rasterizer->base.cull_face & + PIPE_FACE_BACK); + /* This seems backwards, but it's what gets the + * clipflat test to pass. + */ + config.clockwise_primitives = + vc5->rasterizer->base.front_ccw; + + config.enable_depth_offset = + vc5->rasterizer->base.offset_tri; + + config.rasterizer_oversample_mode = + vc5->rasterizer->base.multisample; + + config.direct3d_provoking_vertex = + vc5->rasterizer->base.flatshade_first; + + config.blend_enable = vc5->blend->rt[0].blend_enable; + + /* Note: EZ state may update based on the compiled FS, + * along with ZSA + */ + config.early_z_updates_enable = + (job->ez_state != VC5_EZ_DISABLED); + if (vc5->zsa->base.depth.enabled) { + config.z_updates_enable = + vc5->zsa->base.depth.writemask; + config.early_z_enable = + config.early_z_updates_enable; + config.depth_test_function = + vc5->zsa->base.depth.func; + } else { + config.depth_test_function = PIPE_FUNC_ALWAYS; + } + + config.stencil_enable = + vc5->zsa->base.stencil[0].enabled; + } + + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER && + vc5->rasterizer->base.offset_tri) { + cl_emit(&job->bcl, DEPTH_OFFSET, depth) { + depth.depth_offset_factor = + vc5->rasterizer->offset_factor; + depth.depth_offset_units = + vc5->rasterizer->offset_units; + } + } + + if (vc5->dirty & VC5_DIRTY_RASTERIZER) { + cl_emit(&job->bcl, POINT_SIZE, point_size) { + point_size.point_size = vc5->rasterizer->point_size; + } + + cl_emit(&job->bcl, LINE_WIDTH, line_width) { + line_width.line_width = vc5->rasterizer->base.line_width; + } + } + + if (vc5->dirty & VC5_DIRTY_VIEWPORT) { + cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { + clip.viewport_half_width_in_1_256th_of_pixel = + vc5->viewport.scale[0] * 256.0f; + clip.viewport_half_height_in_1_256th_of_pixel = + vc5->viewport.scale[1] * 256.0f; + } + + cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { + clip.viewport_z_offset_zc_to_zs = + vc5->viewport.translate[2]; + clip.viewport_z_scale_zc_to_zs = + vc5->viewport.scale[2]; + } + cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { + clip.minimum_zw = (vc5->viewport.translate[2] - + vc5->viewport.scale[2]); + clip.maximum_zw = (vc5->viewport.translate[2] + + vc5->viewport.scale[2]); + } + + cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { + vp.viewport_centre_x_coordinate = + vc5->viewport.translate[0]; + vp.viewport_centre_y_coordinate = + vc5->viewport.translate[1]; + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) { + struct pipe_blend_state *blend = vc5->blend; + + if (blend->independent_blend_enable) { + for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) + emit_rt_blend(vc5, job, blend, i); + } else { + emit_rt_blend(vc5, job, blend, 0); + } + } + + if (vc5->dirty & VC5_DIRTY_BLEND) { + struct pipe_blend_state *blend = vc5->blend; + + cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) { + if (blend->independent_blend_enable) { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[1].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[2].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[3].colormask, 3); + } else { + mask.render_target_0_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 0); + mask.render_target_1_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 1); + mask.render_target_2_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 2); + mask.render_target_3_per_colour_component_write_masks = + translate_colormask(vc5, blend->rt[0].colormask, 3); + } + } + } + + /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant + * color. + */ + if (vc5->dirty & VC5_DIRTY_BLEND_COLOR || + (V3D_VERSION < 41 && (vc5->dirty & VC5_DIRTY_BLEND))) { + cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) { + colour.red_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[2] : + vc5->blend_color.hf[0]); + colour.green_f16 = vc5->blend_color.hf[1]; + colour.blue_f16 = (vc5->swap_color_rb ? + vc5->blend_color.hf[0] : + vc5->blend_color.hf[2]); + colour.alpha_f16 = vc5->blend_color.hf[3]; + } + } + + if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) { + struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0]; + struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1]; + + if (front->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_front, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[0]; + } + } + + if (back->enabled) { + cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG, + vc5->zsa->stencil_back, config) { + config.stencil_ref_value = + vc5->stencil_ref.ref_value[1]; + } + } + } + +#if V3D_VERSION < 40 + /* Pre-4.x, we have texture state that depends on both the sampler and + * the view, so we merge them together at draw time. + */ + if (vc5->dirty & VC5_DIRTY_FRAGTEX) + emit_textures(vc5, &vc5->fragtex); + + if (vc5->dirty & VC5_DIRTY_VERTTEX) + emit_textures(vc5, &vc5->verttex); +#endif + + if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->flat_shade_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->flat_shade_flags[i]) + continue; + + cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_flat_shade_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_flat_shade_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.flat_shade_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->flat_shade_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); + } + } + +#if V3D_VERSION >= 40 + if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) { + bool emitted_any = false; + + for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) { + if (!vc5->prog.fs->prog_data.fs->centroid_flags[i]) + continue; + + cl_emit(&job->bcl, CENTROID_FLAGS, flags) { + flags.varying_offset_v0 = i; + + if (emitted_any) { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_UNCHANGED; + } else { + flags.action_for_centroid_flags_of_lower_numbered_varyings = + ((i == 0) ? + V3D_VARYING_FLAGS_ACTION_UNCHANGED : + V3D_VARYING_FLAGS_ACTION_ZEROED); + + flags.action_for_centroid_flags_of_higher_numbered_varyings = + V3D_VARYING_FLAGS_ACTION_ZEROED; + } + + flags.centroid_flags_for_varyings_v024 = + vc5->prog.fs->prog_data.fs->centroid_flags[i]; + } + + emitted_any = true; + } + + if (!emitted_any) { + cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); + } + } +#endif + + /* Set up the transform feedback data specs (which VPM entries to + * output to which buffers). + */ + if (vc5->dirty & (VC5_DIRTY_STREAMOUT | + VC5_DIRTY_RASTERIZER | + VC5_DIRTY_PRIM_MODE)) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + + if (so->num_targets) { + bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS && + vc5->rasterizer->base.point_size_per_vertex); + uint16_t *tf_specs = (psiz_per_vertex ? + vc5->prog.bind_vs->tf_specs_psiz : + vc5->prog.bind_vs->tf_specs); + +#if V3D_VERSION >= 40 + job->tf_enabled = (vc5->prog.bind_vs->num_tf_specs != 0 && + vc5->active_queries); + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + tfe.enable = job->tf_enabled; + }; +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { + tfe.number_of_32_bit_output_buffer_address_following = + so->num_targets; + tfe.number_of_16_bit_output_data_specs_following = + vc5->prog.bind_vs->num_tf_specs; + }; +#endif /* V3D_VERSION < 40 */ + for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) { + cl_emit_prepacked(&job->bcl, &tf_specs[i]); + } + } else if (job->tf_enabled) { +#if V3D_VERSION >= 40 + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { + tfe.enable = false; + }; + job->tf_enabled = false; +#endif /* V3D_VERSION >= 40 */ + } + } + + /* Set up the trasnform feedback buffers. */ + if (vc5->dirty & VC5_DIRTY_STREAMOUT) { + struct vc5_streamout_stateobj *so = &vc5->streamout; + for (int i = 0; i < so->num_targets; i++) { + const struct pipe_stream_output_target *target = + so->targets[i]; + struct vc5_resource *rsc = target ? + vc5_resource(target->buffer) : NULL; + +#if V3D_VERSION >= 40 + if (!target) + continue; + + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) { + output.buffer_address = + cl_address(rsc->bo, + target->buffer_offset); + output.buffer_size_in_32_bit_words = + target->buffer_size >> 2; + output.buffer_number = i; + } +#else /* V3D_VERSION < 40 */ + cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { + if (target) { + output.address = + cl_address(rsc->bo, + target->buffer_offset); + } + }; +#endif /* V3D_VERSION < 40 */ + if (target) { + vc5_job_add_write_resource(vc5->job, + target->buffer); + } + /* XXX: buffer_size? */ + } + } + + if (vc5->dirty & VC5_DIRTY_OQ) { + cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { + job->oq_enabled = vc5->active_queries && vc5->current_oq; + if (job->oq_enabled) { + counter.address = cl_address(vc5->current_oq, 0); + } + } + } +} |