From 54e0a8ed5dcaaa0ef483d5960ae86f88e0bf8990 Mon Sep 17 00:00:00 2001 From: Chia-I Wu <olvaffe@gmail.com> Date: Fri, 12 Jun 2015 15:08:02 +0800 Subject: ilo: add ilo_state_ps to ilo_shader_cso --- .../drivers/ilo/core/ilo_builder_3d_bottom.h | 190 +++----------- src/gallium/drivers/ilo/core/ilo_state_3d.h | 19 -- src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c | 280 --------------------- src/gallium/drivers/ilo/ilo_blitter.h | 1 + src/gallium/drivers/ilo/ilo_blitter_rectlist.c | 1 + src/gallium/drivers/ilo/ilo_render_gen6.c | 11 +- src/gallium/drivers/ilo/ilo_render_gen7.c | 20 +- src/gallium/drivers/ilo/ilo_render_gen8.c | 11 +- src/gallium/drivers/ilo/ilo_render_surface.c | 1 + src/gallium/drivers/ilo/ilo_shader.c | 88 ++++++- src/gallium/drivers/ilo/ilo_shader.h | 15 ++ src/gallium/drivers/ilo/ilo_state.c | 2 +- .../drivers/ilo/shader/ilo_shader_internal.h | 2 + 13 files changed, 160 insertions(+), 481 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 68461fff09d..88ed6ea054c 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -29,7 +29,6 @@ #define ILO_BUILDER_3D_BOTTOM_H #include "genhw/genhw.h" -#include "../ilo_shader.h" #include "intel_winsys.h" #include "ilo_core.h" @@ -38,6 +37,7 @@ #include "ilo_state_cc.h" #include "ilo_state_raster.h" #include "ilo_state_sbe.h" +#include "ilo_state_shader.h" #include "ilo_state_viewport.h" #include "ilo_builder.h" #include "ilo_builder_3d_top.h" @@ -200,56 +200,24 @@ gen8_3DSTATE_RASTER(struct ilo_builder *builder, static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, - const struct ilo_shader_state *fs, - bool dual_blend, bool cc_may_kill) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 9; - const bool multisample = false; - const int num_samples = 1; - uint32_t dw2, dw4, dw5, dw6, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 6, 6); - dw2 = 0; - /* see raster_set_gen6_3dstate_wm() */ - dw4 = rs->raster[0]; - dw5 = rs->raster[1]; - dw6 = rs->raster[2]; - - if (fs) { - const union ilo_shader_cso *cso; - - cso = ilo_shader_get_kernel_cso(fs); - /* see fs_init_cso_gen6() */ - dw2 |= cso->ps_payload[0]; - dw4 |= cso->ps_payload[1]; - dw5 |= cso->ps_payload[2]; - dw6 |= cso->ps_payload[3]; - } else { - const int max_threads = (builder->dev->gt == 2) ? 80 : 40; - - /* honor the valid range even if dispatching is disabled */ - dw5 |= (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - } - - if (cc_may_kill) - dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL | GEN6_WM_DW5_PS_DISPATCH_ENABLE; - - if (dual_blend) - dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND; - - if (multisample && num_samples > 1) - dw6 |= GEN6_WM_DW6_MSDISPMODE_PERPIXEL; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; + dw[1] = kernel_offset; + /* see raster_set_gen6_3dstate_wm() and ps_set_gen6_3dstate_wm() */ + dw[2] = ps->ps[0]; + dw[3] = ps->ps[1]; + dw[4] = rs->wm[0] | ps->ps[2]; + dw[5] = rs->wm[1] | ps->ps[3]; + dw[6] = rs->wm[2] | ps->ps[4]; dw[7] = 0; /* kernel 1 */ dw[8] = 0; /* kernel 2 */ } @@ -257,39 +225,19 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, static inline void gen7_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, - const struct ilo_shader_state *fs, - bool cc_may_kill) + const struct ilo_state_ps *ps) { const uint8_t cmd_len = 3; - const bool multisample = false; - const int num_samples = 1; - uint32_t dw1, dw2, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see raster_set_gen8_3DSTATE_WM() */ - dw1 = rs->wm[0]; - - if (fs) { - const union ilo_shader_cso *cso; - - cso = ilo_shader_get_kernel_cso(fs); - /* see fs_init_cso_gen7() */ - dw1 |= cso->ps_payload[3]; - } - - if (cc_may_kill) - dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE | GEN7_WM_DW1_PS_KILL_PIXEL; - - dw2 = 0; - if (multisample && num_samples > 1) - dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - dw[1] = dw1; - dw[2] = dw2; + /* see raster_set_gen8_3DSTATE_WM() and ps_set_gen7_3dstate_wm() */ + dw[1] = rs->wm[0] | ps->ps[0]; + dw[2] = ps->ps[1]; } static inline void @@ -379,100 +327,48 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - bool dual_blend) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 8; - const union ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* see fs_init_cso_gen7() */ - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->ps_payload[0]; - dw4 = cso->ps_payload[1]; - dw5 = cso->ps_payload[2]; - - if (dual_blend) - dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; + dw[1] = kernel_offset; + /* see ps_set_gen7_3DSTATE_PS() */ + dw[2] = ps->ps[2]; + dw[3] = ps->ps[3]; + dw[4] = ps->ps[4]; + dw[5] = ps->ps[5]; dw[6] = 0; /* kernel 1 */ dw[7] = 0; /* kernel 2 */ } -static inline void -gen7_disable_3DSTATE_PS(struct ilo_builder *builder) -{ - const uint8_t cmd_len = 8; - int max_threads; - uint32_t dw4, *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - - /* GPU hangs if none of the dispatch enable bits is set */ - dw4 = GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_threads = (builder->dev->gt == 3) ? 408 : - (builder->dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (builder->dev->gt == 2) ? 172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = dw4; - dw[5] = 0; - dw[6] = 0; - dw[7] = 0; -} - static inline void gen8_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs) + const struct ilo_state_ps *ps, + uint32_t kernel_offset) { const uint8_t cmd_len = 12; - const union ilo_shader_cso *cso; - uint32_t dw3, dw6, dw7, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - /* see fs_init_cso_gen8() */ - cso = ilo_shader_get_kernel_cso(fs); - dw3 = cso->ps_payload[0]; - dw6 = cso->ps_payload[1]; - dw7 = cso->ps_payload[2]; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - dw[1] = ilo_shader_get_kernel_offset(fs); + dw[1] = kernel_offset; dw[2] = 0; - dw[3] = dw3; - dw[4] = 0; /* scratch */ + /* see ps_set_gen8_3DSTATE_PS() */ + dw[3] = ps->ps[0]; + dw[4] = ps->ps[1]; dw[5] = 0; - dw[6] = dw6; - dw[7] = dw7; + dw[6] = ps->ps[2]; + dw[7] = ps->ps[3]; dw[8] = 0; /* kernel 1 */ dw[9] = 0; dw[10] = 0; /* kernel 2 */ @@ -481,28 +377,18 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, static inline void gen8_3DSTATE_PS_EXTRA(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - bool cc_may_kill, bool per_sample) + const struct ilo_state_ps *ps) { const uint8_t cmd_len = 2; - const union ilo_shader_cso *cso; - uint32_t dw1, *dw; + uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 8, 8); - /* see fs_init_cso_gen8() */ - cso = ilo_shader_get_kernel_cso(fs); - dw1 = cso->ps_payload[3]; - - if (cc_may_kill) - dw1 |= GEN8_PSX_DW1_VALID | GEN8_PSX_DW1_KILL_PIXEL; - if (per_sample) - dw1 |= GEN8_PSX_DW1_PER_SAMPLE; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN8_RENDER_CMD(3D, 3DSTATE_PS_EXTRA) | (cmd_len - 2); - dw[1] = dw1; + /* see ps_set_gen8_3DSTATE_PS_EXTRA() */ + dw[1] = ps->ps[4]; } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d.h b/src/gallium/drivers/ilo/core/ilo_state_3d.h index b2087df3470..dcc94bfc88c 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d.h +++ b/src/gallium/drivers/ilo/core/ilo_state_3d.h @@ -114,25 +114,6 @@ struct ilo_fb_state { enum gen_depth_format depth_offset_format; }; -union ilo_shader_cso { - struct ilo_state_vs vs; - struct ilo_state_hs hs; - struct ilo_state_ds ds; - struct ilo_state_gs gs; - - uint32_t ps_payload[5]; - - struct { - struct ilo_state_vs vs; - struct ilo_state_gs sol; - } vs_sol; -}; - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso); - void ilo_gpe_set_fb(const struct ilo_dev *dev, const struct pipe_framebuffer_state *state, diff --git a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c index 004904fcd08..8734aff44da 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c +++ b/src/gallium/drivers/ilo/core/ilo_state_3d_bottom.c @@ -32,286 +32,6 @@ #include "ilo_format.h" #include "ilo_image.h" #include "ilo_state_3d.h" -#include "../ilo_shader.h" - -static void -fs_init_cso_gen6(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, input_count, sampler_count, max_threads; - uint32_t dw2, dw4, dw5, dw6; - - ILO_DEV_ASSERT(dev, 6, 6); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - /* see brwCreateContext() */ - max_threads = (dev->gt == 2) ? 80 : 40; - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | - 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; - - dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the - * PS kernel or color calculator has the ability to kill (discard) - * pixels or samples, other than due to depth or stencil testing. - * This bit is required to be ENABLED in the following situations: - * - * The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that can - * cause the final pixel mask to differ from the pixel mask received - * on dispatch. - * - * A sampler with chroma key enabled with kill pixel mode is used by - * the pixel shader. - * - * Any render target has Alpha Test Enable or AlphaToCoverage Enable - * enabled. - * - * The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware and - * therefore not via PS instructions, there should be no need to - * ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 275: - * - * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth - * field must be set to disabled." - * - * TODO This is not checked yet. - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw5 |= GEN6_WM_DW5_PS_USE_W; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - if (true) - dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT; - - dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | - GEN6_POSOFFSET_NONE << GEN6_WM_DW6_PS_POSOFFSET__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw2; - cso->ps_payload[1] = dw4; - cso->ps_payload[2] = dw5; - cso->ps_payload[3] = dw6; -} - -static uint32_t -fs_get_wm_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - dw = 0; - - /* - * TODO set this bit only when - * - * a) fs writes colors and color is not masked, or - * b) fs writes depth, or - * c) fs or cc kills - */ - dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 278: - * - * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that - * the PS kernel or color calculator has the ability to kill - * (discard) pixels or samples, other than due to depth or stencil - * testing. This bit is required to be ENABLED in the following - * situations: - * - * - The API pixel shader program contains "killpix" or "discard" - * instructions, or other code in the pixel shader kernel that - * can cause the final pixel mask to differ from the pixel mask - * received on dispatch. - * - * - A sampler with chroma key enabled with kill pixel mode is used - * by the pixel shader. - * - * - Any render target has Alpha Test Enable or AlphaToCoverage - * Enable enabled. - * - * - The pixel shader kernel generates and outputs oMask. - * - * Note: As ClipDistance clipping is fully supported in hardware - * and therefore not via PS instructions, there should be no need - * to ENABLE this bit due to ClipDistance clipping." - */ - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN7_WM_DW1_PS_KILL_PIXEL; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_PSCDEPTH_ON << GEN7_WM_DW1_PSCDEPTH__SHIFT; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN7_WM_DW1_PS_USE_DEPTH; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN7_WM_DW1_PS_USE_W; - - return dw; -} - -static void -fs_init_cso_gen7(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, sampler_count, max_threads; - uint32_t dw2, dw4, dw5; - - ILO_DEV_ASSERT(dev, 7, 7.5); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - dw4 = GEN6_POSOFFSET_NONE << GEN7_PS_DW4_POSOFFSET__SHIFT; - - /* see brwCreateContext() */ - switch (ilo_dev_gen(dev)) { - case ILO_GEN(7.5): - max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (dev->gt == 2) ? 172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; - } - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw4 |= GEN7_PS_DW4_ATTR_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; - - dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | - 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw2; - cso->ps_payload[1] = dw4; - cso->ps_payload[2] = dw5; - cso->ps_payload[3] = fs_get_wm_gen7(dev, fs); -} - -static uint32_t -fs_get_psx_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs) -{ - uint32_t dw; - - ILO_DEV_ASSERT(dev, 8, 8); - - dw = GEN8_PSX_DW1_VALID; - - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL)) - dw |= GEN8_PSX_DW1_KILL_PIXEL; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z)) - dw |= GEN7_PSCDEPTH_ON << GEN8_PSX_DW1_PSCDEPTH__SHIFT; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z)) - dw |= GEN8_PSX_DW1_USE_DEPTH; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W)) - dw |= GEN8_PSX_DW1_USE_W; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT)) - dw |= GEN8_PSX_DW1_ATTR_ENABLE; - - return dw; -} - -static void -fs_init_cso_gen8(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - int start_grf, sampler_count; - uint32_t dw3, dw6, dw7; - - ILO_DEV_ASSERT(dev, 8, 8); - - start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT); - - dw3 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT; - dw3 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* always 64? */ - dw6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT | - GEN6_POSOFFSET_NONE << GEN8_PS_DW6_POSOFFSET__SHIFT; - if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) - dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE; - - assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET)); - dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT; - - dw7 = start_grf << GEN8_PS_DW7_URB_GRF_START0__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT | - 0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT; - - STATIC_ASSERT(Elements(cso->ps_payload) >= 4); - cso->ps_payload[0] = dw3; - cso->ps_payload[1] = dw6; - cso->ps_payload[2] = dw7; - cso->ps_payload[3] = fs_get_psx_gen8(dev, fs); -} - -void -ilo_gpe_init_fs_cso(const struct ilo_dev *dev, - const struct ilo_shader_state *fs, - union ilo_shader_cso *cso) -{ - if (ilo_dev_gen(dev) >= ILO_GEN(8)) - fs_init_cso_gen8(dev, fs, cso); - else if (ilo_dev_gen(dev) >= ILO_GEN(7)) - fs_init_cso_gen7(dev, fs, cso); - else - fs_init_cso_gen6(dev, fs, cso); -} static void fb_set_blend_caps(const struct ilo_dev *dev, diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 392f784a503..08690f30378 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -74,6 +74,7 @@ struct ilo_blitter { uint32_t vp_data[20]; struct ilo_state_sbe sbe; + struct ilo_state_ps ps; struct ilo_state_cc cc; uint32_t depth_clear_value; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 9cc57f86f68..a4c8dead4a5 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -78,6 +78,7 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) blitter->vp_data, sizeof(blitter->vp_data)); ilo_state_sbe_init_for_rectlist(&blitter->sbe, blitter->ilo->dev, 0, 0); + ilo_state_ps_init_disabled(&blitter->ps, blitter->ilo->dev); ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, ilo_state_vf_get_attr_count(&blitter->vf)); diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index 2f6743c8b28..30abead0cdc 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -666,14 +666,17 @@ gen6_draw_wm(struct ilo_render *r, } /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || + if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) || r->instruction_bo_changed) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed) gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, - vec->blend->dual_blend, vec->blend->alpha_may_kill); + gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, + &cso->ps, kernel_offset); } } @@ -840,7 +843,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false, false); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 04da1c41261..0b2245c80da 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -499,12 +499,12 @@ gen7_draw_wm(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + /* 3DSTATE_WM */ - if (DIRTY(FS) || DIRTY(BLEND) || - (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) { - gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, vec->fs, - vec->blend->alpha_may_kill); - } + if (DIRTY(FS) || (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM)) + gen7_3DSTATE_WM(r->builder, &vec->rasterizer->rs, &cso->ps); /* 3DSTATE_BINDING_TABLE_POINTERS_PS */ if (session->binding_table_fs_changed) { @@ -527,13 +527,11 @@ gen7_draw_wm(struct ilo_render *r, } /* 3DSTATE_PS */ - if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) { - const bool dual_blend = vec->blend->dual_blend; - + if (DIRTY(FS) || r->instruction_bo_changed) { if (r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend); + gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -714,12 +712,12 @@ static void gen7_rectlist_wm(struct ilo_render *r, const struct ilo_blitter *blitter) { - gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, NULL, false); + gen7_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps); gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_disable_3DSTATE_PS(r->builder); + gen7_3DSTATE_PS(r->builder, &blitter->ps, 0); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 691c378c864..2ce71fb161e 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -86,6 +86,9 @@ gen8_draw_wm(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { + const union ilo_shader_cso *cso = ilo_shader_get_kernel_cso(vec->fs); + const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->fs); + /* 3DSTATE_WM */ if (session->rs_delta.dirty & ILO_STATE_RASTER_3DSTATE_WM) gen8_3DSTATE_WM(r->builder, &vec->rasterizer->rs); @@ -121,13 +124,11 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_PS */ if (DIRTY(FS) || r->instruction_bo_changed) - gen8_3DSTATE_PS(r->builder, vec->fs); + gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); /* 3DSTATE_PS_EXTRA */ - if (DIRTY(FS) || DIRTY(BLEND)) { - gen8_3DSTATE_PS_EXTRA(r->builder, vec->fs, - vec->blend->alpha_may_kill, false); - } + if (DIRTY(FS)) + gen8_3DSTATE_PS_EXTRA(r->builder, &cso->ps); /* 3DSTATE_PS_BLEND */ if (session->cc_delta.dirty & ILO_STATE_CC_3DSTATE_PS_BLEND) diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c index 729cb604eaf..bbdd5fe7a0a 100644 --- a/src/gallium/drivers/ilo/ilo_render_surface.c +++ b/src/gallium/drivers/ilo/ilo_render_surface.c @@ -29,6 +29,7 @@ #include "ilo_common.h" #include "ilo_blitter.h" +#include "ilo_shader.h" #include "ilo_state.h" #include "ilo_render_gen.h" diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 29c2bf5f8f4..93a26268a29 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -27,7 +27,6 @@ #include "genhw/genhw.h" /* for SBE setup */ #include "core/ilo_builder.h" -#include "core/ilo_state_3d.h" #include "core/intel_winsys.h" #include "shader/ilo_shader_internal.h" #include "tgsi/tgsi_parse.h" @@ -654,6 +653,60 @@ init_gs(struct ilo_shader *kernel, ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info); } +static void +init_ps(struct ilo_shader *kernel, + const struct ilo_shader_state *state) +{ + struct ilo_state_ps_info info; + + memset(&info, 0, sizeof(info)); + + init_shader_kernel(kernel, state, &info.kernel_8); + init_shader_resource(kernel, state, &info.resource); + + info.io.has_rt_write = true; + info.io.posoffset = GEN6_POSOFFSET_NONE; + info.io.attr_count = kernel->in.count; + info.io.use_z = kernel->in.has_pos; + info.io.use_w = kernel->in.has_pos; + info.io.use_coverage_mask = false; + info.io.pscdepth = (kernel->out.has_pos) ? + GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF; + info.io.write_pixel_mask = kernel->has_kill; + info.io.write_omask = false; + + info.params.sample_mask = 0x1; + info.params.earlyz_control_psexec = false; + info.params.alpha_may_kill = false; + info.params.dual_source_blending = false; + info.params.has_writeable_rt = true; + + info.valid_kernels = GEN6_PS_DISPATCH_8; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 284: + * + * "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode + * where (over and above PERPIXEL mode) the PS is run for each covered + * sample. This mode is also used for "normal" non-multisample + * rendering (aka 1X), given Number of Multisamples is programmed to + * NUMSAMPLES_1." + */ + info.per_sample_dispatch = true; + + info.rt_clear_enable = false; + info.rt_resolve_enable = false; + info.cv_per_sample_interp = false; + info.cv_has_earlyz_op = false; + info.sample_count_one = true; + info.cv_has_depth_buffer = true; + + ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info); + + /* remember current parameters */ + kernel->ps_params = info.params; +} + static void init_sol(struct ilo_shader *kernel, const struct ilo_dev *dev, @@ -837,7 +890,7 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state, init_gs(sh, state); break; case PIPE_SHADER_FRAGMENT: - ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso); + init_ps(sh, state); break; default: break; @@ -955,16 +1008,33 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader, const struct ilo_state_vector *vec, uint32_t dirty) { - const struct ilo_shader * const cur = shader->shader; struct ilo_shader_variant variant; + bool changed = false; - if (!(shader->info.non_orthogonal_states & dirty)) - return false; + if (shader->info.non_orthogonal_states & dirty) { + const struct ilo_shader * const old = shader->shader; + + ilo_shader_variant_init(&variant, &shader->info, vec); + ilo_shader_state_use_variant(shader, &variant); + changed = (shader->shader != old); + } + + if (shader->info.type == PIPE_SHADER_FRAGMENT) { + struct ilo_shader *kernel = shader->shader; + + if (kernel->ps_params.sample_mask != vec->sample_mask || + kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) { + kernel->ps_params.sample_mask = vec->sample_mask; + kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill; - ilo_shader_variant_init(&variant, &shader->info, vec); - ilo_shader_state_use_variant(shader, &variant); + ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev, + &kernel->ps_params); - return (shader->shader != cur); + changed = true; + } + } + + return changed; } static int @@ -1063,8 +1133,8 @@ ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader, assert(kernel->in.count <= Elements(swizzles)); dst_len = MIN2(kernel->in.count, Elements(swizzles)); - memset(&info, 0, sizeof(info)); memset(&swizzles, 0, sizeof(swizzles)); + memset(&info, 0, sizeof(info)); info.attr_count = dst_len; info.cv_vue_attr_count = src_skip + src_len; diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h index 0f20877f83d..d9f02a4746a 100644 --- a/src/gallium/drivers/ilo/ilo_shader.h +++ b/src/gallium/drivers/ilo/ilo_shader.h @@ -28,6 +28,8 @@ #ifndef ILO_SHADER_H #define ILO_SHADER_H +#include "core/ilo_state_shader.h" + #include "ilo_common.h" enum ilo_kernel_param { @@ -90,6 +92,19 @@ struct ilo_state_sbe; struct ilo_state_sol; struct ilo_state_vector; +union ilo_shader_cso { + struct ilo_state_vs vs; + struct ilo_state_hs hs; + struct ilo_state_ds ds; + struct ilo_state_gs gs; + struct ilo_state_ps ps; + + struct { + struct ilo_state_vs vs; + struct ilo_state_gs sol; + } vs_sol; +}; + struct ilo_shader_cache * ilo_shader_cache_create(void); diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 902b6d3d1de..917839fa23e 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -698,6 +698,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, { ilo->state_vector.draw = draw; + finalize_blend(ilo); finalize_shader_states(&ilo->state_vector); finalize_constant_buffers(ilo); finalize_index_buffer(ilo); @@ -706,7 +707,6 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_urb(ilo); finalize_rasterizer(ilo); finalize_viewport(ilo); - finalize_blend(ilo); u_upload_unmap(ilo->uploader); } diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h index 9c17ec0e58d..01c86675202 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h +++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h @@ -77,6 +77,7 @@ struct ilo_shader_variant { struct ilo_kernel_routing { bool initialized; + bool is_point; bool light_twoside; uint32_t sprite_coord_enable; @@ -140,6 +141,7 @@ struct ilo_shader { int kernel_size; struct ilo_kernel_routing routing; + struct ilo_state_ps_params_info ps_params; /* what does the push constant buffer consist of? */ struct { -- cgit v1.2.3