From e67f99bd292e2d3b083207fb806dc9a42105ce72 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Mon, 7 Oct 2013 01:21:08 +0800 Subject: ilo: preliminary GEN 7.5 support This is based on grepping for brw->is_haswell in i965 to see how GEN 7.5 differs from GEN 7. Slightly tested with Xonotic and some Mesa demos. --- src/gallium/drivers/ilo/ilo_3d_pipeline.c | 1 + src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c | 25 +++++-- src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c | 28 ++++--- src/gallium/drivers/ilo/ilo_gpe_gen6.c | 34 +++++---- src/gallium/drivers/ilo/ilo_gpe_gen6.h | 70 +++++++++-------- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 53 ++++++++++--- src/gallium/drivers/ilo/ilo_gpe_gen7.h | 100 ++++++++++++++++++------- src/gallium/drivers/ilo/ilo_shader.c | 2 +- src/gallium/drivers/ilo/shader/ilo_shader_fs.c | 15 +++- src/gallium/drivers/ilo/shader/ilo_shader_vs.c | 15 +++- 10 files changed, 236 insertions(+), 107 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline.c b/src/gallium/drivers/ilo/ilo_3d_pipeline.c index afe1e4c09b3..c4e133ec57b 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline.c @@ -81,6 +81,7 @@ ilo_3d_pipeline_create(struct ilo_cp *cp, const struct ilo_dev_info *dev) ilo_3d_pipeline_init_gen6(p); break; case ILO_GEN(7): + case ILO_GEN(7.5): ilo_3d_pipeline_init_gen7(p); break; default: diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index c51de1412dc..b585fd6052d 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -399,11 +399,26 @@ gen6_pipeline_vf(struct ilo_3d_pipeline *p, const struct ilo_context *ilo, struct gen6_pipeline_session *session) { - /* 3DSTATE_INDEX_BUFFER */ - if (DIRTY(IB) || session->primitive_restart_changed || - session->batch_bo_changed) { - gen6_emit_3DSTATE_INDEX_BUFFER(p->dev, - &ilo->ib, ilo->draw->primitive_restart, p->cp); + if (p->dev->gen >= ILO_GEN(7.5)) { + /* 3DSTATE_INDEX_BUFFER */ + if (DIRTY(IB) || session->batch_bo_changed) { + gen6_emit_3DSTATE_INDEX_BUFFER(p->dev, + &ilo->ib, false, p->cp); + } + + /* 3DSTATE_VF */ + if (session->primitive_restart_changed) { + gen7_emit_3DSTATE_VF(p->dev, ilo->draw->primitive_restart, + ilo->draw->restart_index, p->cp); + } + } + else { + /* 3DSTATE_INDEX_BUFFER */ + if (DIRTY(IB) || session->primitive_restart_changed || + session->batch_bo_changed) { + gen6_emit_3DSTATE_INDEX_BUFFER(p->dev, + &ilo->ib, ilo->draw->primitive_restart, p->cp); + } } /* 3DSTATE_VERTEX_BUFFERS */ diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index 27ecd952bb5..51b2218184d 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -46,7 +46,7 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p, struct intel_bo *bo = NULL; uint32_t dw1 = PIPE_CONTROL_CS_STALL; - assert(p->dev->gen == ILO_GEN(7)); + assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); /* emit once */ if (p->state.has_gen6_wa_pipe_control) @@ -94,7 +94,7 @@ gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p, static void gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p) { - assert(p->dev->gen == ILO_GEN(7)); + assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); /* * From the Ivy Bridge PRM, volume 2 part 1, page 106: @@ -115,7 +115,7 @@ static void gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p, bool change_depth_buffer) { - assert(p->dev->gen == ILO_GEN(7)); + assert(p->dev->gen == ILO_GEN(7) || p->dev->gen == ILO_GEN(7.5)); /* * From the Ivy Bridge PRM, volume 2 part 1, page 276: @@ -188,7 +188,8 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p, /* 3DSTATE_URB_{VS,GS,HS,DS} */ if (DIRTY(VE) || DIRTY(VS)) { /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = 16 * 1024; + const int offset = + (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384; int vs_entry_size, vs_total_size; vs_entry_size = (ilo->vs) ? @@ -227,16 +228,21 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p, /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */ if (session->hw_ctx_changed) { /* - * push constant buffers are only allowed to take up at most the first - * 16KB of the URB + * Push constant buffers are only allowed to take up at most the first + * 16KB of the URB. Split the space evenly for VS and FS. */ - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, - 0, 8192, p->cp); + const int max_size = + (p->dev->gen == ILO_GEN(7.5) && p->dev->gt == 3) ? 32768 : 16384; + const int size = max_size / 2; + int offset = 0; - gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, - 8192, 8192, p->cp); + gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(p->dev, offset, size, p->cp); + offset += size; - gen7_wa_pipe_control_cs_stall(p, true, true); + gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->dev, offset, size, p->cp); + + if (p->dev->gen == ILO_GEN(7)) + gen7_wa_pipe_control_cs_stall(p, true, true); } } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index cd122734868..e3cbd42995e 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -261,7 +261,7 @@ ve_init_cso(const struct ilo_dev_info *dev, }; int format; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); switch (util_format_get_nr_components(state->src_format)) { case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; @@ -295,7 +295,7 @@ ilo_gpe_init_ve(const struct ilo_dev_info *dev, { unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ve->count = num_states; ve->vb_count = 0; @@ -335,7 +335,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, int start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT); @@ -377,7 +377,7 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev, break; case ILO_GEN(7.5): /* see brwCreateContext() */ - max_threads = (dev->gt == 2) ? 280 : 70; + max_threads = (dev->gt >= 2) ? 280 : 70; break; default: max_threads = 1; @@ -513,7 +513,7 @@ ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev, { uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); dw1 = GEN6_CLIP_STATISTICS_ENABLE; @@ -603,7 +603,7 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, int line_width, point_width; uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * Scale the constant term. The minimum representable value used by the HW @@ -740,6 +740,9 @@ ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev, dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT; + if (dev->gen >= ILO_GEN(7.5) && state->line_stipple_enable) + dw2 |= HSW_SF_LINE_STIPPLE_ENABLE; + if (state->scissor) dw2 |= GEN6_SF_SCISSOR_ENABLE; @@ -961,7 +964,7 @@ static void zs_init_info_null(const struct ilo_dev_info *dev, struct ilo_zs_surface_info *info) { - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); memset(info, 0, sizeof(*info)); @@ -986,7 +989,7 @@ zs_init_info(const struct ilo_dev_info *dev, bool separate_stencil; uint32_t x_offset[3], y_offset[3]; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); memset(info, 0, sizeof(*info)); @@ -1208,7 +1211,7 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, struct ilo_zs_surface_info info; uint32_t dw1, dw2, dw3, dw4, dw5, dw6; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (tex) zs_init_info(dev, tex, format, level, first_layer, num_layers, &info); @@ -1329,6 +1332,9 @@ ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev, zs->payload[6] = info.stencil.stride - 1; zs->payload[7] = info.stencil.offset; + if (dev->gen >= ILO_GEN(7.5)) + zs->payload[6] |= HSW_STENCIL_ENABLED; + /* do not increment reference count */ zs->separate_s8_bo = info.stencil.bo; } @@ -1422,7 +1428,7 @@ ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, const float scale_z = fabs(state->scale[2]); int min_gbx, max_gbx, min_gby, max_gby; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); viewport_get_guardband(dev, (int) state->translate[0], @@ -1511,7 +1517,7 @@ ilo_gpe_init_blend(const struct ilo_dev_info *dev, { unsigned num_cso, i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (state->independent_blend_enable) { num_cso = Elements(blend->cso); @@ -1609,7 +1615,7 @@ ilo_gpe_init_dsa(const struct ilo_dev_info *dev, const struct pipe_alpha_state *alpha = &state->alpha; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); STATIC_ASSERT(Elements(dsa->payload) >= 3); dw = dsa->payload; @@ -1699,7 +1705,7 @@ ilo_gpe_set_scissor(const struct ilo_dev_info *dev, { unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); for (i = 0; i < num_states; i++) { uint16_t min_x, min_y, max_x, max_y; @@ -2176,7 +2182,7 @@ ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev, bool clamp_is_to_edge; uint32_t dw0, dw1, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); memset(sampler, 0, sizeof(*sampler)); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index d8d71d73088..abd44f57e2a 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -266,7 +266,7 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, int output_count, vue_offset, vue_len; const struct ilo_kernel_routing *routing; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); assert(num_dwords == 13); if (!fs) { @@ -353,7 +353,7 @@ gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01); const uint8_t cmd_len = 10; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* 4K-page aligned */ assert(((general_state_size | dynamic_state_size | @@ -429,7 +429,7 @@ gen6_emit_STATE_SIP(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02); const uint8_t cmd_len = 2; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -445,7 +445,7 @@ gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b); const uint8_t cmd_len = 1; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | enable); @@ -460,7 +460,7 @@ gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev, const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04); const uint8_t cmd_len = 1; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* 3D or media */ assert(pipeline == 0x0 || pipeline == 0x1); @@ -695,7 +695,7 @@ gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev, uint8_t cmd_len; unsigned hw_idx; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 82: @@ -772,7 +772,7 @@ ve_init_cso_with_components(const struct ilo_dev_info *dev, int comp0, int comp1, int comp2, int comp3, struct ilo_ve_cso *cso) { - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); STATIC_ASSERT(Elements(cso->payload) >= 2); cso->payload[0] = GEN6_VE0_VALID; @@ -789,7 +789,7 @@ ve_set_cso_edgeflag(const struct ilo_dev_info *dev, { int format; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 94: @@ -846,7 +846,7 @@ gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev, uint8_t cmd_len; unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 93: @@ -922,11 +922,15 @@ gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev, uint32_t start_offset, end_offset; int format; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (!buf) return; + /* this is moved to the new 3DSTATE_VF */ + if (dev->gen >= ILO_GEN(7.5)) + assert(!enable_cut_index); + switch (ib->hw_index_size) { case 4: format = BRW_INDEX_DWORD; @@ -1014,7 +1018,7 @@ gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f); const uint8_t cmd_len = 2; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1033,7 +1037,7 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev, const struct ilo_shader_cso *cso; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (!vs) { ilo_cp_begin(cp, cmd_len); @@ -1145,7 +1149,7 @@ gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev, const uint8_t cmd_len = 4; uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (rasterizer) { int interps; @@ -1456,7 +1460,7 @@ gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev, unsigned ymax = y + height - 1; int rect_limit; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (dev->gen >= ILO_GEN(7)) { rect_limit = 16383; @@ -1501,7 +1505,7 @@ gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05); const uint8_t cmd_len = 7; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1523,7 +1527,7 @@ gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06); const uint8_t cmd_len = 2; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); assert(x_offset >= 0 && x_offset <= 31); assert(y_offset >= 0 && y_offset <= 31); @@ -1542,7 +1546,7 @@ gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev, const uint8_t cmd_len = 33; int i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); assert(Elements(pattern->stipple) == 32); ilo_cp_begin(cp, cmd_len); @@ -1561,7 +1565,7 @@ gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev, const uint8_t cmd_len = 3; unsigned inverse; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); assert((pattern & 0xffff) == pattern); assert(factor >= 1 && factor <= 256); @@ -1590,7 +1594,7 @@ gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a); const uint8_t cmd_len = 3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1636,7 +1640,7 @@ gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev, const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3; uint32_t dw1, dw2, dw3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); dw1 = (pixel_location_center) ? MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT; @@ -1686,7 +1690,7 @@ gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev, ILO_GPE_CMD(0x3, 0x1, 0x0e); const uint8_t cmd_len = 3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1707,7 +1711,7 @@ gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev, ILO_GPE_CMD(0x3, 0x1, 0x0f); const uint8_t cmd_len = 3; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1747,7 +1751,7 @@ gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev, const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION; const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (dw1 & PIPE_CONTROL_CS_STALL) { /* @@ -1985,7 +1989,7 @@ gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, uint32_t state_offset, *dw; unsigned i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 385: @@ -2020,7 +2024,7 @@ gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev, const int state_len = 6; uint32_t state_offset, *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE", state_len, state_align, &state_offset); @@ -2049,7 +2053,7 @@ gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev, uint32_t state_offset, *dw; unsigned num_targets, i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 376: @@ -2173,7 +2177,7 @@ gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev, uint32_t state_offset, *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE", state_len, state_align, &state_offset); @@ -2195,7 +2199,7 @@ gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev, const int state_len = 2 * num_viewports; uint32_t state_offset, *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 2 part 1, page 263: @@ -2223,7 +2227,7 @@ gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev, const int state_len = num_surface_states; uint32_t state_offset, *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 4 part 1, page 69: @@ -2254,7 +2258,7 @@ gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev, uint32_t state_offset; uint32_t read_domains, write_domain; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); if (for_render) { read_domains = INTEL_DOMAIN_RENDER; @@ -2342,7 +2346,7 @@ gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev, uint32_t state_offset, *dw; int i; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); /* * From the Sandy Bridge PRM, volume 4 part 1, page 101: @@ -2448,7 +2452,7 @@ gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev, const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12; uint32_t state_offset, *dw; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE", state_len, state_align, &state_offset); @@ -2473,7 +2477,7 @@ gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev, uint32_t state_offset; char *buf; - ILO_GPE_VALID_GEN(dev, 6, 7); + ILO_GPE_VALID_GEN(dev, 6, 7.5); buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER", state_len, state_align, &state_offset); diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 84ac0f86dbe..545b3677bb4 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -42,7 +42,7 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, int start_grf, vue_read_len, max_threads; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG); vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT); @@ -51,6 +51,9 @@ ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, vue_read_len = (vue_read_len + 1) / 2; switch (dev->gen) { + case ILO_GEN(7.5): + max_threads = (dev->gt >= 2) ? 256 : 70; + break; case ILO_GEN(7): max_threads = (dev->gt == 2) ? 128 : 36; break; @@ -83,7 +86,7 @@ ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev, { uint32_t dw1, dw2; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); dw1 = GEN7_WM_POSITION_ZW_PIXEL | GEN7_WM_LINE_AA_WIDTH_2_0 | @@ -132,16 +135,27 @@ ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev, uint32_t dw2, dw4, dw5; uint32_t wm_interps, wm_dw1; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG); - /* see brwCreateContext() */ - max_threads = (dev->gt == 2) ? 172 : 48; dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT; - dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT | - GEN7_PS_POSOFFSET_NONE; + dw4 = GEN7_PS_POSOFFSET_NONE; + + /* see brwCreateContext() */ + switch (dev->gen) { + case ILO_GEN(7.5): + max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; + dw4 |= (max_threads - 1) << HSW_PS_MAX_THREADS_SHIFT; + dw4 |= 1 << HSW_PS_SAMPLE_MASK_SHIFT; + break; + case ILO_GEN(7): + default: + max_threads = (dev->gt == 2) ? 172 : 48; + dw4 |= (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT; + break; + } if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE)) dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; @@ -226,7 +240,7 @@ ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev, { uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* * From the Ivy Bridge PRM, volume 4 part 1, page 62: @@ -299,7 +313,7 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, int surface_type, surface_format, num_entries; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER; @@ -396,6 +410,13 @@ ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev, dw[6] = 0; dw[7] = 0; + if (dev->gen >= ILO_GEN(7.5)) { + dw[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + } + /* do not increment reference count */ surf->bo = buf->bo; } @@ -416,7 +437,7 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, unsigned layer_offset, x_offset, y_offset; uint32_t *dw; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); surface_type = ilo_gpe_gen6_translate_texture(tex->base.target); assert(surface_type != BRW_SURFACE_BUFFER); @@ -646,6 +667,13 @@ ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev, dw[6] = 0; dw[7] = 0; + if (dev->gen >= ILO_GEN(7.5)) { + dw[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + } + /* do not increment reference count */ surf->bo = tex->bo; } @@ -675,6 +703,7 @@ ilo_gpe_gen7_estimate_command_size(const struct ilo_dev_info *dev, [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 }, [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 }, [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 }, + [ILO_GPE_GEN7_3DSTATE_VF] = { 0, 2 }, [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 }, [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 }, [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 }, @@ -732,7 +761,7 @@ ilo_gpe_gen7_estimate_command_size(const struct ilo_dev_info *dev, const int body = gen7_command_size_table[cmd].body; const int count = arg; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT); return (likely(count)) ? header + body * count : 0; @@ -767,7 +796,7 @@ ilo_gpe_gen7_estimate_state_size(const struct ilo_dev_info *dev, const int count = arg; int estimate; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); assert(state < ILO_GPE_GEN7_STATE_COUNT); if (likely(count)) { diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 1d4c0a13e82..dc138b7e6c5 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -56,6 +56,7 @@ enum ilo_gpe_gen7_command { ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS, /* (0x3, 0x0, 0x08) */ ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS, /* (0x3, 0x0, 0x09) */ ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER, /* (0x3, 0x0, 0x0a) */ + ILO_GPE_GEN7_3DSTATE_VF, /* (0x3, 0x0, 0x0c) */ ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS, /* (0x3, 0x0, 0x0e) */ ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS, /* (0x3, 0x0, 0x0f) */ ILO_GPE_GEN7_3DSTATE_VS, /* (0x3, 0x0, 0x10) */ @@ -157,7 +158,7 @@ gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x04); const uint8_t cmd_len = 3; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -166,6 +167,24 @@ gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev, ilo_cp_end(cp); } +static inline void +gen7_emit_3DSTATE_VF(const struct ilo_dev_info *dev, + bool enable_cut_index, + uint32_t cut_index, + struct ilo_cp *cp) +{ + const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0c); + const uint8_t cmd_len = 2; + + ILO_GPE_VALID_GEN(dev, 7.5, 7.5); + + ilo_cp_begin(cp, cmd_len); + ilo_cp_write(cp, cmd | (cmd_len - 2) | + ((enable_cut_index) ? HSW_CUT_INDEX_ENABLE : 0)); + ilo_cp_write(cp, cut_index); + ilo_cp_end(cp); +} + static inline void gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, int subop, uint32_t pointer, @@ -174,7 +193,7 @@ gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop); const uint8_t cmd_len = 2; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -201,7 +220,7 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev, const struct ilo_shader_cso *cso; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); if (!gs) { ilo_cp_begin(cp, cmd_len); @@ -245,7 +264,7 @@ gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev, const int num_samples = 1; uint32_t payload[6]; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer, num_samples, @@ -270,7 +289,7 @@ gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev, const int num_samples = 1; uint32_t dw1, dw2; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* see ilo_gpe_init_rasterizer_wm() */ dw1 = rasterizer->wm.payload[0]; @@ -319,7 +338,7 @@ gen7_emit_3dstate_constant(const struct ilo_dev_info *dev, uint32_t dw[6]; int total_read_length, i; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* VS, HS, DS, GS, and PS variants */ assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18); @@ -412,7 +431,7 @@ gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev, const uint8_t cmd_len = 2; const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* * From the Ivy Bridge PRM, volume 2 part 1, page 294: @@ -458,7 +477,7 @@ gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b); const uint8_t cmd_len = 7; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); assert(!hs); @@ -480,7 +499,7 @@ gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c); const uint8_t cmd_len = 4; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -499,7 +518,7 @@ gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev, const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d); const uint8_t cmd_len = 6; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); assert(!ds); @@ -527,7 +546,7 @@ gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev, uint32_t dw1, dw2; int read_len; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); if (!enable) { dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT; @@ -586,7 +605,7 @@ gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev, const uint8_t cmd_len = 14; uint32_t dw[13]; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, dw, Elements(dw)); @@ -607,20 +626,33 @@ gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev, const struct ilo_shader_cso *cso; uint32_t dw2, dw4, dw5; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); if (!fs) { + int max_threads; + + /* GPU hangs if none of the dispatch enable bits is set */ + dw4 = GEN7_PS_8_DISPATCH_ENABLE; + /* see brwCreateContext() */ - const int max_threads = (dev->gt == 2) ? 172 : 48; + switch (dev->gen) { + case ILO_GEN(7.5): + max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102; + dw4 |= max_threads << HSW_PS_MAX_THREADS_SHIFT; + break; + case ILO_GEN(7): + default: + max_threads = (dev->gt == 2) ? 172 : 48; + dw4 |= max_threads << IVB_PS_MAX_THREADS_SHIFT; + break; + } ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); ilo_cp_write(cp, 0); ilo_cp_write(cp, 0); ilo_cp_write(cp, 0); - /* GPU hangs if none of the dispatch enable bits is set */ - ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT | - GEN7_PS_8_DISPATCH_ENABLE); + ilo_cp_write(cp, dw4); ilo_cp_write(cp, 0); ilo_cp_write(cp, 0); ilo_cp_write(cp, 0); @@ -774,7 +806,7 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, const int row_size = 64; /* 512 bits */ int alloc_size, num_entries, min_entries, max_entries; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* VS, HS, DS, and GS variants */ assert(subop >= 0x30 && subop <= 0x33); @@ -804,7 +836,16 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, switch (subop) { case 0x30: /* 3DSTATE_URB_VS */ min_entries = 32; - max_entries = (dev->gt == 2) ? 704 : 512; + + switch (dev->gen) { + case ILO_GEN(7.5): + max_entries = (dev->gt >= 2) ? 1644 : 640; + break; + case ILO_GEN(7): + default: + max_entries = (dev->gt == 2) ? 704 : 512; + break; + } assert(num_entries >= min_entries); if (num_entries > max_entries) @@ -820,7 +861,16 @@ gen7_emit_3dstate_urb(const struct ilo_dev_info *dev, assert(num_entries >= 138); break; case 0x33: /* 3DSTATE_URB_GS */ - max_entries = (dev->gt == 2) ? 320 : 192; + switch (dev->gen) { + case ILO_GEN(7.5): + max_entries = (dev->gt >= 2) ? 640 : 256; + break; + case ILO_GEN(7): + default: + max_entries = (dev->gt == 2) ? 320 : 192; + break; + } + if (num_entries > max_entries) num_entries = max_entries; break; @@ -877,7 +927,7 @@ gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev, const uint8_t cmd_len = 2; int end; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* VS, HS, DS, GS, and PS variants */ assert(subop >= 0x12 && subop <= 0x16); @@ -977,7 +1027,7 @@ gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev, int buffer_selects, num_entries, i; uint16_t so_decls[128]; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); buffer_selects = 0; num_entries = 0; @@ -1069,7 +1119,7 @@ gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev, struct ilo_buffer *buf; int end; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); if (!so_target || !so_target->buffer) { ilo_cp_begin(cp, cmd_len); @@ -1117,7 +1167,7 @@ gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, const uint32_t vb_start = info->start + ((info->indexed) ? ib->draw_start_offset : 0); - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); ilo_cp_begin(cp, cmd_len); ilo_cp_write(cp, cmd | (cmd_len - 2)); @@ -1141,7 +1191,7 @@ gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, uint32_t state_offset, *dw; unsigned i; - ILO_GPE_VALID_GEN(dev, 7, 7); + ILO_GPE_VALID_GEN(dev, 7, 7.5); /* * From the Ivy Bridge PRM, volume 2 part 1, page 270: diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 0c785201d1a..480b4771816 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -903,7 +903,7 @@ ilo_shader_select_kernel_routing(struct ilo_shader_state *shader, /* we are constructing 3DSTATE_SBE here */ assert(shader->info.dev->gen >= ILO_GEN(6) && - shader->info.dev->gen <= ILO_GEN(7)); + shader->info.dev->gen <= ILO_GEN(7.5)); assert(kernel); diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c index 48d5721631c..0a65498b536 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c +++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c @@ -790,10 +790,19 @@ fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst, } break; case TOY_OPCODE_TGSI_TXD: - if (ref_pos >= 0) - tc_fail(tc, "TXD with shadow sampler not supported"); + if (ref_pos >= 0) { + assert(ref_pos < 4); + + msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; + ref_or_si = coords[ref_pos]; + + if (tc->dev->gen < ILO_GEN(7.5)) + tc_fail(tc, "TXD with shadow sampler not supported"); + } + else { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; + } - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; tsrc_transpose(inst->src[1], ddx); tsrc_transpose(inst->src[2], ddy); num_derivs = num_coords; diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c index b5b44b57796..c20ecc6f6a2 100644 --- a/src/gallium/drivers/ilo/shader/ilo_shader_vs.c +++ b/src/gallium/drivers/ilo/shader/ilo_shader_vs.c @@ -411,10 +411,19 @@ vs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst, /* extract the parameters */ switch (inst->opcode) { case TOY_OPCODE_TGSI_TXD: - if (ref_pos >= 0) - tc_fail(tc, "TXD with shadow sampler not supported"); + if (ref_pos >= 0) { + assert(ref_pos < 4); + + msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; + ref_or_si = tsrc_swizzle1(coords, ref_pos); + + if (tc->dev->gen < ILO_GEN(7.5)) + tc_fail(tc, "TXD with shadow sampler not supported"); + } + else { + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; + } - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; ddx = inst->src[1]; ddy = inst->src[2]; num_derivs = num_coords; -- cgit v1.2.3