diff options
author | Chia-I Wu <[email protected]> | 2015-10-23 00:24:26 +0800 |
---|---|---|
committer | Chia-I Wu <[email protected]> | 2015-10-23 17:29:58 +0800 |
commit | 4a7d18296a9e80d2c5458bf77f8eb88913433c90 (patch) | |
tree | 1b954ffa50a755f2d12c21bae258e362e674bf6c /src | |
parent | 3994ef5f1b7386c17dff532cb5d04a7823520c7a (diff) |
ilo: fix scratch space setup in core
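Move scratch_size out of ilo_state_shader_kernel_info and
ilo_state_compute_interface_info: a scratch space is shared by all
kernels/interfaces, so its per-thread size is now specified once per state
(per_thread_scratch_size). Update the builder to emit relocations for the
scratch buffer objects (bos).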
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h | 33 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_builder_3d_top.h | 99 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_compute.c | 95 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_compute.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_shader.c | 74 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_shader.h | 43 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_shader_ps.c | 52 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_render_gen6.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_render_gen7.c | 28 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_render_gen8.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/ilo_shader.c | 4 |
11 files changed, 327 insertions, 133 deletions
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h index 5efe9da2d22..2e9470e66e9 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h @@ -202,14 +202,16 @@ static inline void gen6_3DSTATE_WM(struct ilo_builder *builder, const struct ilo_state_raster *rs, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); dw[1] = kernel_offset; @@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder, dw[6] = rs->wm[2] | ps->ps[4]; dw[7] = 0; /* kernel 1 */ dw[8] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 2, scratch_bo, + ps->ps[0], 0); + } } static inline void @@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder) static inline void gen7_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 8; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder, dw[5] = ps->ps[5]; dw[6] = 0; /* kernel 1 */ dw[7] = 0; /* kernel 2 */ + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ps->ps[3], 0); + } } static inline void gen8_3DSTATE_PS(struct ilo_builder *builder, const struct ilo_state_ps *ps, - uint32_t 
kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 12; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder, dw[9] = 0; dw[10] = 0; /* kernel 2 */ dw[11] = 0; + + if (ilo_state_ps_get_scratch_size(ps)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ps->ps[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 6e94fb25f1f..3a448719c15 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, static inline void gen6_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder, dw[3] = vs->vs[1]; dw[4] = vs->vs[2]; dw[5] = vs->vs[3]; + + if (ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + vs->vs[1], 0); + } } static inline void gen8_3DSTATE_VS(struct ilo_builder *builder, const struct ilo_state_vs *vs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); 
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder, dw[6] = vs->vs[2]; dw[7] = vs->vs[3]; dw[8] = vs->vs[4]; + + if (ilo_state_vs_get_scratch_size(vs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + vs->vs[1], 0); + } } static inline void gen7_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see hs_set_gen7_3DSTATE_HS() */ @@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder, dw[4] = hs->hs[2]; dw[5] = hs->hs[3]; dw[6] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc(builder, pos + 4, scratch_bo, + hs->hs[2], 0); + } } static inline void gen8_3DSTATE_HS(struct ilo_builder *builder, const struct ilo_state_hs *hs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2); /* see hs_set_gen7_3DSTATE_HS() */ @@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder, dw[6] = 0; dw[7] = hs->hs[3]; dw[8] = 0; + + if (ilo_state_hs_get_scratch_size(hs)) { + ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo, + hs->hs[2], 0); + } } static inline void @@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder, static inline void gen7_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - 
uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 6; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder, dw[3] = ds->ds[1]; dw[4] = ds->ds[2]; dw[5] = ds->ds[3]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + ds->ds[1], 0); + } } static inline void gen8_3DSTATE_DS(struct ilo_builder *builder, const struct ilo_state_ds *ds, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 9; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2); /* see ds_set_gen7_3DSTATE_DS() */ @@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder, dw[6] = ds->ds[2]; dw[7] = ds->ds[3]; dw[8] = ds->ds[4]; + + if (ilo_state_ds_get_scratch_size(ds)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + ds->ds[1], 0); + } } static inline void gen6_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 6, 6); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + 
ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void @@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, static inline void gen7_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 7; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder, dw[4] = gs->gs[2]; dw[5] = gs->gs[3]; dw[6] = 0; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc(builder, pos + 3, scratch_bo, + gs->gs[1], 0); + } } static inline void gen8_3DSTATE_GS(struct ilo_builder *builder, const struct ilo_state_gs *gs, - uint32_t kernel_offset) + uint32_t kernel_offset, + struct intel_bo *scratch_bo) { const uint8_t cmd_len = 10; uint32_t *dw; + unsigned pos; ILO_DEV_ASSERT(builder->dev, 8, 8); - ilo_builder_batch_pointer(builder, cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); dw[1] = kernel_offset; @@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder, dw[7] = gs->gs[3]; dw[8] = 0; dw[9] = gs->gs[4]; + + if (ilo_state_gs_get_scratch_size(gs)) { + ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo, + gs->gs[1], 0); + } } static inline void diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c index a5fe5e1a6b0..ba3ff9001e1 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.c +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c @@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev, */ assert(per_thread_read <= 63); - /* From the 
Haswell PRM, volume 2d, page 199: + /* + * From the Haswell PRM, volume 2d, page 199: * * "(Cross-Thread Constant Data Read Length) [0,127]" */ @@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev, return true; } -static uint8_t -compute_get_gen6_scratch_space(const struct ilo_dev *dev, - const struct ilo_state_compute_info *info) +static uint32_t +compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) { - uint32_t scratch_size = 0; - uint8_t i; + ILO_DEV_ASSERT(dev, 6, 7); - ILO_DEV_ASSERT(dev, 6, 8); + /* + * From the Sandy Bridge PRM, volume 2 part 2, page 30: + * + * "(Per Thread Scratch Space) + * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]" + */ + assert(info->per_thread_scratch_size <= 12 * 1024); - for (i = 0; i < info->interface_count; i++) { - if (scratch_size < info->interfaces[i].scratch_size) - scratch_size = info->interfaces[i].scratch_size; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } - if (ilo_dev_gen(dev) >= ILO_GEN(8)) { - assert(scratch_size <= 2 * 1024 * 1024); + *per_thread_space = (info->per_thread_scratch_size > 1024) ? + (info->per_thread_scratch_size - 1) / 1024 : 0; + + return 1024 * (1 + *per_thread_space); +} - /* next power of two, starting from 1KB */ - return (scratch_size > 1024) ? - (util_last_bit(scratch_size - 1) - 10): 0; - } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { - assert(scratch_size <= 2 * 1024 * 1024); +static uint32_t +compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev, + const struct ilo_state_compute_info *info, + uint8_t *per_thread_space) +{ + ILO_DEV_ASSERT(dev, 7.5, 8); - /* next power of two, starting from 2KB */ - return (scratch_size > 2048) ? 
- (util_last_bit(scratch_size - 1) - 11): 0; - } else { - assert(scratch_size <= 12 * 1024); + /* + * From the Haswell PRM, volume 2b, page 407: + * + * "(Per Thread Scratch Space) + * [0,10] Indicating [2k bytes, 2 Mbytes]" + * + * "Note: The scratch space should be declared as 2x the desired + * scratch space. The stack will start at the half-way point instead + * of the end. The upper half of scratch space will not be accessed + * and so does not have to be allocated in memory." + * + * From the Broadwell PRM, volume 2a, page 450: + * + * "(Per Thread Scratch Space) + * [0,11] indicating [1k bytes, 2 Mbytes]" + */ + assert(info->per_thread_scratch_size <= + ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024); - return (scratch_size > 1024) ? - (scratch_size - 1) / 1024 : 0; + if (!info->per_thread_scratch_size) { + *per_thread_space = 0; + return 0; } + + /* next power of two, starting from 1KB */ + *per_thread_space = (info->per_thread_scratch_size > 1024) ? + (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + + return 1 << (10 + *per_thread_space); } static bool @@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, const struct ilo_state_compute_info *info) { struct compute_urb_configuration urb; - uint8_t scratch_space; + uint32_t per_thread_size; + uint8_t per_thread_space; uint32_t dw1, dw2, dw4; @@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, !compute_validate_gen6(dev, info, &urb)) return false; - scratch_space = compute_get_gen6_scratch_space(dev, info); + if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) { + per_thread_size = compute_get_gen75_per_thread_scratch_size(dev, + info, &per_thread_space); + } else { + per_thread_size = compute_get_gen6_per_thread_scratch_size(dev, + info, &per_thread_space); + } + + dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; - dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT; dw2 = 
(dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT | urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT | GEN6_VFE_DW2_RESET_GATEWAY_TIMER | @@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute, compute->vfe[1] = dw2; compute->vfe[2] = dw4; + compute->scratch_size = per_thread_size * dev->thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h index 346f7b617f4..bd56bba4369 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_compute.h +++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h @@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info { /* usually 0 unless there are multiple interfaces */ uint32_t kernel_offset; - uint32_t scratch_size; - uint8_t sampler_count; uint8_t surface_count; @@ -65,6 +63,8 @@ struct ilo_state_compute_info { const struct ilo_state_compute_interface_info *interfaces; uint8_t interface_count; + uint32_t per_thread_scratch_size; + uint32_t cv_urb_alloc_size; uint32_t curbe_alloc_size; }; @@ -74,6 +74,8 @@ struct ilo_state_compute { uint32_t (*idrt)[6]; uint8_t idrt_count; + + uint32_t scratch_size; }; static inline size_t @@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute, const struct ilo_dev *dev, const struct ilo_state_compute_info *info); +static inline uint32_t +ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute) +{ + return compute->scratch_size; +} + #endif /* ILO_STATE_COMPUTE_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c index 2e06b07a8e3..aec4fd6d8a6 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c @@ -37,7 +37,9 @@ enum vertex_stage { struct vertex_ff { uint8_t grf_start; - uint8_t scratch_space; + + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t 
surface_count; @@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, * others. */ const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 134: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1K Bytes, 2M Bytes]" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev, assert(!kernel->offset); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info *kernel, const struct ilo_state_shader_resource_info *resource, const struct ilo_state_shader_urb_info *urb, + uint32_t per_thread_scratch_size, struct vertex_ff *ff) { ILO_DEV_ASSERT(dev, 6, 8); + memset(ff, 0, sizeof(*ff)); + if (!vertex_validate_gen6_kernel(dev, stage, kernel) || !vertex_validate_gen6_urb(dev, stage, urb)) return false; ff->grf_start = kernel->grf_start; - /* next power of two, starting from 1KB */ - ff->scratch_space = (kernel->scratch_size > 1024) ? - (util_last_bit(kernel->scratch_size - 1) - 10): 0; + + if (per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 134: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1K Bytes, 2M Bytes]" + */ + assert(per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ? + (util_last_bit(per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; @@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = vs_get_gen6_thread_count(dev, info); @@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT | @@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT; + vs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = hs_get_gen7_thread_count(dev, info); @@ -296,7 +308,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, if (info->stats_enable) dw2 |= GEN7_HS_DW2_STATISTICS; - dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES | ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT | @@ -312,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs, hs->hs[2] = dw4; hs->hs[3] = dw5; + hs->scratch_size = ff.per_thread_scratch_size 
* thread_count; + return true; } @@ -375,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, ILO_DEV_ASSERT(dev, 7, 8); - if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, - &info->resource, &info->urb, &ff)) + if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, &ff)) return false; thread_count = ds_get_gen7_thread_count(dev, info); @@ -387,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT | ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT | @@ -414,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds, if (ilo_dev_gen(dev) >= ILO_GEN(8)) ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT; + ds->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } @@ -427,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev, ILO_DEV_ASSERT(dev, 6, 8); - if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, - &info->resource, &info->urb, ff)) + if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource, + &info->urb, info->per_thread_scratch_size, ff)) return false; /* @@ -512,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT | ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT | @@ -552,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs, gs->gs[3] = dw5; gs->gs[4] = dw6; + gs->scratch_size 
= ff.per_thread_scratch_size * thread_count; + return true; } @@ -590,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav) dw2 |= GEN75_THREADDISP_ACCESS_UAV; - dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff.per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT | 0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT | @@ -620,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs, if (ilo_dev_gen(dev) >= ILO_GEN(8)) gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT; + gs->scratch_size = ff.per_thread_scratch_size * thread_count; + return true; } diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h index 44690c5b0bb..35651090d66 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader.h +++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h @@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info { uint8_t grf_start; uint8_t pcb_attr_count; - - uint32_t scratch_size; }; /** @@ -77,6 +75,7 @@ struct ilo_state_vs_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -86,6 +85,7 @@ struct ilo_state_hs_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -95,6 +95,7 @@ struct ilo_state_ds_info { struct ilo_state_shader_resource_info resource; struct ilo_state_shader_urb_info urb; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -119,6 +120,7 @@ struct ilo_state_gs_info { struct ilo_state_gs_sol_info sol; + uint32_t per_thread_scratch_size; bool dispatch_enable; bool stats_enable; }; @@ -158,6 +160,8 @@ struct ilo_state_ps_info { struct ilo_state_ps_io_info io; struct 
ilo_state_ps_params_info params; + uint32_t per_thread_scratch_size; + /* bitmask of GEN6_PS_DISPATCH_x */ uint8_t valid_kernels; bool per_sample_dispatch; @@ -173,23 +177,28 @@ struct ilo_state_ps_info { struct ilo_state_vs { uint32_t vs[5]; + uint32_t scratch_size; }; struct ilo_state_hs { uint32_t hs[4]; + uint32_t scratch_size; }; struct ilo_state_ds { uint32_t te[3]; uint32_t ds[5]; + uint32_t scratch_size; }; struct ilo_state_gs { uint32_t gs[5]; + uint32_t scratch_size; }; struct ilo_state_ps { uint32_t ps[8]; + uint32_t scratch_size; struct ilo_state_ps_dispatch_conds { bool ps_valid; @@ -211,6 +220,12 @@ bool ilo_state_vs_init_disabled(struct ilo_state_vs *vs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs) +{ + return vs->scratch_size; +} + bool ilo_state_hs_init(struct ilo_state_hs *hs, const struct ilo_dev *dev, @@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs) +{ + return hs->scratch_size; +} + bool ilo_state_ds_init(struct ilo_state_ds *ds, const struct ilo_dev *dev, @@ -230,6 +251,12 @@ bool ilo_state_ds_init_disabled(struct ilo_state_ds *ds, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds) +{ + return ds->scratch_size; +} + bool ilo_state_gs_init(struct ilo_state_gs *gs, const struct ilo_dev *dev, @@ -239,6 +266,12 @@ bool ilo_state_gs_init_disabled(struct ilo_state_gs *gs, const struct ilo_dev *dev); +static inline uint32_t +ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs) +{ + return gs->scratch_size; +} + bool ilo_state_ps_init(struct ilo_state_ps *ps, const struct ilo_dev *dev, @@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps, const struct ilo_dev *dev, const struct ilo_state_ps_params_info *params); +static inline uint32_t 
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps) +{ + return ps->scratch_size; +} + #endif /* ILO_STATE_SHADER_H */ diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c index ceeb68a460e..5c3ca1ebe37 100644 --- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c +++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c @@ -34,7 +34,8 @@ struct pixel_ff { uint32_t kernel_offsets[3]; uint8_t grf_starts[3]; bool pcb_enable; - uint8_t scratch_space; + uint8_t per_thread_scratch_space; + uint32_t per_thread_scratch_size; uint8_t sampler_count; uint8_t surface_count; @@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, { /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */ const uint8_t max_grf_start = 128; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 271: - * - * "(Per-Thread Scratch Space) - * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" - */ - const uint32_t max_scratch_size = 2 * 1024 * 1024; ILO_DEV_ASSERT(dev, 6, 8); @@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev, assert(kernel->offset % 64 == 0); assert(kernel->grf_start < max_grf_start); - assert(kernel->scratch_size <= max_scratch_size); return true; } @@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8; const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16; const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32; - uint32_t scratch_size; ILO_DEV_ASSERT(dev, 6, 8); @@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev, ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && kernel_32->pcb_attr_count)); - scratch_size = 0; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) && - scratch_size < kernel_8->scratch_size) - scratch_size = kernel_8->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) && - scratch_size < 
kernel_16->scratch_size) - scratch_size = kernel_16->scratch_size; - if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) && - scratch_size < kernel_32->scratch_size) - scratch_size = kernel_32->scratch_size; - - /* next power of two, starting from 1KB */ - ff->scratch_space = (scratch_size > 1024) ? - (util_last_bit(scratch_size - 1) - 10): 0; - /* GPU hangs on Haswell if none of the dispatch mode bits is set */ if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes) ff->dispatch_modes |= GEN6_PS_DISPATCH_8; @@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev, if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff)) return false; + if (info->per_thread_scratch_size) { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 271: + * + * "(Per-Thread Scratch Space) + * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two" + */ + assert(info->per_thread_scratch_size <= 2 * 1024 * 1024); + + /* next power of two, starting from 1KB */ + ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ? + (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0; + ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space); + } + ff->sampler_count = (resource->sampler_count <= 12) ? 
(resource->sampler_count + 3) / 4 : 4; ff->surface_count = resource->surface_count; @@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT | ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT | @@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw2 |= GEN6_THREADDISP_FP_MODE_ALT; - dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw3 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT | ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT; @@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps, if (false) dw3 |= GEN6_THREADDISP_FP_MODE_ALT; - dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; + dw4 = ff->per_thread_scratch_space << + GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT; dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT | io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT | @@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps, ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff); } + ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count; /* save conditions */ ps->conds = ff.conds; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index c1f759f3043..c81514f9b4c 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -476,9 +476,9 @@ gen6_draw_vs(struct ilo_render *r, if (ilo_dev_gen(r->dev) == ILO_GEN(6) && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) - gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset); + gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL); else - 
gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL); } } @@ -501,7 +501,7 @@ gen6_draw_gs(struct ilo_render *r, cso = ilo_shader_get_kernel_cso(vec->gs); kernel_offset = ilo_shader_get_kernel_offset(vec->gs); - gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL); } else if (ilo_dev_gen(r->dev) == ILO_GEN(6) && ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) { const int verts_per_prim = @@ -524,9 +524,9 @@ gen6_draw_gs(struct ilo_render *r, kernel_offset = ilo_shader_get_kernel_offset(vec->vs) + ilo_shader_get_kernel_param(vec->vs, param); - gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset); + gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL); } else { - gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0); + gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL); } } } @@ -672,7 +672,7 @@ gen6_draw_wm(struct ilo_render *r, gen6_wa_pre_3dstate_wm_max_threads(r); gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs, - &cso->ps, kernel_offset); + &cso->ps, kernel_offset, NULL); } } @@ -817,10 +817,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r, gen6_wa_post_3dstate_constant_vs(r); gen6_wa_pre_3dstate_vs_toggle(r); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs); gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe); @@ -833,7 +833,7 @@ gen6_rectlist_wm(struct ilo_render *r, gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen6_wa_pre_3dstate_wm_max_threads(r); - gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0); + gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL); } static void diff --git 
a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 6623a8bcb43..97d9d058fdf 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -319,9 +319,9 @@ gen7_draw_vs(struct ilo_render *r, const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL); else - gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset); + gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL); } } @@ -338,9 +338,9 @@ gen7_draw_hs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_HS(r->builder, hs, kernel_offset); + gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); else - gen7_3DSTATE_HS(r->builder, hs, kernel_offset); + gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_HS */ @@ -373,9 +373,9 @@ gen7_draw_ds(struct ilo_render *r, gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_DS(r->builder, ds, kernel_offset); + gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); else - gen7_3DSTATE_DS(r->builder, ds, kernel_offset); + gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_DS */ @@ -397,9 +397,9 @@ gen7_draw_gs(struct ilo_render *r, gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0); if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) - gen8_3DSTATE_GS(r->builder, gs, kernel_offset); + gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); else - gen7_3DSTATE_GS(r->builder, gs, kernel_offset); + gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL); } /* 3DSTATE_BINDING_TABLE_POINTERS_GS */ @@ -534,7 +534,7 @@ gen7_draw_wm(struct ilo_render *r, if (r->hw_ctx_changed) gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &cso->ps, 
kernel_offset); + gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL); } /* 3DSTATE_SCISSOR_STATE_POINTERS */ @@ -678,18 +678,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r, const struct ilo_blitter *blitter) { gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0); - gen6_3DSTATE_VS(r->builder, &blitter->vs, 0); + gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL); gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_HS(r->builder, &blitter->hs, 0); + gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL); gen7_3DSTATE_TE(r->builder, &blitter->ds); gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_DS(r->builder, &blitter->ds, 0); + gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL); gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0); - gen7_3DSTATE_GS(r->builder, &blitter->gs, 0); + gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL); gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol); @@ -711,7 +711,7 @@ gen7_rectlist_wm(struct ilo_render *r, gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0); gen7_wa_pre_3dstate_ps_max_threads(r); - gen7_3DSTATE_PS(r->builder, &blitter->ps, 0); + gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL); } static void diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c index 65494b4058a..1f750a2bfed 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen8.c +++ b/src/gallium/drivers/ilo/ilo_render_gen8.c @@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r, /* 3DSTATE_PS */ if (DIRTY(FS) || r->instruction_bo_changed) - gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset); + gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL); /* 3DSTATE_PS_EXTRA */ if (DIRTY(FS)) diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index 73b625e9de4..c78d0e0b602 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -578,7 +578,6 @@ init_shader_kernel(const struct ilo_shader *kernel, 
kern->grf_start = kernel->in.start_grf; kern->pcb_attr_count = (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16; - kern->scratch_size = 0; } static void @@ -602,6 +601,7 @@ init_vs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = 0; info.dispatch_enable = true; info.stats_enable = true; @@ -640,6 +640,7 @@ init_gs(struct ilo_shader *kernel, init_shader_urb(kernel, state, &info.urb); init_shader_kernel(kernel, state, &info.kernel); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = 0; info.dispatch_enable = true; info.stats_enable = true; @@ -664,6 +665,7 @@ init_ps(struct ilo_shader *kernel, init_shader_kernel(kernel, state, &info.kernel_8); init_shader_resource(kernel, state, &info.resource); + info.per_thread_scratch_size = 0; info.io.has_rt_write = true; info.io.posoffset = GEN6_POSOFFSET_NONE; info.io.attr_count = kernel->in.count; |