about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Chia-I Wu <[email protected]> 2015-10-23 00:24:26 +0800
committer Chia-I Wu <[email protected]> 2015-10-23 17:29:58 +0800
commit 4a7d18296a9e80d2c5458bf77f8eb88913433c90 (patch)
tree 1b954ffa50a755f2d12c21bae258e362e674bf6c
parent 3994ef5f1b7386c17dff532cb5d04a7823520c7a (diff)
ilo: fix scratch space setup in core
Move scratch_size out of ilo_state_shader_kernel_info and ilo_state_compute_interface_info. A scratch space is shared by all kernels/interfaces. Update the builder to emit relocs (relocations) for the scratch bos (buffer objects).
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h 33
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_builder_3d_top.h 99
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_state_compute.c 95
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_state_compute.h 12
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_state_shader.c 74
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_state_shader.h 43
-rw-r--r-- src/gallium/drivers/ilo/core/ilo_state_shader_ps.c 52
-rw-r--r-- src/gallium/drivers/ilo/ilo_render_gen6.c 18
-rw-r--r-- src/gallium/drivers/ilo/ilo_render_gen7.c 28
-rw-r--r-- src/gallium/drivers/ilo/ilo_render_gen8.c 2
-rw-r--r-- src/gallium/drivers/ilo/ilo_shader.c 4
11 files changed, 327 insertions, 133 deletions
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 5efe9da2d22..2e9470e66e9 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -202,14 +202,16 @@ static inline void
gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_state_raster *rs,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -221,6 +223,11 @@ gen6_3DSTATE_WM(struct ilo_builder *builder,
dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
+ ps->ps[0], 0);
+ }
}
static inline void
@@ -329,14 +336,16 @@ gen8_3DSTATE_WM_CHROMAKEY(struct ilo_builder *builder)
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 8;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -347,19 +356,26 @@ gen7_3DSTATE_PS(struct ilo_builder *builder,
dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ps->ps[3], 0);
+ }
}
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 12;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -374,6 +390,11 @@ gen8_3DSTATE_PS(struct ilo_builder *builder,
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
dw[11] = 0;
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ps->ps[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 6e94fb25f1f..3a448719c15 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -477,14 +477,16 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -493,19 +495,26 @@ gen6_3DSTATE_VS(struct ilo_builder *builder,
dw[3] = vs->vs[1];
dw[4] = vs->vs[2];
dw[5] = vs->vs[3];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -517,19 +526,26 @@ gen8_3DSTATE_VS(struct ilo_builder *builder,
dw[6] = vs->vs[2];
dw[7] = vs->vs[3];
dw[8] = vs->vs[4];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -539,19 +555,26 @@ gen7_3DSTATE_HS(struct ilo_builder *builder,
dw[4] = hs->hs[2];
dw[5] = hs->hs[3];
dw[6] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
gen8_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
@@ -563,6 +586,11 @@ gen8_3DSTATE_HS(struct ilo_builder *builder,
dw[6] = 0;
dw[7] = hs->hs[3];
dw[8] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
@@ -586,14 +614,16 @@ gen7_3DSTATE_TE(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -602,19 +632,26 @@ gen7_3DSTATE_DS(struct ilo_builder *builder,
dw[3] = ds->ds[1];
dw[4] = ds->ds[2];
dw[5] = ds->ds[3];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen8_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
@@ -626,19 +663,26 @@ gen8_3DSTATE_DS(struct ilo_builder *builder,
dw[6] = ds->ds[2];
dw[7] = ds->ds[3];
dw[8] = ds->ds[4];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -648,6 +692,11 @@ gen6_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
@@ -677,14 +726,16 @@ gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -694,19 +745,26 @@ gen7_3DSTATE_GS(struct ilo_builder *builder,
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = 0;
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 10;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
@@ -719,6 +777,11 @@ gen8_3DSTATE_GS(struct ilo_builder *builder,
dw[7] = gs->gs[3];
dw[8] = 0;
dw[9] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.c b/src/gallium/drivers/ilo/core/ilo_state_compute.c
index a5fe5e1a6b0..ba3ff9001e1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.c
@@ -158,7 +158,8 @@ compute_interface_get_gen6_read_end(const struct ilo_dev *dev,
*/
assert(per_thread_read <= 63);
- /* From the Haswell PRM, volume 2d, page 199:
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
*
* "(Cross-Thread Constant Data Read Length) [0,127]"
*/
@@ -210,38 +211,68 @@ compute_validate_gen6(const struct ilo_dev *dev,
return true;
}
-static uint8_t
-compute_get_gen6_scratch_space(const struct ilo_dev *dev,
- const struct ilo_state_compute_info *info)
+static uint32_t
+compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
{
- uint32_t scratch_size = 0;
- uint8_t i;
+ ILO_DEV_ASSERT(dev, 6, 7);
- ILO_DEV_ASSERT(dev, 6, 8);
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 2, page 30:
+ *
+ * "(Per Thread Scratch Space)
+ * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
+ */
+ assert(info->per_thread_scratch_size <= 12 * 1024);
- for (i = 0; i < info->interface_count; i++) {
- if (scratch_size < info->interfaces[i].scratch_size)
- scratch_size = info->interfaces[i].scratch_size;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (info->per_thread_scratch_size - 1) / 1024 : 0;
+
+ return 1024 * (1 + *per_thread_space);
+}
- /* next power of two, starting from 1KB */
- return (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+static uint32_t
+compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
+{
+ ILO_DEV_ASSERT(dev, 7.5, 8);
- /* next power of two, starting from 2KB */
- return (scratch_size > 2048) ?
- (util_last_bit(scratch_size - 1) - 11): 0;
- } else {
- assert(scratch_size <= 12 * 1024);
+ /*
+ * From the Haswell PRM, volume 2b, page 407:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,10] Indicating [2k bytes, 2 Mbytes]"
+ *
+ * "Note: The scratch space should be declared as 2x the desired
+ * scratch space. The stack will start at the half-way point instead
+ * of the end. The upper half of scratch space will not be accessed
+ * and so does not have to be allocated in memory."
+ *
+ * From the Broadwell PRM, volume 2a, page 450:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,11] indicating [1k bytes, 2 Mbytes]"
+ */
+ assert(info->per_thread_scratch_size <=
+ ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
- return (scratch_size > 1024) ?
- (scratch_size - 1) / 1024 : 0;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
+
+ /* next power of two, starting from 1KB */
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+
+ return 1 << (10 + *per_thread_space);
}
static bool
@@ -250,7 +281,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
const struct ilo_state_compute_info *info)
{
struct compute_urb_configuration urb;
- uint8_t scratch_space;
+ uint32_t per_thread_size;
+ uint8_t per_thread_space;
uint32_t dw1, dw2, dw4;
@@ -260,9 +292,16 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
!compute_validate_gen6(dev, info, &urb))
return false;
- scratch_space = compute_get_gen6_scratch_space(dev, info);
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ } else {
+ per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ }
+
+ dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
- dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
@@ -281,6 +320,8 @@ compute_set_gen6_MEDIA_VFE_STATE(struct ilo_state_compute *compute,
compute->vfe[1] = dw2;
compute->vfe[2] = dw4;
+ compute->scratch_size = per_thread_size * dev->thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_compute.h b/src/gallium/drivers/ilo/core/ilo_state_compute.h
index 346f7b617f4..bd56bba4369 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_compute.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_compute.h
@@ -45,8 +45,6 @@ struct ilo_state_compute_interface_info {
/* usually 0 unless there are multiple interfaces */
uint32_t kernel_offset;
- uint32_t scratch_size;
-
uint8_t sampler_count;
uint8_t surface_count;
@@ -65,6 +63,8 @@ struct ilo_state_compute_info {
const struct ilo_state_compute_interface_info *interfaces;
uint8_t interface_count;
+ uint32_t per_thread_scratch_size;
+
uint32_t cv_urb_alloc_size;
uint32_t curbe_alloc_size;
};
@@ -74,6 +74,8 @@ struct ilo_state_compute {
uint32_t (*idrt)[6];
uint8_t idrt_count;
+
+ uint32_t scratch_size;
};
static inline size_t
@@ -89,4 +91,10 @@ ilo_state_compute_init(struct ilo_state_compute *compute,
const struct ilo_dev *dev,
const struct ilo_state_compute_info *info);
+static inline uint32_t
+ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
+{
+ return compute->scratch_size;
+}
+
#endif /* ILO_STATE_COMPUTE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.c b/src/gallium/drivers/ilo/core/ilo_state_shader.c
index 2e06b07a8e3..aec4fd6d8a6 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.c
@@ -37,7 +37,9 @@ enum vertex_stage {
struct vertex_ff {
uint8_t grf_start;
- uint8_t scratch_space;
+
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -59,13 +61,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
* others.
*/
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 134:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1K Bytes, 2M Bytes]"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -73,7 +68,6 @@ vertex_validate_gen6_kernel(const struct ilo_dev *dev,
assert(!kernel->offset);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -112,18 +106,33 @@ vertex_get_gen6_ff(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel,
const struct ilo_state_shader_resource_info *resource,
const struct ilo_state_shader_urb_info *urb,
+ uint32_t per_thread_scratch_size,
struct vertex_ff *ff)
{
ILO_DEV_ASSERT(dev, 6, 8);
+ memset(ff, 0, sizeof(*ff));
+
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
!vertex_validate_gen6_urb(dev, stage, urb))
return false;
ff->grf_start = kernel->grf_start;
- /* next power of two, starting from 1KB */
- ff->scratch_space = (kernel->scratch_size > 1024) ?
- (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+ if (per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1K Bytes, 2M Bytes]"
+ */
+ assert(per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
+ (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
@@ -192,8 +201,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = vs_get_gen6_thread_count(dev, info);
@@ -207,7 +216,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
@@ -234,6 +244,8 @@ vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ vs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -273,8 +285,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = hs_get_gen7_thread_count(dev, info);
@@ -296,7 +308,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
if (info->stats_enable)
dw2 |= GEN7_HS_DW2_STATISTICS;
- dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
@@ -312,6 +325,8 @@ hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
hs->hs[2] = dw4;
hs->hs[3] = dw5;
+ hs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -375,8 +390,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = ds_get_gen7_thread_count(dev, info);
@@ -387,7 +402,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
@@ -414,6 +430,8 @@ ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ ds->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -427,8 +445,8 @@ gs_get_gen6_ff(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
- &info->resource, &info->urb, ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, ff))
return false;
/*
@@ -512,7 +530,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
@@ -552,6 +571,8 @@ gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
gs->gs[3] = dw5;
gs->gs[4] = dw6;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
@@ -590,7 +611,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
@@ -620,6 +642,8 @@ gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
if (ilo_dev_gen(dev) >= ILO_GEN(8))
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader.h b/src/gallium/drivers/ilo/core/ilo_state_shader.h
index 44690c5b0bb..35651090d66 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader.h
@@ -42,8 +42,6 @@ struct ilo_state_shader_kernel_info {
uint8_t grf_start;
uint8_t pcb_attr_count;
-
- uint32_t scratch_size;
};
/**
@@ -77,6 +75,7 @@ struct ilo_state_vs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -86,6 +85,7 @@ struct ilo_state_hs_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -95,6 +95,7 @@ struct ilo_state_ds_info {
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -119,6 +120,7 @@ struct ilo_state_gs_info {
struct ilo_state_gs_sol_info sol;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
@@ -158,6 +160,8 @@ struct ilo_state_ps_info {
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
+ uint32_t per_thread_scratch_size;
+
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
bool per_sample_dispatch;
@@ -173,23 +177,28 @@ struct ilo_state_ps_info {
struct ilo_state_vs {
uint32_t vs[5];
+ uint32_t scratch_size;
};
struct ilo_state_hs {
uint32_t hs[4];
+ uint32_t scratch_size;
};
struct ilo_state_ds {
uint32_t te[3];
uint32_t ds[5];
+ uint32_t scratch_size;
};
struct ilo_state_gs {
uint32_t gs[5];
+ uint32_t scratch_size;
};
struct ilo_state_ps {
uint32_t ps[8];
+ uint32_t scratch_size;
struct ilo_state_ps_dispatch_conds {
bool ps_valid;
@@ -211,6 +220,12 @@ bool
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
+{
+ return vs->scratch_size;
+}
+
bool
ilo_state_hs_init(struct ilo_state_hs *hs,
const struct ilo_dev *dev,
@@ -221,6 +236,12 @@ ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
+{
+ return hs->scratch_size;
+}
+
bool
ilo_state_ds_init(struct ilo_state_ds *ds,
const struct ilo_dev *dev,
@@ -230,6 +251,12 @@ bool
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
+{
+ return ds->scratch_size;
+}
+
bool
ilo_state_gs_init(struct ilo_state_gs *gs,
const struct ilo_dev *dev,
@@ -239,6 +266,12 @@ bool
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
+{
+ return gs->scratch_size;
+}
+
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
@@ -253,4 +286,10 @@ ilo_state_ps_set_params(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
+static inline uint32_t
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
+{
+ return ps->scratch_size;
+}
+
#endif /* ILO_STATE_SHADER_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
index ceeb68a460e..5c3ca1ebe37 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_shader_ps.c
@@ -34,7 +34,8 @@ struct pixel_ff {
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
bool pcb_enable;
- uint8_t scratch_space;
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
@@ -56,13 +57,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
{
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
const uint8_t max_grf_start = 128;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 271:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -70,7 +64,6 @@ ps_kernel_validate_gen6(const struct ilo_dev *dev,
assert(kernel->offset % 64 == 0);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
@@ -325,7 +318,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
- uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
@@ -363,21 +355,6 @@ ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
- scratch_size = 0;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
- scratch_size < kernel_8->scratch_size)
- scratch_size = kernel_8->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
- scratch_size < kernel_16->scratch_size)
- scratch_size = kernel_16->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
- scratch_size < kernel_32->scratch_size)
- scratch_size = kernel_32->scratch_size;
-
- /* next power of two, starting from 1KB */
- ff->scratch_space = (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
-
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
@@ -401,6 +378,21 @@ ps_get_gen6_ff(const struct ilo_dev *dev,
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
+ if (info->per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+ */
+ assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
+
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
@@ -441,7 +433,8 @@ ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
@@ -539,7 +532,8 @@ ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
@@ -603,7 +597,8 @@ ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
if (false)
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
@@ -705,6 +700,7 @@ ilo_state_ps_init(struct ilo_state_ps *ps,
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
}
+ ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
/* save conditions */
ps->conds = ff.conds;
diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c
index c1f759f3043..c81514f9b4c 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen6.c
@@ -476,9 +476,9 @@ gen6_draw_vs(struct ilo_render *r,
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
- gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
@@ -501,7 +501,7 @@ gen6_draw_gs(struct ilo_render *r,
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
- gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
@@ -524,9 +524,9 @@ gen6_draw_gs(struct ilo_render *r,
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
- gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL);
} else {
- gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
+ gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
}
}
@@ -672,7 +672,7 @@ gen6_draw_wm(struct ilo_render *r,
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
- &cso->ps, kernel_offset);
+ &cso->ps, kernel_offset, NULL);
}
}
@@ -817,10 +817,10 @@ gen6_rectlist_vs_to_sf(struct ilo_render *r,
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
@@ -833,7 +833,7 @@ gen6_rectlist_wm(struct ilo_render *r,
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
+ gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 6623a8bcb43..97d9d058fdf 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -319,9 +319,9 @@ gen7_draw_vs(struct ilo_render *r,
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
@@ -338,9 +338,9 @@ gen7_draw_hs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
else
- gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
@@ -373,9 +373,9 @@ gen7_draw_ds(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
else
- gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
@@ -397,9 +397,9 @@ gen7_draw_gs(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
else
- gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
@@ -534,7 +534,7 @@ gen7_draw_wm(struct ilo_render *r,
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
@@ -678,18 +678,18 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
+ gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
+ gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
@@ -711,7 +711,7 @@ gen7_rectlist_wm(struct ilo_render *r,
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
+ gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
}
static void
diff --git a/src/gallium/drivers/ilo/ilo_render_gen8.c b/src/gallium/drivers/ilo/ilo_render_gen8.c
index 65494b4058a..1f750a2bfed 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen8.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen8.c
@@ -125,7 +125,7 @@ gen8_draw_wm(struct ilo_render *r,
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 73b625e9de4..c78d0e0b602 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -578,7 +578,6 @@ init_shader_kernel(const struct ilo_shader *kernel,
kern->grf_start = kernel->in.start_grf;
kern->pcb_attr_count =
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
- kern->scratch_size = 0;
}
static void
@@ -602,6 +601,7 @@ init_vs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -640,6 +640,7 @@ init_gs(struct ilo_shader *kernel,
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
@@ -664,6 +665,7 @@ init_ps(struct ilo_shader *kernel,
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;