summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Chia-I Wu <[email protected]> 2013-06-20 17:42:21 +0800
committer Chia-I Wu <[email protected]> 2013-06-25 13:17:21 +0800
commit 851202c319701c541d52f87ffa22505504c50d57 (patch)
tree 0d36147c66cd5bf795168fb567c0089761f579d6
parent d209da5e338ee1f437cbce21e9cba667d60ee557 (diff)
ilo: use ilo_shader_cso for GS
Add ilo_gpe_init_gs_cso() to construct 3DSTATE_GS once and early for geometry shaders.
-rw-r--r-- src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c 11
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe.h 23
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe_gen6.c 260
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe_gen6.h 6
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe_gen7.c 65
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe_gen7.h 2
-rw-r--r-- src/gallium/drivers/ilo/ilo_shader.c 3
7 files changed, 225 insertions, 145 deletions
diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
index e51d7942ab3..72e87d4efa7 100644
--- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c
@@ -501,16 +501,9 @@ gen6_pipeline_gs(struct ilo_3d_pipeline *p,
/* 3DSTATE_GS */
if (DIRTY(GS) || DIRTY(VS) ||
session->prim_changed || session->kernel_bo_changed) {
- const struct ilo_shader *gs = (ilo->gs)? ilo->gs->shader : NULL;
- const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL;
- const int num_vertices = u_vertices_per_prim(session->reduced_prim);
-
- if (gs)
- assert(!gs->pcb.clip_state_size);
+ const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
- p->gen6_3DSTATE_GS(p->dev, gs, vs,
- (vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0,
- p->cp);
+ p->gen6_3DSTATE_GS(p->dev, ilo->gs, ilo->vs, verts_per_prim, p->cp);
}
}
diff --git a/src/gallium/drivers/ilo/ilo_gpe.h b/src/gallium/drivers/ilo/ilo_gpe.h
index b5d0163f909..e9141f7c3af 100644
--- a/src/gallium/drivers/ilo/ilo_gpe.h
+++ b/src/gallium/drivers/ilo/ilo_gpe.h
@@ -438,4 +438,27 @@ ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
const struct ilo_shader_state *vs,
struct ilo_shader_cso *cso);
+void
+ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso);
+
+void
+ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso);
+
+static inline void
+ilo_gpe_init_gs_cso(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso)
+{ /*
+ * Generation dispatcher for the GS CSO initializers: forwards the same
+ * (dev, gs, cso) arguments to ilo_gpe_init_gs_cso_gen7() when dev->gen is
+ * at least ILO_GEN(7), and to ilo_gpe_init_gs_cso_gen6() otherwise.
+ * Nothing is returned; the selected initializer is handed cso to fill in.
+ */
+ if (dev->gen >= ILO_GEN(7)) {
+ ilo_gpe_init_gs_cso_gen7(dev, gs, cso);
+ }
+ else { /* every generation below ILO_GEN(7) takes the gen6 path */
+ ilo_gpe_init_gs_cso_gen6(dev, gs, cso);
+ }
+}
+
#endif /* ILO_GPE_H */
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c
index e57609bcfa4..f4918dccf4b 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c
@@ -1246,135 +1246,167 @@ gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
ilo_cp_end(cp);
}
+void
+ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso)
+{ /*
+ * Precompute the 3DSTATE_GS values named dw2, dw4, dw5 and dw6 and store
+ * them, in that order, in cso->payload[0..3].
+ *
+ * dev: device info; only dev->gt is read here, to pick the thread limit.
+ * gs: shader state queried through ilo_shader_get_type() and
+ * ilo_shader_get_kernel_param(). gen6_emit_3DSTATE_GS() also passes
+ * a vertex shader here when that shader has ILO_KERNEL_VS_GEN6_SO set.
+ * cso: output; payload[0..3] are overwritten.
+ */
+ int start_grf, vue_read_len, max_threads;
+ uint32_t dw2, dw4, dw5, dw6;
+
+ ILO_GPE_VALID_GEN(dev, 6, 6);
+
+ if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
+ start_grf = ilo_shader_get_kernel_param(gs,
+ ILO_KERNEL_URB_DATA_START_REG);
+
+ vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+ }
+ else { /* not a geometry shader: use the stream-output start register and the output count */
+ start_grf = ilo_shader_get_kernel_param(gs,
+ ILO_KERNEL_VS_GEN6_SO_START_REG);
+
+ vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 153:
+ *
+ * "Specifies the amount of URB data read and passed in the thread
+ * payload for each Vertex URB entry, in 256-bit register increments.
+ *
+ * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
+ * 0 indicating no Vertex URB data to be read and passed to the
+ * thread."
+ *
+ * The count fetched above is therefore halved, rounding up, and then
+ * raised to 1 if the result would be 0.
+ */
+ vue_read_len = (vue_read_len + 1) / 2;
+ if (!vue_read_len)
+ vue_read_len = 1;
+
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+ *
+ * "Maximum Number of Threads valid range is [0,27] when Rendering
+ * Enabled bit is set."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+ *
+ * "Programming Note: If the GS stage is enabled, software must always
+ * allocate at least one GS URB Entry. This is true even if the GS
+ * thread never needs to output vertices to the pipeline, e.g., when
+ * only performing stream output. This is an artifact of the need to
+ * pass the GS thread an initial destination URB handle."
+ *
+ * As such, we always enable rendering, and limit the number of threads.
+ */
+ if (dev->gt == 2) {
+ /* maximum is 60, but limited to 28 */
+ max_threads = 28;
+ }
+ else {
+ /* maximum is 24, but limited to 21 (see brwCreateContext()) */
+ max_threads = 21;
+ }
+
+ if (max_threads > 28) /* never true for the two values assigned above (28 and 21) */
+ max_threads = 28;
+
+ dw2 = GEN6_GS_SPF_MODE;
+
+ dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
+ 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
+ start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
+
+ dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT | /* stored as count minus one: 28 -> 27, 21 -> 20 */
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_SO_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE;
+
+ /*
+ * we cannot make use of GEN6_GS_REORDER because it will reorder
+ * triangle strips according to D3D rules (triangle 2N+1 uses vertices
+ * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
+ * (2N+2, 2N+1, 2N+3)).
+ */
+ dw6 = GEN6_GS_ENABLE;
+
+ if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
+ dw6 |= GEN6_GS_DISCARD_ADJACENCY;
+
+ if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
+ const uint32_t svbi_post_inc =
+ ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
+
+ dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
+ if (svbi_post_inc) { /* post-increment bits are only set for a non-zero value */
+ dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
+ svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
+ }
+ }
+
+ STATIC_ASSERT(Elements(cso->payload) >= 4); /* four slots are written below */
+ cso->payload[0] = dw2;
+ cso->payload[1] = dw4;
+ cso->payload[2] = dw5;
+ cso->payload[3] = dw6;
+}
+
static void
gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader *gs,
- const struct ilo_shader *vs,
- uint32_t vs_offset,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim,
struct ilo_cp *cp)
{
const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
const uint8_t cmd_len = 7;
uint32_t dw1, dw2, dw4, dw5, dw6;
- int i;
ILO_GPE_VALID_GEN(dev, 6, 6);
- if (!gs && (!vs || !vs->stream_output)) {
- dw1 = 0;
- dw2 = 0;
- dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
- dw5 = GEN6_GS_STATISTICS_ENABLE;
- dw6 = 0;
- }
- else {
- int max_threads, vue_read_len;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 154:
- *
- * "Maximum Number of Threads valid range is [0,27] when Rendering
- * Enabled bit is set."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 173:
- *
- * "Programming Note: If the GS stage is enabled, software must
- * always allocate at least one GS URB Entry. This is true even if
- * the GS thread never needs to output vertices to the pipeline,
- * e.g., when only performing stream output. This is an artifact of
- * the need to pass the GS thread an initial destination URB
- * handle."
- *
- * As such, we always enable rendering, and limit the number of threads.
- */
- if (dev->gt == 2) {
- /* maximum is 60, but limited to 28 */
- max_threads = 28;
- }
- else {
- /* maximum is 24, but limited to 21 (see brwCreateContext()) */
- max_threads = 21;
- }
+ if (gs) {
+ const struct ilo_shader_cso *cso;
- if (max_threads > 28)
- max_threads = 28;
+ dw1 = ilo_shader_get_kernel_offset(gs);
- dw2 = GEN6_GS_SPF_MODE;
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
+ dw6 = cso->payload[3];
+ }
+ else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
+ struct ilo_shader_cso cso;
+ enum ilo_kernel_param param;
- dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_SO_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE;
+ switch (verts_per_prim) {
+ case 1:
+ param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
+ break;
+ case 2:
+ param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
+ break;
+ default:
+ param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
+ break;
+ }
- /*
- * we cannot make use of GEN6_GS_REORDER because it will reorder
- * triangle strips according to D3D rules (triangle 2N+1 uses vertices
- * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
- * (2N+2, 2N+1, 2N+3)).
- */
- dw6 = GEN6_GS_ENABLE;
-
- if (gs) {
- /* VS ouputs must match GS inputs */
- assert(gs->in.count == vs->out.count);
- for (i = 0; i < gs->in.count; i++) {
- assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
- assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
- }
+ dw1 = ilo_shader_get_kernel_offset(vs) +
+ ilo_shader_get_kernel_param(vs, param);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read
- * Length) to 0 indicating no Vertex URB data to be read and
- * passed to the thread."
- */
- vue_read_len = (gs->in.count + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- dw1 = gs->cache_offset;
- dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
- 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
- gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
-
- if (gs->in.discard_adj)
- dw6 |= GEN6_GS_DISCARD_ADJACENCY;
-
- if (gs->stream_output) {
- dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
- if (gs->svbi_post_inc) {
- dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
- gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
- }
- }
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read
- * Length) to 0 indicating no Vertex URB data to be read and
- * passed to the thread."
- */
- vue_read_len = (vs->out.count + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- dw1 = vs_offset;
- dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
- 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
- vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
-
- if (vs->in.discard_adj)
- dw6 |= GEN6_GS_DISCARD_ADJACENCY;
-
- dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
- if (vs->svbi_post_inc) {
- dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
- vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
- }
- }
+ /* cannot use VS's CSO */
+ ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
+ dw2 = cso.payload[0];
+ dw4 = cso.payload[1];
+ dw5 = cso.payload[2];
+ dw6 = cso.payload[3];
+ }
+ else {
+ dw1 = 0;
+ dw2 = 0;
+ dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
+ dw5 = GEN6_GS_STATISTICS_ENABLE;
+ dw6 = 0;
}
ilo_cp_begin(cp, cmd_len);
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
index 07e6050500b..bf4ed750341 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h
@@ -241,9 +241,9 @@ typedef void
typedef void
(*ilo_gpe_gen6_3DSTATE_GS)(const struct ilo_dev_info *dev,
- const struct ilo_shader *gs,
- const struct ilo_shader *vs,
- uint32_t vs_offset,
+ const struct ilo_shader_state *gs,
+ const struct ilo_shader_state *vs,
+ int verts_per_prim,
struct ilo_cp *cp);
typedef void
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c
index 74ba793dea1..f374473b291 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c
@@ -84,19 +84,22 @@ gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
}
-static void
-gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
- const struct ilo_shader *gs,
- int num_samplers,
- struct ilo_cp *cp)
+void
+ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso)
{
- const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
- const uint8_t cmd_len = 7;
+ int start_grf, vue_read_len, max_threads;
uint32_t dw2, dw4, dw5;
- int max_threads;
ILO_GPE_VALID_GEN(dev, 7, 7);
+ start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
+ vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+
+ /* in pairs */
+ vue_read_len = (vue_read_len + 1) / 2;
+
switch (dev->gen) {
case ILO_GEN(7):
max_threads = (dev->gt == 2) ? 128 : 36;
@@ -106,6 +109,36 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
break;
}
+ dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
+
+ dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
+ GEN7_GS_INCLUDE_VERTEX_HANDLES |
+ 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
+ start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
+
+ dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_ENABLE;
+
+ STATIC_ASSERT(Elements(cso->payload) >= 3);
+ cso->payload[0] = dw2;
+ cso->payload[1] = dw4;
+ cso->payload[2] = dw5;
+}
+
+static void
+gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
+ const struct ilo_shader_state *gs,
+ int num_samplers,
+ struct ilo_cp *cp)
+{
+ const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
+ const uint8_t cmd_len = 7;
+ const struct ilo_shader_cso *cso;
+ uint32_t dw2, dw4, dw5;
+
+ ILO_GPE_VALID_GEN(dev, 7, 7);
+
if (!gs) {
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
@@ -119,20 +152,16 @@ gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
return;
}
- dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
+ cso = ilo_shader_get_kernel_cso(gs);
+ dw2 = cso->payload[0];
+ dw4 = cso->payload[1];
+ dw5 = cso->payload[2];
- dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
- GEN7_GS_INCLUDE_VERTEX_HANDLES |
- 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
- gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
-
- dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_ENABLE;
+ dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
ilo_cp_begin(cp, cmd_len);
ilo_cp_write(cp, cmd | (cmd_len - 2));
- ilo_cp_write(cp, gs->cache_offset);
+ ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
ilo_cp_write(cp, dw2);
ilo_cp_write(cp, 0); /* scratch */
ilo_cp_write(cp, dw4);
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h
index 1f123eaa285..f8e8745fcb4 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h
@@ -158,7 +158,7 @@ typedef ilo_gpe_gen6_3DSTATE_VS ilo_gpe_gen7_3DSTATE_VS;
typedef void
(*ilo_gpe_gen7_3DSTATE_GS)(const struct ilo_dev_info *dev,
- const struct ilo_shader *gs,
+ const struct ilo_shader_state *gs,
int num_samplers,
struct ilo_cp *cp);
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 983cfffc851..086134a2bc8 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -683,6 +683,9 @@ ilo_shader_state_use_variant(struct ilo_shader_state *state,
case PIPE_SHADER_VERTEX:
ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
break;
+ case PIPE_SHADER_GEOMETRY:
+ ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
+ break;
default:
break;
}