summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/ilo/ilo_gpe_gen6.c 190
-rw-r--r-- src/gallium/drivers/ilo/ilo_shader.c 165
-rw-r--r-- src/gallium/drivers/ilo/ilo_shader.h 18
-rw-r--r-- src/gallium/drivers/ilo/ilo_state.c 8
-rw-r--r-- src/gallium/drivers/ilo/shader/ilo_shader_fs.c 7
-rw-r--r-- src/gallium/drivers/ilo/shader/ilo_shader_internal.h 4
6 files changed, 237 insertions, 155 deletions
diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c
index c0ed42dfe8c..97b566a89a4 100644
--- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c
+++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c
@@ -30,7 +30,6 @@
#include "brw_defines.h"
#include "intel_reg.h"
-#include "shader/ilo_shader_internal.h"
#include "ilo_context.h"
#include "ilo_cp.h"
#include "ilo_format.h"
@@ -1814,178 +1813,52 @@ ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
void
ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
const struct ilo_rasterizer_state *rasterizer,
- const struct ilo_shader_state *fs_state,
- const struct ilo_shader_state *last_sh_state,
+ const struct ilo_shader_state *fs,
+ const struct ilo_shader_state *last_sh,
uint32_t *dw, int num_dwords)
{
- const struct ilo_shader *fs = fs_state->shader;
- const struct ilo_shader *last_sh = last_sh_state->shader;
- uint32_t point_sprite_enable, const_interp_enable;
- uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
- int vue_offset, vue_len;
- int dst, max_src, i;
+ int output_count, vue_offset, vue_len;
+ const struct ilo_kernel_routing *routing;
ILO_GPE_VALID_GEN(dev, 6, 7);
assert(num_dwords == 13);
if (!fs) {
+ memset(dw, 0, sizeof(dw[0]) * num_dwords);
+
if (dev->gen >= ILO_GEN(7))
dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
else
dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
- for (i = 1; i < num_dwords; i++)
- dw[i] = 0;
-
return;
}
- if (last_sh) {
- /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
- assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
- assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
- vue_offset = 2;
- vue_len = last_sh->out.count - vue_offset;
- }
- else {
- vue_offset = 0;
- vue_len = fs->in.count;
- }
-
- point_sprite_enable = 0;
- const_interp_enable = 0;
- max_src = (last_sh) ? 0 : fs->in.count - 1;
-
- for (dst = 0; dst < fs->in.count; dst++) {
- const int semantic = fs->in.semantic_names[dst];
- const int index = fs->in.semantic_indices[dst];
- const int interp = fs->in.interp[dst];
- int src;
- uint16_t ctrl;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 268:
- *
- * "This field (Point Sprite Texture Coordinate Enable) must be
- * programmed to 0 when non-point primitives are rendered."
- *
- * TODO We do not check that yet.
- */
- if (semantic == TGSI_SEMANTIC_GENERIC &&
- (rasterizer->state.sprite_coord_enable & (1 << index)))
- point_sprite_enable |= 1 << dst;
-
- if (interp == TGSI_INTERPOLATE_CONSTANT ||
- (interp == TGSI_INTERPOLATE_COLOR && rasterizer->state.flatshade))
- const_interp_enable |= 1 << dst;
-
- if (!last_sh) {
- attr_ctrl[dst] = 0;
- continue;
- }
-
- /* find the matching VS/GS OUT for FS IN[i] */
- ctrl = 0;
- for (src = 0; src < vue_len; src++) {
- if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
- last_sh->out.semantic_indices[vue_offset + src] != index)
- continue;
-
- ctrl = src;
-
- if (semantic == TGSI_SEMANTIC_COLOR &&
- rasterizer->state.light_twoside &&
- src < vue_len - 1) {
- const int next = src + 1;
-
- if (last_sh->out.semantic_names[vue_offset + next] ==
- TGSI_SEMANTIC_BCOLOR &&
- last_sh->out.semantic_indices[vue_offset + next] == index) {
- ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
- ATTRIBUTE_SWIZZLE_SHIFT;
- src++;
- }
- }
-
- break;
- }
-
- /* if there is no COLOR, try BCOLOR */
- if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
- for (src = 0; src < vue_len; src++) {
- if (last_sh->out.semantic_names[vue_offset + src] !=
- TGSI_SEMANTIC_BCOLOR ||
- last_sh->out.semantic_indices[vue_offset + src] != index)
- continue;
-
- ctrl = src;
- break;
- }
- }
-
- if (src < vue_len) {
- attr_ctrl[dst] = ctrl;
- if (max_src < src)
- max_src = src;
- }
- else {
- /*
- * The previous shader stage does not output this attribute. The
- * value is supposed to be undefined for fs, unless the attribute
- * goes through point sprite replacement or the attribute is
- * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
- * attribute is picked.
- *
- * We should update the fs code and omit the output of
- * TGSI_SEMANTIC_POSITION here.
- */
- attr_ctrl[dst] = 0;
- }
- }
-
- for (; dst < Elements(attr_ctrl); dst++)
- attr_ctrl[dst] = 0;
+ output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+ assert(output_count <= 32);
- /* only the first 16 attributes can be remapped */
- for (dst = 16; dst < Elements(attr_ctrl); dst++)
- assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
+ routing = ilo_shader_get_kernel_routing(fs);
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
- * 0 indicating no Vertex URB data to be read.
- *
- * This field should be set to the minimum length required to read the
- * maximum source attribute. The maximum source attribute is indicated
- * by the maximum value of the enabled Attribute # Source Attribute if
- * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
- * enable is not set.
- *
- * read_length = ceiling((max_source_attr+1)/2)
- *
- * [errata] Corruption/Hang possible if length programmed larger than
- * recommended"
- */
- vue_len = max_src + 1;
-
- assert(fs->in.count <= 32);
+ vue_offset = routing->source_skip;
assert(vue_offset % 2 == 0);
+ vue_offset /= 2;
- if (dev->gen >= ILO_GEN(7)) {
- dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
- (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+ vue_len = (routing->source_len + 1) / 2;
+ if (!vue_len)
+ vue_len = 1;
- if (last_sh)
+ if (dev->gen >= ILO_GEN(7)) {
+ dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
}
else {
- dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
- (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
-
- if (last_sh)
+ dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
+ vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+ vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ if (routing->swizzle_enable)
dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
}
@@ -1998,11 +1871,20 @@ ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
break;
}
- for (i = 0; i < 8; i++)
- dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
+ STATIC_ASSERT(Elements(routing->swizzles) >= 16);
+ memcpy(&dw[1], routing->swizzles, 2 * 16);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 268:
+ *
+ * "This field (Point Sprite Texture Coordinate Enable) must be
+ * programmed to 0 when non-point primitives are rendered."
+ *
+ * TODO We do not check that yet.
+ */
+ dw[9] = routing->point_sprite_enable;
- dw[9] = point_sprite_enable;
- dw[10] = const_interp_enable;
+ dw[10] = routing->const_interp_enable;
/* WrapShortest enables */
dw[11] = 0;
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index 2cff95bd994..5f95a19244f 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -27,6 +27,7 @@
#include "tgsi/tgsi_parse.h"
#include "intel_winsys.h"
+#include "brw_defines.h" /* for SBE setup */
#include "shader/ilo_shader_internal.h"
#include "ilo_state.h"
@@ -848,6 +849,157 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
return (shader->shader != cur);
}
+/**
+ * Find the source slot that provides the attribute (semantic, index).
+ *
+ * \param semantics  semantic names of the source slots
+ * \param indices    semantic indices of the source slots
+ * \param len        number of source slots to search
+ * \param semantic   semantic name being looked for
+ * \param index      semantic index being looked for
+ *
+ * \return the first slot that matches exactly; when there is none and
+ *         \p semantic is TGSI_SEMANTIC_COLOR, the first BCOLOR slot with
+ *         the same index; -1 otherwise
+ */
+static int
+route_attr(const int *semantics, const int *indices, int len,
+           int semantic, int index)
+{
+   const bool want_color = (semantic == TGSI_SEMANTIC_COLOR);
+   int fallback = -1;
+   int slot;
+
+   for (slot = 0; slot < len; slot++) {
+      if (indices[slot] != index)
+         continue;
+
+      /* an exact match always wins, wherever it is found */
+      if (semantics[slot] == semantic)
+         return slot;
+
+      /* remember the first BCOLOR in case COLOR itself is never found */
+      if (want_color && fallback < 0 &&
+          semantics[slot] == TGSI_SEMANTIC_BCOLOR)
+         fallback = slot;
+   }
+
+   return fallback;
+}
+
+/**
+ * Select a routing for the given source shader and rasterizer state.
+ *
+ * The routing stored in the currently selected kernel of \p shader is
+ * rebuilt from scratch: every kernel input is mapped to a source slot, and
+ * the point sprite / constant interpolation masks are recomputed.
+ *
+ * \param shader      shader whose selected kernel receives the routing; a
+ *                    kernel must have been selected already
+ * \param source      shader feeding \p shader, or NULL, in which case each
+ *                    input is routed from the slot of the same number
+ * \param rasterizer  rasterizer state; sprite_coord_enable and
+ *                    light_twoside are read from it
+ *
+ * \return true if a different routing is selected.  No comparison against
+ *         the previous routing is made, so this is currently always true.
+ */
+bool
+ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
+                                 const struct ilo_shader_state *source,
+                                 const struct ilo_rasterizer_state *rasterizer)
+{
+   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
+   const bool light_twoside = rasterizer->state.light_twoside;
+   struct ilo_shader *kernel = shader->shader;
+   struct ilo_kernel_routing *routing = &kernel->routing;
+   const int *src_semantics, *src_indices;
+   int src_len, max_src_slot;
+   int dst_len, dst_slot;
+
+   /* we are constructing 3DSTATE_SBE here */
+   assert(shader->info.dev->gen >= ILO_GEN(6) &&
+          shader->info.dev->gen <= ILO_GEN(7));
+
+   assert(kernel);
+
+   if (source) {
+      assert(source->shader);
+      src_semantics = source->shader->out.semantic_names;
+      src_indices = source->shader->out.semantic_indices;
+      src_len = source->shader->out.count;
+
+      /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
+      assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
+      assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
+      routing->source_skip = 2;
+      routing->source_len = src_len - routing->source_skip;
+      src_semantics += routing->source_skip;
+      src_indices += routing->source_skip;
+   }
+   else {
+      /* no source shader: the kernel inputs describe themselves */
+      src_semantics = kernel->in.semantic_names;
+      src_indices = kernel->in.semantic_indices;
+      src_len = kernel->in.count;
+
+      routing->source_skip = 0;
+      routing->source_len = src_len;
+   }
+
+   routing->const_interp_enable = kernel->in.const_interp_enable;
+   routing->point_sprite_enable = 0;
+   routing->swizzle_enable = false;
+
+   assert(kernel->in.count <= Elements(routing->swizzles));
+   dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
+   max_src_slot = -1;
+
+   for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
+      const int semantic = kernel->in.semantic_names[dst_slot];
+      const int index = kernel->in.semantic_indices[dst_slot];
+      int src_slot;
+
+      /*
+       * sprite_coord_enable is a 32-bit mask.  A GENERIC index outside
+       * [0, 31] can never be selected by it, and shifting by such an index
+       * would be undefined, so check the range before shifting.
+       */
+      if (semantic == TGSI_SEMANTIC_GENERIC &&
+          index >= 0 && index < 32 &&
+          (sprite_coord_enable & (1u << index)))
+         routing->point_sprite_enable |= 1u << dst_slot;
+
+      if (source) {
+         src_slot = route_attr(src_semantics, src_indices,
+               routing->source_len, semantic, index);
+
+         /*
+          * The source shader stage does not output this attribute.  The value
+          * is supposed to be undefined, unless the attribute goes through
+          * point sprite replacement or the attribute is
+          * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
+          * attribute is picked.
+          *
+          * We should update the kernel code and omit the output of
+          * TGSI_SEMANTIC_POSITION here.
+          */
+         if (src_slot < 0)
+            src_slot = 0;
+      }
+      else {
+         src_slot = dst_slot;
+      }
+
+      routing->swizzles[dst_slot] = src_slot;
+
+      /* use the following slot for two-sided lighting */
+      if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
+          src_slot + 1 < routing->source_len &&
+          src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
+          src_indices[src_slot + 1] == index) {
+         routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
+            ATTRIBUTE_SWIZZLE_SHIFT;
+         /* the BCOLOR slot is read too and counts toward the read length */
+         src_slot++;
+      }
+
+      /* anything other than an identity mapping needs swizzling enabled */
+      if (routing->swizzles[dst_slot] != dst_slot)
+         routing->swizzle_enable = true;
+
+      if (max_src_slot < src_slot)
+         max_src_slot = src_slot;
+   }
+
+   /* clear the swizzles of the unused destination slots */
+   memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
+         sizeof(routing->swizzles[0]) * dst_slot);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
+    *      0 indicating no Vertex URB data to be read.
+    *
+    *      This field should be set to the minimum length required to read the
+    *      maximum source attribute. The maximum source attribute is indicated
+    *      by the maximum value of the enabled Attribute # Source Attribute if
+    *      Attribute Swizzle Enable is set, Number of Output Attributes-1 if
+    *      enable is not set.
+    *
+    *        read_length = ceiling((max_source_attr+1)/2)
+    *
+    *      [errata] Corruption/Hang possible if length programmed larger than
+    *      recommended"
+    */
+   routing->source_len = max_src_slot + 1;
+
+   return true;
+}
+
/**
* Return the cache offset of the selected kernel. This must be called after
* ilo_shader_select_kernel() and ilo_shader_cache_upload().
@@ -978,3 +1130,16 @@ ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
return &kernel->so_info;
}
+
+/**
+ * Return the routing info stored in the selected kernel of \p shader.
+ *
+ * A kernel must already be selected; this is asserted.
+ */
+const struct ilo_kernel_routing *
+ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
+{
+   assert(shader->shader);
+
+   return &shader->shader->routing;
+}
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index d326b9c01f4..6a2b8a397b3 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -58,8 +58,18 @@ enum ilo_kernel_param {
ILO_KERNEL_PARAM_COUNT,
};
+/**
+ * Routing of source shader outputs to the inputs of a kernel.
+ */
+struct ilo_kernel_routing {
+   /* one bit per kernel input slot */
+   uint32_t const_interp_enable;
+   uint32_t point_sprite_enable;
+
+   /* leading source slots that are skipped, and slots read after them */
+   unsigned source_skip;
+   unsigned source_len;
+
+   /* set when any input is not routed from the slot of the same number */
+   bool swizzle_enable;
+   /* per-input source slot, possibly OR'ed with swizzle control bits */
+   uint16_t swizzles[16];
+};
+
struct intel_bo;
struct ilo_context;
+struct ilo_rasterizer_state;
struct ilo_shader_cache;
struct ilo_shader_state;
struct ilo_shader_cso;
@@ -114,6 +124,11 @@ ilo_shader_select_kernel(struct ilo_shader_state *shader,
const struct ilo_context *ilo,
uint32_t dirty);
+bool
+ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
+ const struct ilo_shader_state *source,
+ const struct ilo_rasterizer_state *rasterizer);
+
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader);
@@ -127,4 +142,7 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
+const struct ilo_kernel_routing *
+ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
+
#endif /* ILO_SHADER_H */
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 58894f288ff..7046a69890c 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -72,6 +72,14 @@ finalize_shader_states(struct ilo_context *ilo)
/* mark the state dirty if a new kernel is selected */
ilo->dirty |= state;
}
+
+ /* need to setup SBE for FS */
+ if (type == PIPE_SHADER_FRAGMENT && ilo->dirty &
+ (state | ILO_DIRTY_GS | ILO_DIRTY_VS | ILO_DIRTY_RASTERIZER)) {
+ if (ilo_shader_select_kernel_routing(shader,
+ (ilo->gs) ? ilo->gs : ilo->vs, ilo->rasterizer))
+ ilo->dirty |= state;
+ }
}
}
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
index eca118aa2a1..bea2c097e14 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_fs.c
@@ -1574,6 +1574,9 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
}
switch (tgsi->inputs[i].interp) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ sh->in.const_interp_enable |= 1 << i;
+ break;
case TGSI_INTERPOLATE_LINEAR:
sh->in.has_linear_interp = true;
@@ -1587,8 +1590,10 @@ fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
}
break;
case TGSI_INTERPOLATE_COLOR:
- if (flatshade)
+ if (flatshade) {
+ sh->in.const_interp_enable |= 1 << i;
break;
+ }
/* fall through */
case TGSI_INTERPOLATE_PERSPECTIVE:
if (tgsi->inputs[i].centroid) {
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index 3515e3f1d9f..d9ae2fa0a8f 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -30,6 +30,7 @@
#include "ilo_common.h"
#include "ilo_context.h"
+#include "ilo_shader.h"
/* XXX The interface needs to be reworked */
@@ -88,6 +89,7 @@ struct ilo_shader {
bool has_pos;
bool has_linear_interp;
int barycentric_interpolation_mode;
+ uint32_t const_interp_enable;
bool discard_adj;
} in;
@@ -114,6 +116,8 @@ struct ilo_shader {
void *kernel;
int kernel_size;
+ struct ilo_kernel_routing routing;
+
/* what does the push constant buffer consist of? */
struct {
int clip_state_size;