summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian König <[email protected]>2013-03-22 15:59:22 +0100
committerChristian König <[email protected]>2013-04-02 13:01:43 +0200
commita0dca4409a25b7810c28bcd64b48b3f0f159a455 (patch)
tree2435c9fc39bcdd19f770ea6c91f031678b85f0a9
parentcf9b31f78a545ede480c2dc25937a07a96cf6656 (diff)
radeonsi: add instance divisor support v3
v2: reduce key size, don't copy key around too much. v3: remove key size reduction. Signed-off-by: Christian König <[email protected]> Reviewed-by: Michel Dänzer <[email protected]>
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_shader.c67
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_shader.h24
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c44
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c18
4 files changed, 94 insertions, 59 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 05125289a1f..5fdf46e7358 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -54,11 +54,9 @@
struct si_shader_context
{
struct radeon_llvm_context radeon_bld;
- struct r600_context *rctx;
struct tgsi_parse_context parse;
struct tgsi_token * tokens;
struct si_pipe_shader *shader;
- struct si_shader_key key;
unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
LLVMValueRef const_md;
LLVMValueRef const_resource;
@@ -112,22 +110,41 @@ static LLVMValueRef build_indexed_load(
return result;
}
+static LLVMValueRef get_instance_index(
+ struct radeon_llvm_context * radeon_bld,
+ unsigned divisor)
+{
+ struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
+
+ LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID);
+ result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
+ radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+
+ if (divisor > 1)
+ result = LLVMBuildUDiv(gallivm->builder, result,
+ lp_build_const_int32(gallivm, divisor), "");
+
+ return result;
+}
+
static void declare_input_vs(
struct si_shader_context * si_shader_ctx,
unsigned input_index,
const struct tgsi_full_declaration *decl)
{
+ struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+ unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
+
+ unsigned chan;
+
LLVMValueRef t_list_ptr;
LLVMValueRef t_offset;
LLVMValueRef t_list;
LLVMValueRef attribute_offset;
- LLVMValueRef buffer_index_reg;
+ LLVMValueRef buffer_index;
LLVMValueRef args[3];
LLVMTypeRef vec4_type;
LLVMValueRef input;
- struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
- //struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
- unsigned chan;
/* Load the T list */
t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
@@ -139,14 +156,20 @@ static void declare_input_vs(
/* Build the attribute offset */
attribute_offset = lp_build_const_int32(base->gallivm, 0);
- /* Load the buffer index, which is always stored in VGPR0
- * for Vertex Shaders */
- buffer_index_reg = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID);
+ if (divisor) {
+ /* Build index from instance ID, start instance and divisor */
+ si_shader_ctx->shader->shader.uses_instanceid = true;
+ buffer_index = get_instance_index(&si_shader_ctx->radeon_bld, divisor);
+ } else {
+ /* Load the buffer index, which is always stored in VGPR0
+ * for Vertex Shaders */
+ buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID);
+ }
vec4_type = LLVMVectorType(base->elem_type, 4);
args[0] = t_list;
args[1] = attribute_offset;
- args[2] = buffer_index_reg;
+ args[2] = buffer_index;
input = build_intrinsic(base->gallivm->builder,
"llvm.SI.vs.load.input", vec4_type, args, 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -239,7 +262,7 @@ static void declare_input_fs(
/* XXX: Handle all possible interpolation modes */
switch (decl->Interp.Interpolate) {
case TGSI_INTERPOLATE_COLOR:
- if (si_shader_ctx->key.flatshade) {
+ if (si_shader_ctx->shader->key.ps.flatshade) {
interp_param = 0;
} else {
if (decl->Interp.Centroid)
@@ -272,7 +295,7 @@ static void declare_input_fs(
/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
- si_shader_ctx->key.color_two_side) {
+ si_shader_ctx->shader->key.ps.color_two_side) {
LLVMValueRef args[4];
LLVMValueRef face, is_face_positive;
LLVMValueRef back_attr_number =
@@ -351,15 +374,12 @@ static void declare_system_value(
unsigned index,
const struct tgsi_full_declaration *decl)
{
- struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
LLVMValueRef value = 0;
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
- value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID);
- value = LLVMBuildAdd(gallivm->builder, value,
- LLVMGetParam(radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+ value = get_instance_index(radeon_bld, 1);
break;
case TGSI_SEMANTIC_VERTEXID:
@@ -433,7 +453,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
int cbuf = target - V_008DFC_SQ_EXP_MRT;
if (cbuf >= 0 && cbuf < 8) {
- compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;
+ compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
if (compressed)
si_shader_ctx->shader->spi_shader_col_format |=
@@ -509,13 +529,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) {
+ if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
LLVMValueRef alpha_pass =
lp_build_cmp(&bld_base->base,
- si_shader_ctx->key.alpha_func,
+ si_shader_ctx->shader->key.ps.alpha_func,
LLVMBuildLoad(gallivm->builder, out_ptr, ""),
- lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref));
+ lp_build_const_float(gallivm, si_shader_ctx->shader->key.ps.alpha_ref));
LLVMValueRef arg =
lp_build_select(&bld_base->base,
alpha_pass,
@@ -612,7 +632,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
} else {
target = V_008DFC_SQ_EXP_MRT + color_count;
if (color_count == 0 &&
- si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS)
+ si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
si_alpha_test(bld_base, index);
color_count++;
@@ -1075,8 +1095,7 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
int si_pipe_shader_create(
struct pipe_context *ctx,
- struct si_pipe_shader *shader,
- struct si_shader_key key)
+ struct si_pipe_shader *shader)
{
struct r600_context *rctx = (struct r600_context*)ctx;
struct si_pipe_shader_selector *sel = shader->selector;
@@ -1117,9 +1136,7 @@ int si_pipe_shader_create(
si_shader_ctx.tokens = sel->tokens;
tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
si_shader_ctx.shader = shader;
- si_shader_ctx.key = key;
si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
- si_shader_ctx.rctx = rctx;
create_meta_data(&si_shader_ctx);
create_function(&si_shader_ctx);
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index 9dae742115f..9d3c14b3dbb 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -111,13 +111,18 @@ struct si_shader {
unsigned nr_cbufs;
};
-struct si_shader_key {
- unsigned export_16bpc:8;
- unsigned nr_cbufs:4;
- unsigned color_two_side:1;
- unsigned alpha_func:3;
- unsigned flatshade:1;
- float alpha_ref;
+union si_shader_key {
+ struct {
+ unsigned export_16bpc:8;
+ unsigned nr_cbufs:4;
+ unsigned color_two_side:1;
+ unsigned alpha_func:3;
+ unsigned flatshade:1;
+ float alpha_ref;
+ } ps;
+ struct {
+ unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+ } vs;
};
struct si_pipe_shader {
@@ -132,12 +137,11 @@ struct si_pipe_shader {
unsigned spi_shader_col_format;
unsigned sprite_coord_enable;
unsigned so_strides[4];
- struct si_shader_key key;
+ union si_shader_key key;
};
/* radeonsi_shader.c */
-int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader,
- struct si_shader_key key);
+int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index bdd41b45534..ca9e8b43902 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1870,30 +1870,36 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
*/
/* Compute the key for the hw shader variant */
-static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *ctx,
- struct si_pipe_shader_selector *sel)
+static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+ struct si_pipe_shader_selector *sel,
+ union si_shader_key *key)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct si_shader_key key;
- memset(&key, 0, sizeof(key));
+ memset(key, 0, sizeof(*key));
- if (sel->type == PIPE_SHADER_FRAGMENT) {
+ if (sel->type == PIPE_SHADER_VERTEX) {
+ unsigned i;
+ if (!rctx->vertex_elements)
+ return;
+
+ for (i = 0; i < rctx->vertex_elements->count; ++i)
+ key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor;
+
+ } else if (sel->type == PIPE_SHADER_FRAGMENT) {
if (sel->fs_write_all)
- key.nr_cbufs = rctx->framebuffer.nr_cbufs;
- key.export_16bpc = rctx->export_16bpc;
+ key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs;
+ key->ps.export_16bpc = rctx->export_16bpc;
if (rctx->queued.named.rasterizer) {
- key.color_two_side = rctx->queued.named.rasterizer->two_side;
- key.flatshade = rctx->queued.named.rasterizer->flatshade;
+ key->ps.color_two_side = rctx->queued.named.rasterizer->two_side;
+ key->ps.flatshade = rctx->queued.named.rasterizer->flatshade;
}
if (rctx->queued.named.dsa) {
- key.alpha_func = rctx->queued.named.dsa->alpha_func;
- key.alpha_ref = rctx->queued.named.dsa->alpha_ref;
+ key->ps.alpha_func = rctx->queued.named.dsa->alpha_func;
+ key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref;
} else {
- key.alpha_func = PIPE_FUNC_ALWAYS;
+ key->ps.alpha_func = PIPE_FUNC_ALWAYS;
}
}
-
- return key;
}
/* Select the hw shader variant depending on the current state.
@@ -1902,11 +1908,11 @@ int si_shader_select(struct pipe_context *ctx,
struct si_pipe_shader_selector *sel,
unsigned *dirty)
{
- struct si_shader_key key;
+ union si_shader_key key;
struct si_pipe_shader * shader = NULL;
int r;
- key = si_shader_selector_key(ctx, sel);
+ si_shader_selector_key(ctx, sel, &key);
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
@@ -1934,8 +1940,9 @@ int si_shader_select(struct pipe_context *ctx,
if (unlikely(!shader)) {
shader = CALLOC(1, sizeof(struct si_pipe_shader));
shader->selector = sel;
+ shader->key = key;
- r = si_pipe_shader_create(ctx, shader, key);
+ r = si_pipe_shader_create(ctx, shader);
if (unlikely(r)) {
R600_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
@@ -1951,10 +1958,9 @@ int si_shader_select(struct pipe_context *ctx,
sel->num_shaders == 0 &&
shader->shader.fs_write_all) {
sel->fs_write_all = 1;
- key = si_shader_selector_key(ctx, sel);
+ si_shader_selector_key(ctx, sel, &shader->key);
}
- shader->key = key;
sel->num_shaders++;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 0deb06f2242..a90a5dab065 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -145,7 +145,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s
if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
db_shader_control |= S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
}
- if (shader->shader.uses_kill || shader->key.alpha_func != PIPE_FUNC_ALWAYS)
+ if (shader->shader.uses_kill || shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
db_shader_control |= S_02880C_KILL_ENABLE(1);
exports_ps = 0;
@@ -329,7 +329,7 @@ bcolor:
if (ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT ||
(ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR &&
- rctx->ps_shader->current->key.flatshade)) {
+ rctx->ps_shader->current->key.ps.flatshade)) {
tmp |= S_028644_FLAT_SHADE(1);
}
@@ -356,7 +356,7 @@ bcolor:
tmp);
if (name == TGSI_SEMANTIC_COLOR &&
- rctx->ps_shader->current->key.color_two_side) {
+ rctx->ps_shader->current->key.ps.color_two_side) {
name = TGSI_SEMANTIC_BCOLOR;
param_offset++;
goto bcolor;
@@ -369,7 +369,7 @@ bcolor:
static void si_update_derived_state(struct r600_context *rctx)
{
struct pipe_context * ctx = (struct pipe_context*)rctx;
- unsigned ps_dirty = 0;
+ unsigned vs_dirty = 0, ps_dirty = 0;
if (!rctx->blitter->running) {
/* Flush depth textures which need to be flushed. */
@@ -381,12 +381,20 @@ static void si_update_derived_state(struct r600_context *rctx)
}
}
- si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
+ si_shader_select(ctx, rctx->vs_shader, &vs_dirty);
if (!rctx->vs_shader->current->pm4) {
si_pipe_shader_vs(ctx, rctx->vs_shader->current);
+ vs_dirty = 0;
+ }
+
+ if (vs_dirty) {
+ si_pm4_bind_state(rctx, vs, rctx->vs_shader->current->pm4);
}
+
+ si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
+
if (!rctx->ps_shader->current->pm4) {
si_pipe_shader_ps(ctx, rctx->ps_shader->current);
ps_dirty = 0;