aboutsummaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2017-02-21 14:13:20 +1000
committerDave Airlie <[email protected]>2017-02-23 15:31:41 +1000
commitb71e6538a8dcd51a8e9760df61764972020dfff7 (patch)
treee4417465972362fc236d202e59c939d44bb9eca2 /src/amd
parentbec584ec0ea29e81f87fbd4d4a07ef398b8961d1 (diff)
radv/ac: handle gs->copy shader clip distances.
This fixes up the clip distance passing between the geometry shader and the copy shader. It packs the clip and cull distances into one or two consecutive slots, and avoids wasting space and make sure the gs output and copy shader input agree on where things are stored. Reviewed-by: Bas Nieuwenhuizen <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/common/ac_nir_to_llvm.c81
1 files changed, 68 insertions, 13 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 43d52952e11..a74b9065252 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2980,7 +2980,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
LLVMValueRef gs_next_vertex;
LLVMValueRef can_emit, kill;
int idx;
-
+ int clip_cull_slot = -1;
assert(instr->const_index[0] == 0);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->builder,
@@ -3005,13 +3005,40 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
idx = 0;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
+ int length = 4;
+ int start = 0;
+ int slot = idx;
+ int slot_inc = 1;
+
if (!(ctx->output_mask & (1ull << i)))
continue;
- for (unsigned j = 0; j < 4; j++) {
+ if (i == VARYING_SLOT_CLIP_DIST1 ||
+ i == VARYING_SLOT_CULL_DIST1)
+ continue;
+
+ if (i == VARYING_SLOT_CLIP_DIST0 ||
+ i == VARYING_SLOT_CULL_DIST0) {
+ /* pack clip and cull into a single set of slots */
+ if (clip_cull_slot == -1) {
+ clip_cull_slot = idx;
+ if (ctx->num_output_clips + ctx->num_output_culls > 4)
+ slot_inc = 2;
+ } else {
+ slot = clip_cull_slot;
+ slot_inc = 0;
+ }
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = ctx->num_output_clips;
+ if (i == VARYING_SLOT_CULL_DIST0) {
+ start = ctx->num_output_clips;
+ length = ctx->num_output_culls;
+ }
+ }
+ for (unsigned j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
out_ptr[j], "");
- LLVMValueRef voffset = LLVMConstInt(ctx->i32, (idx * 4 + j) * ctx->gs_max_out_vertices, false);
+ LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false);
voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
@@ -3024,7 +3051,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
V_008F0C_BUF_NUM_FORMAT_UINT,
1, 0, 1, 1, 0);
}
- idx++;
+ idx += slot_inc;
}
gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
@@ -4155,14 +4182,14 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
if (idx == VARYING_SLOT_CLIP_DIST0 ||
idx == VARYING_SLOT_CULL_DIST0) {
int length = glsl_get_length(variable->type);
- if (ctx->stage == MESA_SHADER_VERTEX) {
- if (idx == VARYING_SLOT_CLIP_DIST0) {
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ if (ctx->stage == MESA_SHADER_VERTEX)
ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
- ctx->num_output_clips = length;
- } else if (idx == VARYING_SLOT_CULL_DIST0) {
+ ctx->num_output_clips = length;
+ } else if (idx == VARYING_SLOT_CULL_DIST0) {
+ if (ctx->stage == MESA_SHADER_VERTEX)
ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
- ctx->num_output_culls = length;
- }
+ ctx->num_output_culls = length;
}
if (length > 4)
attrib_count = 2;
@@ -5049,14 +5076,42 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
args[8] = ctx->i32zero; /* TFE */
int idx = 0;
+ int clip_cull_slot = -1;
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
+ int length = 4;
+ int start = 0;
+ int slot = idx;
+ int slot_inc = 1;
if (!(ctx->output_mask & (1ull << i)))
continue;
- for (unsigned j = 0; j < 4; j++) {
+ if (i == VARYING_SLOT_CLIP_DIST1 ||
+ i == VARYING_SLOT_CULL_DIST1)
+ continue;
+
+ if (i == VARYING_SLOT_CLIP_DIST0 ||
+ i == VARYING_SLOT_CULL_DIST0) {
+ /* unpack clip and cull from a single set of slots */
+ if (clip_cull_slot == -1) {
+ clip_cull_slot = idx;
+ if (ctx->num_output_clips + ctx->num_output_culls > 4)
+ slot_inc = 2;
+ } else {
+ slot = clip_cull_slot;
+ slot_inc = 0;
+ }
+ if (i == VARYING_SLOT_CLIP_DIST0)
+ length = ctx->num_output_clips;
+ if (i == VARYING_SLOT_CULL_DIST0) {
+ start = ctx->num_output_clips;
+ length = ctx->num_output_culls;
+ }
+ }
+
+ for (unsigned j = 0; j < length; j++) {
LLVMValueRef value;
args[2] = LLVMConstInt(ctx->i32,
- (idx * 4 + j) *
+ (slot * 4 + j + start) *
ctx->gs_max_out_vertices * 16 * 4, false);
value = ac_emit_llvm_intrinsic(&ctx->ac,
@@ -5067,7 +5122,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
LLVMBuildStore(ctx->builder,
to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
}
- idx++;
+ idx += slot_inc;
}
handle_vs_outputs_post(ctx);
}