summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Dave Airlie <[email protected]> 2017-08-01 05:10:49 +0100
committer Dave Airlie <[email protected]> 2017-08-02 00:12:01 +0100
commit cb6f16dce90b4737f62588f8ea5083ee6544787e (patch)
tree f950568eda3a9e996d7ef2390048dbf7f32a7de4
parent 35338a242bbe72bbd75082917f36ffb991be9a62 (diff)
radeon/ac: use ds_swizzle for derivs on si/cik.
This appears to have been supported since at least LLVM 3.9, so switch radeonsi and radv over to using it; -pro also uses this.

We can now drop creating LDS for these operations, as the ds_swizzle operation doesn't actually write to LDS at all.

Acked-by: Marek Olšák <[email protected]>
(stable requested due to fixing radv CIK conformance tests)
Cc: [email protected]
Signed-off-by: Dave Airlie <[email protected]>
-rw-r--r-- src/amd/common/ac_llvm_build.c 57
-rw-r--r-- src/amd/common/ac_llvm_build.h 1
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 9
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 16
4 files changed, 44 insertions, 39 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9b939c148e5..a38aad68f72 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -796,21 +796,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
bool has_ds_bpermute,
uint32_t mask,
int idx,
- LLVMValueRef lds,
LLVMValueRef val)
{
- LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+ LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- thread_id = ac_get_thread_id(ctx);
+ if (has_ds_bpermute) {
+ LLVMValueRef thread_id, tl_tid, trbl_tid;
+ thread_id = ac_get_thread_id(ctx);
- tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
+ tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+ LLVMConstInt(ctx->i32, mask, false), "");
- trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
- LLVMConstInt(ctx->i32, idx, false), "");
+ trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+ LLVMConstInt(ctx->i32, idx, false), "");
- if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val;
@@ -828,15 +828,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT);
} else {
- LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+ uint32_t masks[2];
+
+ switch (mask) {
+ case AC_TID_MASK_TOP_LEFT:
+ masks[0] = 0x8000;
+ if (idx == 1)
+ masks[1] = 0x8055;
+ else
+ masks[1] = 0x80aa;
+
+ break;
+ case AC_TID_MASK_TOP:
+ masks[0] = 0x8044;
+ masks[1] = 0x80ee;
+ break;
+ case AC_TID_MASK_LEFT:
+ masks[0] = 0x80a0;
+ masks[1] = 0x80f5;
+ break;
+ }
- store_ptr = ac_build_gep0(ctx, lds, thread_id);
- load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
- load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+ args[0] = val;
+ args[1] = LLVMConstInt(ctx->i32, masks[0], false);
- LLVMBuildStore(ctx->builder, val, store_ptr);
- tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
- trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+ tl = ac_build_intrinsic(ctx,
+ "llvm.amdgcn.ds.swizzle", ctx->i32,
+ args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+
+ args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+ trbl = ac_build_intrinsic(ctx,
+ "llvm.amdgcn.ds.swizzle", ctx->i32,
+ args, 2,
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
}
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 09fd585706a..ee27d3ca25c 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -174,7 +174,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
bool has_ds_bpermute,
uint32_t mask,
int idx,
- LLVMValueRef lds,
LLVMValueRef val);
#define AC_SENDMSG_GS 2
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index a05fd0e9cbe..3a6252395ba 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -68,8 +68,6 @@ struct ac_nir_context {
int num_locals;
LLVMValueRef *locals;
- LLVMValueRef ddxy_lds;
-
struct nir_to_llvm_context *nctx; /* TODO get rid of this */
};
@@ -1463,11 +1461,6 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
LLVMValueRef result;
bool has_ds_bpermute = ctx->abi->chip_class >= VI;
- if (!ctx->ddxy_lds && !has_ds_bpermute)
- ctx->ddxy_lds = LLVMAddGlobalInAddressSpace(ctx->ac.module,
- LLVMArrayType(ctx->ac.i32, 64),
- "ddxy_lds", LOCAL_ADDR_SPACE);
-
if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = AC_TID_MASK_LEFT;
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -1484,7 +1477,7 @@ static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
idx = 2;
result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
- mask, idx, ctx->ddxy_lds,
+ mask, idx,
src0);
return result;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 628e6f80d3f..09053c355eb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3591,7 +3591,7 @@ static void si_llvm_emit_ddxy(
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
- mask, idx, ctx->lds, val);
+ mask, idx, val);
emit_data->output[emit_data->chan] = val;
}
@@ -4635,20 +4635,6 @@ static void create_function(struct si_shader_context *ctx)
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
shader->info.num_input_vgprs -= num_prolog_vgprs;
- if (!ctx->screen->has_ds_bpermute &&
- bld_base->info &&
- (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
- ctx->lds =
- LLVMAddGlobalInAddressSpace(gallivm->module,
- LLVMArrayType(ctx->i32, 64),
- "ddxy_lds",
- LOCAL_ADDR_SPACE);
-
if (shader->key.as_ls ||
ctx->type == PIPE_SHADER_TESS_CTRL ||
/* GFX9 has the ESGS ring buffer in LDS. */