summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/amd/common/ac_nir_to_llvm.c136
1 files changed, 43 insertions, 93 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1d71795426c..17cefea34a9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -130,6 +130,8 @@ struct nir_to_llvm_context {
bool has_ddxy;
unsigned num_clips;
unsigned num_culls;
+
+ bool has_ds_bpermute;
};
struct ac_tex_info {
@@ -377,14 +379,23 @@ static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
return v;
}
+static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index)
+{
+ LLVMValueRef indices[2] = {
+ ctx->i32zero,
+ index,
+ };
+ return LLVMBuildGEP(ctx->builder, base_ptr,
+ indices, 2, "");
+}
+
static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index,
bool uniform)
{
LLVMValueRef pointer;
- LLVMValueRef indices[] = {ctx->i32zero, index};
-
- pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
+ pointer = build_gep0(ctx, base_ptr, index);
if (uniform)
LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
return LLVMBuildLoad(ctx->builder, pointer, "");
@@ -1132,55 +1143,44 @@ static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
#define TID_MASK_TOP 0xfffffffd
#define TID_MASK_LEFT 0xfffffffe
static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
- nir_alu_instr *instr,
+ nir_op op,
LLVMValueRef src0)
{
- LLVMValueRef indices[2];
- LLVMValueRef store_ptr, load_ptr0, load_ptr1;
LLVMValueRef tl, trbl, result;
LLVMValueRef tl_tid, trbl_tid;
LLVMValueRef args[2];
+ LLVMValueRef thread_id;
unsigned mask;
int idx;
ctx->has_ddxy = true;
- if (!ctx->lds)
+
+ if (!ctx->lds && !ctx->has_ds_bpermute)
ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
LLVMArrayType(ctx->i32, 64),
"ddxy_lds", LOCAL_ADDR_SPACE);
- indices[0] = ctx->i32zero;
- indices[1] = get_thread_id(ctx);
- store_ptr = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- if (instr->op == nir_op_fddx_fine || instr->op == nir_op_fddx)
+ thread_id = get_thread_id(ctx);
+ if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = TID_MASK_LEFT;
- else if (instr->op == nir_op_fddy_fine || instr->op == nir_op_fddy)
+ else if (op == nir_op_fddy_fine || op == nir_op_fddy)
mask = TID_MASK_TOP;
else
mask = TID_MASK_TOP_LEFT;
- tl_tid = LLVMBuildAnd(ctx->builder, indices[1],
+ tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
LLVMConstInt(ctx->i32, mask, false), "");
- indices[1] = tl_tid;
- load_ptr0 = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
/* for DDX we want to next X pixel, DDY next Y pixel. */
- if (instr->op == nir_op_fddx_fine ||
- instr->op == nir_op_fddx_coarse ||
- instr->op == nir_op_fddx)
+ if (op == nir_op_fddx_fine ||
+ op == nir_op_fddx_coarse ||
+ op == nir_op_fddx)
idx = 1;
else
idx = 2;
- trbl_tid = LLVMBuildAdd(ctx->builder, indices[1],
+ trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, idx, false), "");
- indices[1] = trbl_tid;
- load_ptr1 = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
- if (ctx->options->family >= CHIP_TONGA) {
+ if (ctx->has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, 4, false), "");
args[1] = src0;
@@ -1194,8 +1194,13 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
ctx->i32, args, 2,
AC_FUNC_ATTR_READNONE);
} else {
- LLVMBuildStore(ctx->builder, src0, store_ptr);
+ LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+
+ store_ptr = build_gep0(ctx, ctx->lds, thread_id);
+ load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
+ load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
+ LLVMBuildStore(ctx->builder, src0, store_ptr);
tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
}
@@ -1214,72 +1219,15 @@ static LLVMValueRef emit_ddxy_interp(
struct nir_to_llvm_context *ctx,
LLVMValueRef interp_ij)
{
- LLVMValueRef indices[2];
- LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
- LLVMValueRef tl, tr, bl, result[4];
- unsigned c;
-
- if (!ctx->lds)
- ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
- LLVMArrayType(ctx->i32, 64),
- "ddxy_lds", LOCAL_ADDR_SPACE);
-
- indices[0] = ctx->i32zero;
- indices[1] = get_thread_id(ctx);
- store_ptr = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- temp = LLVMBuildAnd(ctx->builder, indices[1],
- LLVMConstInt(ctx->i32, TID_MASK_LEFT, false), "");
-
- temp2 = LLVMBuildAnd(ctx->builder, indices[1],
- LLVMConstInt(ctx->i32, TID_MASK_TOP, false), "");
-
- indices[1] = temp;
- load_ptr_x = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = temp2;
- load_ptr_y = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = LLVMBuildAdd(ctx->builder, temp,
- LLVMConstInt(ctx->i32, 1, false), "");
- load_ptr_ddx = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- indices[1] = LLVMBuildAdd(ctx->builder, temp2,
- LLVMConstInt(ctx->i32, 2, false), "");
- load_ptr_ddy = LLVMBuildGEP(ctx->builder, ctx->lds,
- indices, 2, "");
-
- for (c = 0; c < 2; ++c) {
- LLVMValueRef store_val;
- LLVMValueRef c_ll = LLVMConstInt(ctx->i32, c, false);
-
- store_val = LLVMBuildExtractElement(ctx->builder,
- interp_ij, c_ll, "");
- LLVMBuildStore(ctx->builder,
- store_val,
- store_ptr);
-
- tl = LLVMBuildLoad(ctx->builder, load_ptr_x, "");
- tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
-
- tr = LLVMBuildLoad(ctx->builder, load_ptr_ddx, "");
- tr = LLVMBuildBitCast(ctx->builder, tr, ctx->f32, "");
-
- result[c] = LLVMBuildFSub(ctx->builder, tr, tl, "");
-
- tl = LLVMBuildLoad(ctx->builder, load_ptr_y, "");
- tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
-
- bl = LLVMBuildLoad(ctx->builder, load_ptr_ddy, "");
- bl = LLVMBuildBitCast(ctx->builder, bl, ctx->f32, "");
+ LLVMValueRef result[4], a;
+ unsigned i;
- result[c + 2] = LLVMBuildFSub(ctx->builder, bl, tl, "");
+ for (i = 0; i < 2; i++) {
+ a = LLVMBuildExtractElement(ctx->builder, interp_ij,
+ LLVMConstInt(ctx->i32, i, false), "");
+ result[i] = emit_ddxy(ctx, nir_op_fddx, a);
+ result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
}
-
return build_gather_values(ctx, result, 4);
}
@@ -1593,7 +1541,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
case nir_op_fddy_fine:
case nir_op_fddx_coarse:
case nir_op_fddy_coarse:
- result = emit_ddxy(ctx, instr, src[0]);
+ result = emit_ddxy(ctx, instr->op, src[0]);
break;
default:
fprintf(stderr, "Unknown NIR alu instr: ");
@@ -4558,6 +4506,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ctx.context = LLVMContextCreate();
ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
+ ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
+
memset(shader_info, 0, sizeof(*shader_info));
LLVMSetTarget(ctx.module, "amdgcn--");