summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorDave Airlie <[email protected]>2017-02-02 09:52:52 +1000
committerDave Airlie <[email protected]>2017-02-03 09:54:04 +1000
commitc9a2fc367940d1b367bee2583eed161f682f2998 (patch)
treea1feec77a6454e6598c2c8aa8fd6400143a9a393 /src/amd
parent278d5ef70a808532af9d38ec8681264bdfd72749 (diff)
radeonsi/ac: move most of emit_ddxy to shared code.
We can reuse this in radv. Reviewed-by: Bas Nieuwenhuizen <[email protected]> Reviewed-by: Nicolai Hähnle <[email protected]> Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/common/ac_llvm_util.c74
-rw-r--r--src/amd/common/ac_llvm_util.h12
2 files changed, 86 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 4c114585292..156d8745dfa 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -851,3 +851,77 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
set_range_metadata(ctx, tid, 0, 64);
return tid;
}
+
+/*
+ * SI implements derivatives using the local data store (LDS)
+ * All writes to the LDS happen in all executing threads at
+ * the same time. TID is the Thread ID for the current
+ * thread and is a value between 0 and 63, representing
+ * the thread's position in the wavefront.
+ *
+ * For the pixel shader threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ * +------+------+
+ * |4n + 0|4n + 1|
+ * +------+------+
+ * |4n + 2|4n + 3|
+ * +------+------+
+ *
+ * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
+ * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
+ * the current pixel's column, and masking with 0xfffffffe yields the TID
+ * of the left pixel of the current pixel's row.
+ *
+ * Adding 1 yields the TID of the pixel to the right of the left pixel, and
+ * adding 2 yields the TID of the pixel below the top pixel.
+ */
+LLVMValueRef
+ac_emit_ddxy(struct ac_llvm_context *ctx,
+ bool has_ds_bpermute,
+ uint32_t mask,
+ int idx,
+ LLVMValueRef lds,
+ LLVMValueRef val)
+{
+ LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+ LLVMValueRef result;
+
+ thread_id = ac_get_thread_id(ctx);
+
+ tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+ LLVMConstInt(ctx->i32, mask, false), "");
+
+ trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+ LLVMConstInt(ctx->i32, idx, false), "");
+
+ if (has_ds_bpermute) {
+ args[0] = LLVMBuildMul(ctx->builder, tl_tid,
+ LLVMConstInt(ctx->i32, 4, false), "");
+ args[1] = val;
+ tl = ac_emit_llvm_intrinsic(ctx,
+ "llvm.amdgcn.ds.bpermute", ctx->i32,
+ args, 2, AC_FUNC_ATTR_READNONE);
+
+ args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
+ LLVMConstInt(ctx->i32, 4, false), "");
+ trbl = ac_emit_llvm_intrinsic(ctx,
+ "llvm.amdgcn.ds.bpermute", ctx->i32,
+ args, 2, AC_FUNC_ATTR_READNONE);
+ } else {
+ LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+
+ store_ptr = ac_build_gep0(ctx, lds, thread_id);
+ load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
+ load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+
+ LLVMBuildStore(ctx->builder, val, store_ptr);
+ tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
+ trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+ }
+
+ tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
+ trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
+ result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
+ return result;
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 0244dc48b3d..a457714957b 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -180,6 +180,18 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
+
+#define AC_TID_MASK_TOP_LEFT 0xfffffffc
+#define AC_TID_MASK_TOP 0xfffffffd
+#define AC_TID_MASK_LEFT 0xfffffffe
+
+LLVMValueRef
+ac_emit_ddxy(struct ac_llvm_context *ctx,
+ bool has_ds_bpermute,
+ uint32_t mask,
+ int idx,
+ LLVMValueRef lds,
+ LLVMValueRef val);
#ifdef __cplusplus
}
#endif