diff options
author | Dave Airlie <[email protected]> | 2017-02-02 09:52:52 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2017-02-03 09:54:04 +1000 |
commit | c9a2fc367940d1b367bee2583eed161f682f2998 (patch) | |
tree | a1feec77a6454e6598c2c8aa8fd6400143a9a393 /src/amd | |
parent | 278d5ef70a808532af9d38ec8681264bdfd72749 (diff) |
radeonsi/ac: move most of emit_ddxy to shared code.
We can reuse this in radv.
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Nicolai Hähnle <[email protected]>
Signed-off-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/common/ac_llvm_util.c | 74 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_util.h | 12 |
2 files changed, 86 insertions, 0 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 4c114585292..156d8745dfa 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -851,3 +851,77 @@ ac_get_thread_id(struct ac_llvm_context *ctx) set_range_metadata(ctx, tid, 0, 64); return tid; } + +/* + * SI implements derivatives using the local data store (LDS) + * All writes to the LDS happen in all executing threads at + * the same time. TID is the Thread ID for the current + * thread and is a value between 0 and 63, representing + * the thread's position in the wavefront. + * + * For the pixel shader threads are grouped into quads of four pixels. + * The TIDs of the pixels of a quad are: + * + * +------+------+ + * |4n + 0|4n + 1| + * +------+------+ + * |4n + 2|4n + 3| + * +------+------+ + * + * So, masking the TID with 0xfffffffc yields the TID of the top left pixel + * of the quad, masking with 0xfffffffd yields the TID of the top pixel of + * the current pixel's column, and masking with 0xfffffffe yields the TID + * of the left pixel of the current pixel's row. + * + * Adding 1 yields the TID of the pixel to the right of the left pixel, and + * adding 2 yields the TID of the pixel below the top pixel. + */ +LLVMValueRef +ac_emit_ddxy(struct ac_llvm_context *ctx, + bool has_ds_bpermute, + uint32_t mask, + int idx, + LLVMValueRef lds, + LLVMValueRef val) +{ + LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2]; + LLVMValueRef result; + + thread_id = ac_get_thread_id(ctx); + + tl_tid = LLVMBuildAnd(ctx->builder, thread_id, + LLVMConstInt(ctx->i32, mask, false), ""); + + trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, + LLVMConstInt(ctx->i32, idx, false), ""); + + if (has_ds_bpermute) { + args[0] = LLVMBuildMul(ctx->builder, tl_tid, + LLVMConstInt(ctx->i32, 4, false), ""); + args[1] = val; + tl = ac_emit_llvm_intrinsic(ctx, + "llvm.amdgcn.ds.bpermute", ctx->i32, + args, 2, AC_FUNC_ATTR_READNONE); + + args[0] = LLVMBuildMul(ctx->builder, trbl_tid, + LLVMConstInt(ctx->i32, 4, false), ""); + trbl = ac_emit_llvm_intrinsic(ctx, + "llvm.amdgcn.ds.bpermute", ctx->i32, + args, 2, AC_FUNC_ATTR_READNONE); + } else { + LLVMValueRef store_ptr, load_ptr0, load_ptr1; + + store_ptr = ac_build_gep0(ctx, lds, thread_id); + load_ptr0 = ac_build_gep0(ctx, lds, tl_tid); + load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid); + + LLVMBuildStore(ctx->builder, val, store_ptr); + tl = LLVMBuildLoad(ctx->builder, load_ptr0, ""); + trbl = LLVMBuildLoad(ctx->builder, load_ptr1, ""); + } + + tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); + trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); + result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); + return result; +} diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 0244dc48b3d..a457714957b 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -180,6 +180,18 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx); + +#define AC_TID_MASK_TOP_LEFT 0xfffffffc +#define AC_TID_MASK_TOP 0xfffffffd +#define AC_TID_MASK_LEFT 0xfffffffe + +LLVMValueRef +ac_emit_ddxy(struct ac_llvm_context *ctx, + bool has_ds_bpermute, + uint32_t mask, + int idx, + LLVMValueRef lds, + LLVMValueRef val); #ifdef __cplusplus } #endif |