diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/amd/common/ac_llvm_build.c | 270 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_build.h | 8 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_helper.cpp | 7 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_util.c | 20 | ||||
-rw-r--r-- | src/amd/common/ac_llvm_util.h | 1 | ||||
-rw-r--r-- | src/amd/common/ac_nir_to_llvm.c | 24 | ||||
-rw-r--r-- | src/amd/vulkan/radv_cmd_buffer.c | 15 | ||||
-rw-r--r-- | src/amd/vulkan/radv_device.c | 4 | ||||
-rw-r--r-- | src/amd/vulkan/radv_extensions.py | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_llvm_helper.cpp | 3 | ||||
-rw-r--r-- | src/amd/vulkan/radv_nir_to_llvm.c | 13 | ||||
-rw-r--r-- | src/amd/vulkan/radv_private.h | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 2 | ||||
-rw-r--r-- | src/amd/vulkan/radv_shader_helper.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 49 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_get.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 92 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 27 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 47 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 7 |
21 files changed, 85 insertions, 520 deletions
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index abc18da13db..58f72972d25 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -75,7 +75,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); - ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64; + ctx->intptr = ctx->i32; ctx->f16 = LLVMHalfTypeInContext(ctx->context); ctx->f32 = LLVMFloatTypeInContext(ctx->context); ctx->f64 = LLVMDoubleTypeInContext(ctx->context); @@ -1403,99 +1403,28 @@ ac_build_ddxy(struct ac_llvm_context *ctx, int idx, LLVMValueRef val) { + unsigned tl_lanes[4], trbl_lanes[4]; LLVMValueRef tl, trbl, args[2]; LLVMValueRef result; - if (HAVE_LLVM >= 0x0700) { - unsigned tl_lanes[4], trbl_lanes[4]; - - for (unsigned i = 0; i < 4; ++i) { - tl_lanes[i] = i & mask; - trbl_lanes[i] = (i & mask) + idx; - } - - tl = ac_build_quad_swizzle(ctx, val, - tl_lanes[0], tl_lanes[1], - tl_lanes[2], tl_lanes[3]); - trbl = ac_build_quad_swizzle(ctx, val, - trbl_lanes[0], trbl_lanes[1], - trbl_lanes[2], trbl_lanes[3]); - } else if (ctx->chip_class >= VI) { - LLVMValueRef thread_id, tl_tid, trbl_tid; - thread_id = ac_get_thread_id(ctx); - - tl_tid = LLVMBuildAnd(ctx->builder, thread_id, - LLVMConstInt(ctx->i32, mask, false), ""); - - trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, - LLVMConstInt(ctx->i32, idx, false), ""); - - args[0] = LLVMBuildMul(ctx->builder, tl_tid, - LLVMConstInt(ctx->i32, 4, false), ""); - args[1] = val; - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[0] = LLVMBuildMul(ctx->builder, trbl_tid, - LLVMConstInt(ctx->i32, 4, false), ""); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.bpermute", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - } else { - uint32_t masks[2] = {}; - - switch (mask) { - case AC_TID_MASK_TOP_LEFT: - masks[0] = 0x8000; - if (idx == 1) - masks[1] = 0x8055; - else - masks[1] = 0x80aa; - - break; - case AC_TID_MASK_TOP: - masks[0] = 0x8044; - masks[1] = 0x80ee; - break; - case AC_TID_MASK_LEFT: - masks[0] = 0x80a0; - masks[1] = 0x80f5; - break; - default: - assert(0); - } - - args[0] = val; - args[1] = LLVMConstInt(ctx->i32, masks[0], false); - - tl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - - args[1] = LLVMConstInt(ctx->i32, masks[1], false); - trbl = ac_build_intrinsic(ctx, - "llvm.amdgcn.ds.swizzle", ctx->i32, - args, 2, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); + for (unsigned i = 0; i < 4; ++i) { + tl_lanes[i] = i & mask; + trbl_lanes[i] = (i & mask) + idx; } + tl = ac_build_quad_swizzle(ctx, val, + tl_lanes[0], tl_lanes[1], + tl_lanes[2], tl_lanes[3]); + trbl = ac_build_quad_swizzle(ctx, val, + trbl_lanes[0], trbl_lanes[1], + trbl_lanes[2], trbl_lanes[3]); + tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, ""); trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, ""); result = LLVMBuildFSub(ctx->builder, trbl, tl, ""); - if (HAVE_LLVM >= 0x0700) { - result = ac_build_intrinsic(ctx, - "llvm.amdgcn.wqm.f32", ctx->f32, - &result, 1, 0); - } + result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32, + &result, 1, 0); return result; } @@ -1740,171 +1669,6 @@ static const char *get_atomic_name(enum ac_atomic_op op) unreachable("bad atomic op"); } -/* LLVM 6 and older */ -static LLVMValueRef ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx, - struct ac_image_args *a) -{ - LLVMValueRef args[16]; - LLVMTypeRef retty = ctx->v4f32; - const char *name = NULL; - const char *atomic_subop = ""; - char intr_name[128], coords_type[64]; - - bool sample = a->opcode == ac_image_sample || - a->opcode == ac_image_gather4 || - a->opcode == ac_image_get_lod; - bool atomic = a->opcode == ac_image_atomic || - a->opcode == ac_image_atomic_cmpswap; - bool da = a->dim == ac_image_cube || - a->dim == ac_image_1darray || - a->dim == ac_image_2darray || - a->dim == ac_image_2darraymsaa; - if (a->opcode == ac_image_get_lod) - da = false; - - unsigned num_coords = - a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; - LLVMValueRef addr; - unsigned num_addr = 0; - - if (a->opcode == ac_image_get_lod) { - switch (a->dim) { - case ac_image_1darray: - num_coords = 1; - break; - case ac_image_2darray: - case ac_image_cube: - num_coords = 2; - break; - default: - break; - } - } - - if (a->offset) - args[num_addr++] = ac_to_integer(ctx, a->offset); - if (a->bias) - args[num_addr++] = ac_to_integer(ctx, a->bias); - if (a->compare) - args[num_addr++] = ac_to_integer(ctx, a->compare); - if (a->derivs[0]) { - unsigned num_derivs = ac_num_derivs(a->dim); - for (unsigned i = 0; i < num_derivs; ++i) - args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); - } - for (unsigned i = 0; i < num_coords; ++i) - args[num_addr++] = ac_to_integer(ctx, a->coords[i]); - if (a->lod) - args[num_addr++] = ac_to_integer(ctx, a->lod); - - unsigned pad_goal = util_next_power_of_two(num_addr); - while (num_addr < pad_goal) - args[num_addr++] = LLVMGetUndef(ctx->i32); - - addr = ac_build_gather_values(ctx, args, num_addr); - - unsigned num_args = 0; - if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) { - args[num_args++] = a->data[0]; - if (a->opcode == ac_image_atomic_cmpswap) - args[num_args++] = a->data[1]; - } - - unsigned coords_arg = num_args; - if (sample) - args[num_args++] = ac_to_float(ctx, addr); - else - args[num_args++] = ac_to_integer(ctx, addr); - - args[num_args++] = a->resource; - if (sample) - args[num_args++] = a->sampler; - if (!atomic) { - args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0); - if (sample) - args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0); - args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false; - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - args[num_args++] = ctx->i1false; /* lwe */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - } else { - args[num_args++] = ctx->i1false; /* r128 */ - args[num_args++] = LLVMConstInt(ctx->i1, da, 0); - args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - } - - switch (a->opcode) { - case ac_image_sample: - name = "llvm.amdgcn.image.sample"; - break; - case ac_image_gather4: - name = "llvm.amdgcn.image.gather4"; - break; - case ac_image_load: - name = "llvm.amdgcn.image.load"; - break; - case ac_image_load_mip: - name = "llvm.amdgcn.image.load.mip"; - break; - case ac_image_store: - name = "llvm.amdgcn.image.store"; - retty = ctx->voidt; - break; - case ac_image_store_mip: - name = "llvm.amdgcn.image.store.mip"; - retty = ctx->voidt; - break; - case ac_image_atomic: - case ac_image_atomic_cmpswap: - name = "llvm.amdgcn.image.atomic."; - retty = ctx->i32; - if (a->opcode == ac_image_atomic_cmpswap) { - atomic_subop = "cmpswap"; - } else { - atomic_subop = get_atomic_name(a->atomic); - } - break; - case ac_image_get_lod: - name = "llvm.amdgcn.image.getlod"; - break; - case ac_image_get_resinfo: - name = "llvm.amdgcn.image.getresinfo"; - break; - default: - unreachable("invalid image opcode"); - } - - ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type, - sizeof(coords_type)); - - if (atomic) { - snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s", - atomic_subop, coords_type); - } else { - bool lod_suffix = - a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); - - snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", - name, - a->compare ? ".c" : "", - a->bias ? ".b" : - lod_suffix ? ".l" : - a->derivs[0] ? ".d" : - a->level_zero ? ".lz" : "", - a->offset ? ".o" : "", - coords_type); - } - - LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, retty, args, num_args, - a->attributes); - if (!sample && retty == ctx->v4f32) { - result = LLVMBuildBitCast(ctx->builder, result, - ctx->v4i32, ""); - } - return result; -} - LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { @@ -1929,9 +1693,6 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, (a->level_zero ? 1 : 0) + (a->derivs[0] ? 1 : 0) <= 1); - if (HAVE_LLVM < 0x0700) - return ac_build_image_opcode_llvm6(ctx, a); - if (a->opcode == ac_image_get_lod) { switch (dim) { case ac_image_1darray: @@ -2720,9 +2481,6 @@ LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) { - if (!HAVE_32BIT_POINTERS) - return ac_array_in_const_addr_space(elem_type); - return LLVMPointerType(LLVMArrayType(elem_type, 0), AC_ADDR_SPACE_CONST_32BIT); } diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index e90c8c21ad4..ed466033b25 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -34,14 +34,12 @@ extern "C" { #endif -#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700) - enum { - AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */ + AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */ AC_ADDR_SPACE_GLOBAL = 1, - AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5, + AC_ADDR_SPACE_GDS = 2, AC_ADDR_SPACE_LDS = 3, - AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */ + AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */ }; diff --git a/src/amd/common/ac_llvm_helper.cpp b/src/amd/common/ac_llvm_helper.cpp index e022e12c7f3..dcfb8008546 100644 --- a/src/amd/common/ac_llvm_helper.cpp +++ b/src/amd/common/ac_llvm_helper.cpp @@ -39,9 +39,6 @@ #include <llvm/Transforms/IPO.h> #include <llvm/IR/LegacyPassManager.h> -#if HAVE_LLVM < 0x0700 -#include "llvm/Support/raw_ostream.h" -#endif void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) { @@ -132,9 +129,7 @@ struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm) llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm); if (TM->addPassesToEmitFile(p->passmgr, p->ostream, -#if HAVE_LLVM >= 0x0700 nullptr, -#endif llvm::TargetMachine::CGFT_ObjectFile)) { fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); delete p; @@ -170,7 +165,5 @@ void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr) void ac_enable_global_isel(LLVMTargetMachineRef tm) { -#if HAVE_LLVM >= 0x0700 reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true); -#endif } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index dc9b684e9dd..174a37f22f8 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -30,9 +30,7 @@ #include <llvm-c/Support.h> #include <llvm-c/Transforms/IPO.h> #include <llvm-c/Transforms/Scalar.h> -#if HAVE_LLVM >= 0x0700 #include <llvm-c/Transforms/Utils.h> -#endif #include "c11/threads.h" #include "gallivm/lp_bld_misc.h" #include "util/u_math.h" @@ -132,9 +130,9 @@ const char *ac_get_llvm_processor_name(enum radeon_family family) case CHIP_RAVEN: return "gfx902"; case CHIP_VEGA12: - return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902"; + return "gfx904"; case CHIP_VEGA20: - return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902"; + return "gfx906"; case CHIP_RAVEN2: return "gfx902"; /* TODO: use gfx909 when it's available */ default: @@ -303,7 +301,6 @@ ac_count_scratch_private_memory(LLVMValueRef function) bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options) { @@ -324,12 +321,10 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, goto fail; } - if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) { - compiler->target_library_info = - ac_create_target_library_info(triple); - if (!compiler->target_library_info) - goto fail; - } + compiler->target_library_info = + ac_create_target_library_info(triple); + if (!compiler->target_library_info) + goto fail; compiler->passmgr = ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); @@ -347,11 +342,8 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) { if (compiler->passmgr) LLVMDisposePassManager(compiler->passmgr); -#if HAVE_LLVM >= 0x0700 - /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */ if (compiler->target_library_info) ac_dispose_target_library_info(compiler->target_library_info); -#endif if (compiler->low_opt_tm) LLVMDisposeTargetMachine(compiler->low_opt_tm); if (compiler->tm) diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index eaf5f21876b..6d961c06f8a 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -134,7 +134,6 @@ void ac_init_llvm_once(void); bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, - bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options); void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index fe65dfff8f3..4294956de13 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -429,22 +429,16 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, { LLVMValueRef result; - if (HAVE_LLVM < 0x0700) { - LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); - result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); - } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. - * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, ""); + /* FIXME: LLVM 7+ returns incorrect result when count is 0. + * https://bugs.freedesktop.org/show_bug.cgi?id=107276 + */ + LLVMValueRef zero = ctx->i32_0; + LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); + LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, ""); - result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); - result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, ""); - result = LLVMBuildSelect(ctx->builder, icond2, zero, result, ""); - } + result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); + result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, ""); + result = LLVMBuildSelect(ctx->builder, icond2, zero, result, ""); return result; } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 23909a0f7dd..b4aea5bc898 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -594,7 +594,7 @@ radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, if (loc->sgpr_idx == -1) return; - assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2)); + assert(loc->num_sgprs == 1); assert(!loc->indirect); radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, @@ -624,14 +624,12 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, struct radv_userdata_info *loc = &locs->descriptor_sets[start]; unsigned sh_offset = sh_base + loc->sgpr_idx * 4; - radv_emit_shader_pointer_head(cs, sh_offset, count, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_head(cs, sh_offset, count, true); for (int i = 0; i < count; i++) { struct radv_descriptor_set *set = descriptors_state->sets[start + i]; - radv_emit_shader_pointer_body(device, cs, set->va, - HAVE_32BIT_POINTERS); + radv_emit_shader_pointer_body(device, cs, set->va, true); } } } @@ -1740,8 +1738,7 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, { struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); - uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2; - uint32_t size = MAX_SETS * 4 * ptr_size; + uint32_t size = MAX_SETS * 4; uint32_t offset; void *ptr; @@ -1750,14 +1747,12 @@ radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, return; for (unsigned i = 0; i < MAX_SETS; i++) { - uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size; + uint32_t *uptr = ((uint32_t *)ptr) + i; uint64_t set_va = 0; struct radv_descriptor_set *set = descriptors_state->sets[i]; if (descriptors_state->valid & (1u << i)) set_va = set->va; uptr[0] = set_va & 0xffffffff; - if (ptr_size == 2) - uptr[1] = set_va >> 32; } uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c4f0a42f5fa..cb51ee44e58 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -747,7 +747,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, - .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700, + .shaderInt16 = pdevice->rad_info.chip_class >= GFX9, .sparseBinding = true, .variableMultisampleRate = true, .inheritedQueries = true, @@ -789,7 +789,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= VI; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 075e0698cca..d14169144f7 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -51,7 +51,7 @@ class Extension: # and dEQP-VK.api.info.device fail due to the duplicated strings. EXTENSIONS = [ Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'), - Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'), + Extension('VK_KHR_16bit_storage', 1, True), Extension('VK_KHR_bind_memory2', 1, True), Extension('VK_KHR_create_renderpass2', 1, True), Extension('VK_KHR_dedicated_allocation', 1, True), diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp index ed05e1197ec..f651593ca62 100644 --- a/src/amd/vulkan/radv_llvm_helper.cpp +++ b/src/amd/vulkan/radv_llvm_helper.cpp @@ -40,7 +40,6 @@ public: bool init(void) { if (!ac_init_llvm_compiler(&llvm_info, - true, family, tm_options)) return false; @@ -99,7 +98,6 @@ bool radv_compile_to_binary(struct ac_llvm_compiler *info, } bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options) @@ -125,7 +123,6 @@ bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, } if (!ac_init_llvm_compiler(info, - okay_to_leak_target_library_info, family, tm_options)) return false; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 2e6f88ac342..322b10b67a0 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -33,9 +33,7 @@ #include <llvm-c/Core.h> #include <llvm-c/TargetMachine.h> #include <llvm-c/Transforms/Scalar.h> -#if HAVE_LLVM >= 0x0700 #include <llvm-c/Transforms/Utils.h> -#endif #include "sid.h" #include "gfx9d.h" @@ -568,8 +566,7 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, static void set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && - idx != AC_UD_SCRATCH_RING_OFFSETS; + bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); } @@ -583,7 +580,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect); + set_loc(ud_info, sgpr_idx, 1, indirect); if (!indirect) locs->descriptor_sets_enabled |= 1 << idx; @@ -624,7 +621,7 @@ count_vs_user_sgprs(struct radv_shader_context *ctx) uint8_t count = 0; if (ctx->shader_info->info.vs.has_vertex_buffers) - count += HAVE_32BIT_POINTERS ? 1 : 2; + count++; count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; return count; @@ -693,14 +690,14 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx, user_sgpr_count++; if (ctx->shader_info->info.loads_push_constants) - user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; + user_sgpr_count++; uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; uint32_t num_desc_set = util_bitcount(ctx->shader_info->info.desc_set_used_mask); - if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) { + if (remaining_sgprs < num_desc_set) { user_sgpr_info->indirect_all_descriptor_sets = true; } } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8d69b016d6f..e3dd301ee8f 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1243,7 +1243,7 @@ radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset, uint64_t va, bool global) { - bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global; + bool use_32bit_pointers = !global; radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers); radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 456c462a230..1ce6baebff0 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -600,7 +600,7 @@ shader_variant_create(struct radv_device *device, thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM); radv_init_llvm_once(); - radv_init_llvm_compiler(&ac_llvm, false, + radv_init_llvm_compiler(&ac_llvm, thread_compiler, chip_family, tm_options); if (gs_copy_shader) { diff --git a/src/amd/vulkan/radv_shader_helper.h b/src/amd/vulkan/radv_shader_helper.h index 3c81f5be54a..faaf965eab5 100644 --- a/src/amd/vulkan/radv_shader_helper.h +++ b/src/amd/vulkan/radv_shader_helper.h @@ -27,7 +27,6 @@ extern "C" { #endif bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, - bool okay_to_leak_target_library_info, bool thread_compiler, enum radeon_family family, enum ac_target_machine_options tm_options); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 06e95e863eb..23059da1bef 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -2055,7 +2055,7 @@ static void si_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count) { - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 1 : 2), 0)); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0)); radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2); } @@ -2065,10 +2065,7 @@ static void si_emit_shader_pointer_body(struct si_screen *sscreen, { radeon_emit(cs, va); - if (HAVE_32BIT_POINTERS) - assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); - else - radeon_emit(cs, va >> 32); + assert(va == 0 || (va >> 32) == sscreen->info.address32_hi); } static void si_emit_shader_pointer(struct si_context *sctx, @@ -2106,25 +2103,6 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx, } } -static void si_emit_disjoint_shader_pointers(struct si_context *sctx, - unsigned pointer_mask, - unsigned sh_base) -{ - if (!sh_base) - return; - - struct radeon_cmdbuf *cs = sctx->gfx_cs; - unsigned mask = sctx->shader_pointers_dirty & pointer_mask; - - while (mask) { - struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)]; - unsigned sh_offset = sh_base + descs->shader_userdata_offset; - - si_emit_shader_pointer_head(cs, sh_offset, 1); - si_emit_shader_pointer_body(sctx->screen, cs, descs->gpu_address); - } -} - static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs) { @@ -2164,17 +2142,10 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx) sh_base[PIPE_SHADER_TESS_EVAL]); si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT), sh_base[PIPE_SHADER_FRAGMENT]); - if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) { - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } else { - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), - sh_base[PIPE_SHADER_TESS_CTRL]); - si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), - sh_base[PIPE_SHADER_GEOMETRY]); - } + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL), + sh_base[PIPE_SHADER_TESS_CTRL]); + si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY), + sh_base[PIPE_SHADER_GEOMETRY]); sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE); @@ -2665,10 +2636,6 @@ void si_init_all_descriptors(struct si_context *sctx) { int i; -#if !HAVE_32BIT_POINTERS - STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0); -#endif - for (i = 0; i < SI_NUM_SHADERS; i++) { bool is_2nd = sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || @@ -2699,7 +2666,6 @@ void si_init_all_descriptors(struct si_context *sctx) desc->slot_index_to_bind_directly = si_get_constbuf_slot(0); if (is_2nd) { -#if HAVE_32BIT_POINTERS if (i == PIPE_SHADER_TESS_CTRL) { rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4; @@ -2707,9 +2673,6 @@ void si_init_all_descriptors(struct si_context *sctx) rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4; } -#else - rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES; -#endif } else { rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES; } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 91f38329d59..bb2d8c09eb1 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -455,15 +455,6 @@ static int si_get_shader_param(struct pipe_screen* pscreen, !sscreen->llvm_has_working_vgpr_indexing) return 0; - /* Doing indirect indexing on GFX9 with LLVM 6.0 hangs. - * This means we don't support INTERP instructions with - * indirect indexing on inputs. - */ - if (shader == PIPE_SHADER_FRAGMENT && - !sscreen->llvm_has_working_vgpr_indexing && - HAVE_LLVM < 0x0700) - return 0; - /* TCS and TES load inputs directly from LDS or offchip * memory, so indirect indexing is always supported. * PS has to support indirect indexing, because we can't diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 503d8331906..39bb192b1de 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -127,7 +127,7 @@ static void si_init_compiler(struct si_screen *sscreen, (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0); ac_init_llvm_once(); - ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options); + ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); if (compiler->low_opt_tm) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d455fb5db6a..ee0c668431c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2310,18 +2310,9 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); LLVMValueRef desc0, desc1; - if (HAVE_32BIT_POINTERS) { - desc0 = ptr; - desc1 = LLVMConstInt(ctx->i32, - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); - } else { - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, ""); - desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, ""); - desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, ""); - /* Mask out all bits except BASE_ADDRESS_HI. */ - desc1 = LLVMBuildAnd(ctx->ac.builder, desc1, - LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), ""); - } + desc0 = ptr; + desc1 = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); LLVMValueRef desc_elems[] = { desc0, @@ -3265,19 +3256,9 @@ si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef ptr, lo, hi; - if (HAVE_32BIT_POINTERS) { - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); - return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); - } - ptr = LLVMGetParam(ctx->main_fn, param); - ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); - ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); - lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, ""); - hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, ""); - ret = LLVMBuildInsertValue(builder, ret, lo, return_index, ""); - return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, ""); + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); + return LLVMBuildInsertValue(builder, ret, ptr, return_index, ""); } /* This only writes the tessellation factor levels. */ @@ -3378,8 +3359,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4); @@ -3394,11 +3374,6 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets, @@ -3422,8 +3397,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) LLVMValueRef ret = ctx->return_value; ret = si_insert_input_ptr(ctx, ret, 0, 0); - if (HAVE_32BIT_POINTERS) - ret = si_insert_input_ptr(ctx, ret, 1, 1); + ret = si_insert_input_ptr(ctx, ret, 1, 1); ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3); ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5); @@ -3434,11 +3408,6 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ctx->param_bindless_samplers_and_images, 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); -#if !HAVE_32BIT_POINTERS - ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4, - 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES); -#endif - unsigned vgpr; if (ctx->type == PIPE_SHADER_VERTEX) vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR; @@ -4702,13 +4671,8 @@ static void create_function(struct si_shader_context *ctx) case SI_SHADER_MERGED_VERTEX_TESSCTRL: /* Merged stages have 8 system SGPRs at the beginning. */ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_TESS_CTRL); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4721,15 +4685,9 @@ static void create_function(struct si_shader_context *ctx) ctx->type == PIPE_SHADER_VERTEX); declare_vs_specific_input_sgprs(ctx, &fninfo); - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_TESS_CTRL); - } ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */ - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -4763,13 +4721,8 @@ static void create_function(struct si_shader_context *ctx) case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY: /* Merged stages have 8 system SGPRs at the beginning. */ /* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */ - if (HAVE_32BIT_POINTERS) { - declare_per_stage_desc_pointers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } else { - declare_const_and_shader_buffers(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } + declare_per_stage_desc_pointers(ctx, &fninfo, + ctx->type == PIPE_SHADER_GEOMETRY); ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4788,14 +4741,8 @@ static void create_function(struct si_shader_context *ctx) ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32); /* Declare as many input SGPRs as the VS has. */ - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ } - if (!HAVE_32BIT_POINTERS) { - declare_samplers_and_images(ctx, &fninfo, - ctx->type == PIPE_SHADER_GEOMETRY); - } if (ctx->type == PIPE_SHADER_VERTEX) { ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR, ac_array_in_const32_addr_space(ctx->v4i32)); @@ -7157,20 +7104,9 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) LLVMValueRef ptr[2], list; bool merged_shader = is_merged_shader(ctx); - if (HAVE_32BIT_POINTERS) { - ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], - ac_array_in_const32_addr_space(ctx->v4i32), ""); - return list; - } - - /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); - ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1); - list = ac_build_gather_values(&ctx->ac, ptr, 2); - list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); - list = LLVMBuildIntToPtr(ctx->ac.builder, list, - ac_array_in_const_addr_space(ctx->v4i32), ""); + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], + ac_array_in_const32_addr_space(ctx->v4i32), ""); return list; } @@ -7398,8 +7334,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); - if (!HAVE_32BIT_POINTERS) - add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); add_arg(&fninfo, ARG_SGPR, ctx->i32); ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 09dd558d789..f71e601574d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -158,21 +158,9 @@ struct si_context; /* SGPR user data indices */ enum { SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_RW_BUFFERS_HI, -#endif SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI, -#endif SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */ -#if !HAVE_32BIT_POINTERS - SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, -#endif SI_SGPR_SAMPLERS_AND_IMAGES, -#if !HAVE_32BIT_POINTERS - SI_SGPR_SAMPLERS_AND_IMAGES_HI, -#endif SI_NUM_RESOURCE_SGPRS, /* API VS, TES without GS, GS copy shader */ @@ -200,35 +188,20 @@ enum { GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged shaders. */ -#if HAVE_32BIT_POINTERS /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */ /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */ GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, -#else - /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */ - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR, - GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI, - GFX9_MERGED_NUM_USER_SGPR, -#endif /* GFX9: Merged LS-HS (VS-TCS) only. */ GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, GFX9_SGPR_TCS_OUT_LAYOUT, -#if !HAVE_32BIT_POINTERS - GFX9_SGPR_align_for_vb_pointer, -#endif GFX9_TCS_NUM_USER_SGPR, /* GS limits */ GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS, -#if HAVE_32BIT_POINTERS GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR, GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR, -#else - GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, - GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR, -#endif SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS, /* PS only */ diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index fca2527f28d..adad3223d99 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -496,36 +496,23 @@ static void emit_bfe(const struct lp_build_tgsi_action *action, { struct si_shader_context *ctx = si_shader_context(bld_base); - if (HAVE_LLVM < 0x0700) { - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); - - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - } else { - /* FIXME: LLVM 7 returns incorrect result when count is 0. - * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); - - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], - zero, ""); - bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); - } + /* FIXME: LLVM 7 returns incorrect result when count is 0. + * https://bugs.freedesktop.org/show_bug.cgi?id=107276 + */ + LLVMValueRef zero = ctx->i32_0; + LLVMValueRef bfe_sm5 = + ac_build_bfe(&ctx->ac, emit_data->args[0], + emit_data->args[1], emit_data->args[2], + emit_data->info->opcode == TGSI_OPCODE_IBFE); + + /* Correct for GLSL semantics. */ + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], + LLVMConstInt(ctx->i32, 32, 0), ""); + LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], + zero, ""); + bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); + emit_data->output[emit_data->chan] = + LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); } /* this is ffs in C */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ad7d21e7816..de00df60ae5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -464,12 +464,7 @@ static struct si_pm4_state *si_get_shader_pm4_state(struct si_shader *shader) static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs) { /* Add the pointer to VBO descriptors. */ - if (HAVE_32BIT_POINTERS) { - return num_always_on_user_sgprs + 1; - } else { - assert(num_always_on_user_sgprs % 2 == 0); - return num_always_on_user_sgprs + 2; - } + return num_always_on_user_sgprs + 1; } static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) |