author     Marek Olšák <[email protected]>       2020-01-14 20:17:08 -0500
committer  Marge Bot <[email protected]>  2020-01-15 21:54:55 +0000
commit     30b14ba67e998cbc12447adc6c8e185636b75390 (patch)
tree       cbe11a40223ed70382cf2a921117671f6682ea2a /src/gallium/drivers/radeonsi/si_shader_llvm_resources.c
parent     da2c12af4b9cfb93bd6880cfa4a218c44b79fe13 (diff)
radeonsi: move code for shader resources into si_shader_llvm_resources.c
Reviewed-by: Timothy Arceri <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3399>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader_llvm_resources.c')
-rw-r--r--   src/gallium/drivers/radeonsi/si_shader_llvm_resources.c   320
1 file changed, 320 insertions(+), 0 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c b/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c
new file mode 100644
index 00000000000..ba23ddb0aa7
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "sid.h"
+
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ */
+static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
+					LLVMValueRef index,
+					unsigned num)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+	LLVMValueRef cc;
+
+	if (util_is_power_of_two_or_zero(num)) {
+		index = LLVMBuildAnd(builder, index, c_max, "");
+	} else {
+		/* In theory, this MAX pattern should result in code that is
+		 * as good as the bit-wise AND above.
+		 *
+		 * In practice, LLVM generates worse code (at the time of
+		 * writing), because its value tracking is not strong enough.
+		 */
+		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+		index = LLVMBuildSelect(builder, cc, index, c_max, "");
+	}
+
+	return index;
+}
+
+static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
+{
+	LLVMValueRef ptr =
+		ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
+	struct si_shader_selector *sel = ctx->shader->selector;
+
+	/* Do the bounds checking with a descriptor, because
+	 * doing computation and manual bounds checking of 64-bit
+	 * addresses generates horrible VALU code with very high
+	 * VGPR usage and very low SIMD occupancy.
+	 */
+	ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
+
+	LLVMValueRef desc0, desc1;
+	desc0 = ptr;
+	desc1 = LLVMConstInt(ctx->i32,
+			     S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
+
+	uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+			 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+			 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+			 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+	if (ctx->screen->info.chip_class >= GFX10)
+		rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+			 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+			 S_008F0C_RESOURCE_LEVEL(1);
+	else
+		rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+			 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+	LLVMValueRef desc_elems[] = {
+		desc0,
+		desc1,
+		LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0),
+		LLVMConstInt(ctx->i32, rsrc3, false)
+	};
+
+	return ac_build_gather_values(&ctx->ac, desc_elems, 4);
+}
+
+static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	struct si_shader_selector *sel = ctx->shader->selector;
+
+	LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
+
+	if (sel->info.const_buffers_declared == 1 &&
+	    sel->info.shader_buffers_declared == 0) {
+		return load_const_buffer_desc_fast_path(ctx);
+	}
+
+	index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
+	index = LLVMBuildAdd(ctx->ac.builder, index,
+			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
+
+	return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
+}
+
+static LLVMValueRef
+load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac,
+					   ctx->const_and_shader_buffers);
+
+	index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
+	index = LLVMBuildSub(ctx->ac.builder,
+			     LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
+			     index, "");
+
+	return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
+}
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with non-trivial DCC
+ * enabled can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+				  LLVMValueRef rsrc)
+{
+	if (ctx->screen->info.chip_class <= GFX7) {
+		return rsrc;
+	} else {
+		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+		LLVMValueRef tmp;
+
+		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
+	}
+}
+
+/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
+ * adjust "index" to point to FMASK.
+ */
+static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+				       LLVMValueRef list, LLVMValueRef index,
+				       enum ac_descriptor_type desc_type,
+				       bool uses_store, bool bindless)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef rsrc;
+
+	if (desc_type == AC_DESC_BUFFER) {
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+				      ctx->i32_1);
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+	} else {
+		assert(desc_type == AC_DESC_IMAGE ||
+		       desc_type == AC_DESC_FMASK);
+	}
+
+	if (bindless)
+		rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+	else
+		rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
+	if (desc_type == AC_DESC_IMAGE && uses_store)
+		rsrc = force_dcc_off(ctx, rsrc);
+	return rsrc;
+}
+
+/**
+ * Load an image view, fmask view, or sampler state descriptor.
+ */
+static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+					 LLVMValueRef list, LLVMValueRef index,
+					 enum ac_descriptor_type type)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+
+	switch (type) {
+	case AC_DESC_IMAGE:
+		/* The image is at [0:7]. */
+		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+		break;
+	case AC_DESC_BUFFER:
+		/* The buffer is in [4:7]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+				      ctx->i32_1);
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+		break;
+	case AC_DESC_FMASK:
+		/* The FMASK is at [8:15]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+				      ctx->i32_1);
+		break;
+	case AC_DESC_SAMPLER:
+		/* The sampler state is at [12:15]. */
+		index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+				      LLVMConstInt(ctx->i32, 3, 0));
+		list = LLVMBuildPointerCast(builder, list,
+					    ac_array_in_const32_addr_space(ctx->v4i32), "");
+		break;
+	case AC_DESC_PLANE_0:
+	case AC_DESC_PLANE_1:
+	case AC_DESC_PLANE_2:
+		/* Only used for the multiplane image support for Vulkan. Should
+		 * never be reached in radeonsi.
+		 */
+		unreachable("Plane descriptor requested in radeonsi.");
+	}
+
+	return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
+
+static LLVMValueRef
+si_nir_load_sampler_desc(struct ac_shader_abi *abi,
+			 unsigned descriptor_set, unsigned base_index,
+			 unsigned constant_index, LLVMValueRef dynamic_index,
+			 enum ac_descriptor_type desc_type, bool image,
+			 bool write, bool bindless)
+{
+	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+	LLVMBuilderRef builder = ctx->ac.builder;
+	unsigned const_index = base_index + constant_index;
+
+	assert(!descriptor_set);
+	assert(desc_type <= AC_DESC_BUFFER);
+
+	if (bindless) {
+		LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
+
+		/* dynamic_index is the bindless handle */
+		if (image) {
+			/* Bindless image descriptors use 16-dword slots. */
+			dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
+						     LLVMConstInt(ctx->i64, 2, 0), "");
+			/* FMASK is right after the image. */
+			if (desc_type == AC_DESC_FMASK) {
+				dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+							     ctx->i32_1, "");
+			}
+
+			return si_load_image_desc(ctx, list, dynamic_index, desc_type,
+						  write, true);
+		}
+
+		/* Since bindless handle arithmetic can contain an unsigned integer
+		 * wraparound and si_load_sampler_desc assumes there isn't any,
+		 * use GEP without "inbounds" (inside ac_build_pointer_add)
+		 * to prevent incorrect code generation and hangs.
+		 */
+		dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
+					     LLVMConstInt(ctx->i64, 2, 0), "");
+		list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
+		return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
+	}
+
+	unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
+	assert(const_index < num_slots || dynamic_index);
+
+	LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
+	LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
+
+	if (dynamic_index) {
+		index = LLVMBuildAdd(builder, index, dynamic_index, "");
+
+		/* From the GL_ARB_shader_image_load_store extension spec:
+		 *
+		 *    If a shader performs an image load, store, or atomic
+		 *    operation using an image variable declared as an array,
+		 *    and if the index used to select an individual element is
+		 *    negative or greater than or equal to the size of the
+		 *    array, the results of the operation are undefined but may
+		 *    not lead to termination.
+		 */
+		index = si_llvm_bound_index(ctx, index, num_slots);
+	}
+
+	if (image) {
+		/* FMASKs are separate from images. */
+		if (desc_type == AC_DESC_FMASK) {
+			index = LLVMBuildAdd(ctx->ac.builder, index,
+					     LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
+		}
+		index = LLVMBuildSub(ctx->ac.builder,
+				     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
+				     index, "");
+		return si_load_image_desc(ctx, list, index, desc_type, write, false);
+	}
+
+	index = LLVMBuildAdd(ctx->ac.builder, index,
+			     LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
+	return si_load_sampler_desc(ctx, list, index, desc_type);
+}
+
+void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
+{
+	ctx->abi.load_ubo = load_ubo;
+	ctx->abi.load_ssbo = load_ssbo;
+	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
+}
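The clamping done by si_llvm_bound_index is easier to follow outside of the LLVM IR builder calls. Below is a minimal standalone C sketch of the same two paths (bit-wise AND for power-of-two slot counts, unsigned compare-and-select otherwise); bound_index() and is_power_of_two_or_zero() are hypothetical stand-ins for the emitted IR and for Mesa's util_is_power_of_two_or_zero(), not code from this commit:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Same test as Mesa's util_is_power_of_two_or_zero(). */
static bool is_power_of_two_or_zero(unsigned v)
{
	return (v & (v - 1)) == 0;
}

/* Scalar equivalent of si_llvm_bound_index(): keep "index" inside [0, num).
 * For a power-of-two num, a single AND with num - 1 suffices (out-of-bounds
 * indices wrap); otherwise an unsigned compare selects between the index and
 * the last valid slot, mirroring the LLVMBuildICmp/LLVMBuildSelect pair
 * (out-of-bounds indices clamp).
 */
static unsigned bound_index(unsigned index, unsigned num)
{
	unsigned c_max = num - 1;

	if (is_power_of_two_or_zero(num))
		return index & c_max;

	return index <= c_max ? index : c_max;
}

int main(void)
{
	/* 16 slots: an out-of-bounds index wraps via the AND path. */
	assert(bound_index(21, 16) == 5);
	/* 12 slots: an out-of-bounds index clamps to the last slot. */
	assert(bound_index(21, 12) == 11);
	/* In-bounds indices pass through unchanged on both paths. */
	assert(bound_index(7, 16) == 7 && bound_index(7, 12) == 7);
	printf("bound_index sketch OK\n");
	return 0;
}

Either way the result stays inside the descriptor array, which is all the callers in this file rely on: the doc comment only promises "an undefined value in the same interval" for out-of-bounds indices.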