summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/radeon/AMDIL.h3
-rw-r--r--src/gallium/drivers/radeon/SIISelLowering.cpp3
-rw-r--r--src/gallium/drivers/radeon/SIInstructions.td40
-rw-r--r--src/gallium/drivers/radeon/SIIntrinsics.td5
-rw-r--r--src/gallium/drivers/radeonsi/radeonsi_shader.c80
5 files changed, 57 insertions, 74 deletions
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h
index 45638f64280..8bd024a4bd8 100644
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -129,7 +129,8 @@ enum AddressSpaces {
ADDRESS_NONE = 5, // Address space for unknown memory.
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
- LAST_ADDRESS = 8
+ USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
+ LAST_ADDRESS = 9
};
// This union/struct combination is an easy way to read out the
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp
index a79aba9358b..1cdcd36c648 100644
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -188,6 +188,9 @@ void SITargetLowering::lowerUSE_SGPR(MachineInstr *MI,
unsigned dstReg = MI->getOperand(0).getReg();
int64_t newIndex = MI->getOperand(1).getImm();
const TargetRegisterClass * dstClass = MRI.getRegClass(dstReg);
+ unsigned DwordWidth = dstClass->getSize() / 4;
+ assert(newIndex % DwordWidth == 0 && "USER_SGPR not properly aligned");
+ newIndex = newIndex / DwordWidth;
unsigned newReg = dstClass->getRegister(newIndex);
addLiveIn(MI, MF, MRI, TII, newReg);
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td
index a77b8bd7c11..fc8ec4a3394 100644
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -7,6 +7,18 @@
//
//===----------------------------------------------------------------------===//
+def load_user_sgpr : PatFrag<(ops node:$ptr),
+ (load node:$ptr),
+ [{
+ const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
+ if (Src) {
+ PointerType * PT = dyn_cast<PointerType>(Src->getType());
+ return PT && PT->getAddressSpace() == AMDILAS::USER_SGPR_ADDRESS;
+ }
+ return false;
+ }]
+>;
+
def isSI : Predicate<"Subtarget.device()"
"->getGeneration() == AMDILDeviceInfo::HD7XXX">;
@@ -826,26 +838,19 @@ def SI_INTERP_CONST : InstSI <
imm:$attr, SReg_32:$params))]
>;
-
def USE_SGPR_32 : InstSI <
(outs SReg_32:$dst),
(ins i32imm:$src0),
"USE_SGPR_32",
- [(set SReg_32:$dst, (int_SI_use_sgpr imm:$src0))]
-
-> {
- field bits<32> Inst = 0;
-}
+ [(set (i32 SReg_32:$dst), (load_user_sgpr imm:$src0))]
+>;
def USE_SGPR_64 : InstSI <
(outs SReg_64:$dst),
(ins i32imm:$src0),
"USE_SGPR_64",
- [(set SReg_64:$dst, (int_SI_use_sgpr imm:$src0))]
-
-> {
- field bits<32> Inst = 0;
-}
+ [(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))]
+>;
def VS_LOAD_BUFFER_INDEX : InstSI <
(outs VReg_32:$dst),
@@ -869,19 +874,6 @@ def : Pat<
0, 0, (i32 SREG_LIT_0))
>;
-def : Pat<
- (int_SI_use_sgprptrcf32 imm:$src0),
- (USE_SGPR_64 imm:$src0)
->;
-def : Pat<
- (int_SI_use_sgprptrci128 imm:$src0),
- (USE_SGPR_64 imm:$src0)
->;
-def : Pat<
- (int_SI_use_sgprptrci256 imm:$src0),
- (USE_SGPR_64 imm:$src0)
->;
-
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td
index d8bf4fab3db..95273a2a68f 100644
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -21,11 +21,6 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
- def int_SI_use_sgpr : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>;
- class int_SI_use_sgprptr : Intrinsic <[llvm_anyptr_ty], [llvm_i32_ty], []>;
- def int_SI_use_sgprptrcf32 : int_SI_use_sgprptr;
- def int_SI_use_sgprptrci128 : int_SI_use_sgprptr;
- def int_SI_use_sgprptrci256 : int_SI_use_sgprptr;
/* Interpolation Intrinsics */
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 4f995494d1f..e625a74338a 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -67,6 +67,7 @@ static struct si_shader_context * si_shader_context(
#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
+#define USER_SGPR_ADDR_SPACE 8
enum sgpr_type {
SGPR_CONST_PTR_F32,
@@ -99,10 +100,19 @@ static LLVMValueRef build_indexed_load(
return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}
-/*
- * XXX: Instead of using an intrinsic to use a specific SGPR, we should be
- * using load instructions. The loads should load from the USER_SGPR address
- * space and use the sgpr index as the pointer.
+/**
+ * Load a value stored in one of the user SGPRs
+ *
+ * @param sgpr This is the sgpr to load the value from. If you need to load a
+ * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer),
+ * then you should pass the index of the first SGPR that holds the value. For
+ * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then
+ * use pass 2 for the sgpr parameter.
+ *
+ * The value of the sgpr parameter must also be aligned to the width of the type
+ * being loaded, so that the sgpr parameter is divisible by the dword width of the
+ * type. For example, if the value being loaded is two dwords wide, then the sgpr
+ * parameter must be divisible by two.
*/
static LLVMValueRef use_sgpr(
struct gallivm_state * gallivm,
@@ -111,44 +121,48 @@ static LLVMValueRef use_sgpr(
{
LLVMValueRef sgpr_index;
LLVMTypeRef ret_type;
+ LLVMValueRef ptr;
sgpr_index = lp_build_const_int32(gallivm, sgpr);
switch (type) {
case SGPR_CONST_PTR_F32:
+ assert(sgpr % 2 == 0);
ret_type = LLVMFloatTypeInContext(gallivm->context);
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
- return lp_build_intrinsic_unary(gallivm->builder,
- "llvm.SI.use.sgprptrcf32.",
- ret_type, sgpr_index);
+ break;
+
case SGPR_I32:
ret_type = LLVMInt32TypeInContext(gallivm->context);
- return lp_build_intrinsic_unary(gallivm->builder,
- "llvm.SI.use.sgpr.i32",
- ret_type, sgpr_index);
+ break;
+
case SGPR_I64:
+ assert(sgpr % 2 == 0);
ret_type= LLVMInt64TypeInContext(gallivm->context);
- return lp_build_intrinsic_unary(gallivm->builder,
- "llvm.SI.use.sgpr.i64",
- ret_type, sgpr_index);
+ break;
+
case SGPR_CONST_PTR_V4I32:
+ assert(sgpr % 2 == 0);
ret_type = LLVMInt32TypeInContext(gallivm->context);
ret_type = LLVMVectorType(ret_type, 4);
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
- return lp_build_intrinsic_unary(gallivm->builder,
- "llvm.SI.use.sgprptrci128.",
- ret_type, sgpr_index);
+ break;
+
case SGPR_CONST_PTR_V8I32:
+ assert(sgpr % 2 == 0);
ret_type = LLVMInt32TypeInContext(gallivm->context);
ret_type = LLVMVectorType(ret_type, 8);
ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
- return lp_build_intrinsic_unary(gallivm->builder,
- "llvm.SI.use.sgprptrci256.",
- ret_type, sgpr_index);
+ break;
+
default:
assert(!"Unsupported SGPR type in use_sgpr()");
return NULL;
}
+
+ ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE);
+ ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, "");
+ return LLVMBuildLoad(gallivm->builder, ptr, "");
}
static void declare_input_vs(
@@ -174,7 +188,7 @@ static void declare_input_vs(
/* XXX: Communicate with the rest of the driver about which SGPR the T#
* list pointer is going to be stored in. Hard code to SGPR[6:7] for
* now */
- t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3);
+ t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6);
t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
@@ -320,27 +334,6 @@ static LLVMValueRef fetch_constant(
return build_indexed_load(base->gallivm, const_ptr, offset);
}
-
-/* Declare some intrinsics with the correct attributes */
-static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
-{
- LLVMValueRef function;
- struct gallivm_state * gallivm = bld_base->base.gallivm;
-
- LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
- LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-
- /* declare i32 @llvm.SI.use.sgpr.i32(i32) */
- function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
- i32, &i32, 1);
- LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
-
- /* declare i64 @llvm.SI.use.sgpr.i64(i32) */
- function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
- i64, &i32, 1);
- LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
-}
-
/* XXX: This is partially implemented for VS only at this point. It is not complete */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
@@ -504,14 +497,14 @@ static void tex_fetch_args(
0, LP_CHAN_ALL);
/* Resource */
- ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2);
+ ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4);
offset = lp_build_const_int32(bld_base->base.gallivm,
8 * emit_data->inst->Src[1].Register.Index);
emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
ptr, offset);
/* Sampler */
- ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1);
+ ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2);
offset = lp_build_const_int32(bld_base->base.gallivm,
4 * emit_data->inst->Src[1].Register.Index);
emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
@@ -557,7 +550,6 @@ int si_pipe_shader_create(
tgsi_scan_shader(shader->tokens, &shader_info);
bld_base->info = &shader_info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
- bld_base->emit_prologue = si_llvm_emit_prologue;
bld_base->emit_epilogue = si_llvm_emit_epilogue;
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;