summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_limits.h 10
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 6
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 2
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 112
4 files changed, 86 insertions, 44 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index e03bac640df..87be3511d94 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -47,19 +47,21 @@
#define LP_MAX_TGSI_ADDRS 16
-#define LP_MAX_TGSI_IMMEDIATES 256
+#define LP_MAX_TGSI_IMMEDIATES 4096
#define LP_MAX_TGSI_PREDS 16
#define LP_MAX_TGSI_CONST_BUFFERS 16
/*
- * For quick access we cache temps in a statically
- * allocated array. This defines the maximum size
- * of that array.
+ * For quick access we cache registers in statically
+ * allocated arrays. Here we define the maximum size
+ * for those arrays.
*/
#define LP_MAX_INLINED_TEMPS 256
+#define LP_MAX_INLINED_IMMEDIATES 256
+
/**
* Maximum control flow nesting
*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index e0a7c5dc1ab..ffd6e874a89 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -444,7 +444,7 @@ struct lp_build_tgsi_soa_context
struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][TGSI_NUM_CHANNELS];
+ LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS];
@@ -482,7 +482,7 @@ struct lp_build_tgsi_soa_context
struct lp_exec_mask exec_mask;
uint num_immediates;
-
+ boolean use_immediates_array;
};
void
@@ -536,7 +536,7 @@ struct lp_build_tgsi_aos_context
struct lp_build_sampler_aos *sampler;
- LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES];
+ LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES];
LLVMValueRef temps[LP_MAX_INLINED_TEMPS];
LLVMValueRef addr[LP_MAX_TGSI_ADDRS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS];
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index fd5df0eb52f..4dee9bb4dd4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -1042,7 +1042,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
float imm[4];
assert(size <= 4);
- assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
+ assert(num_immediates < LP_MAX_INLINED_IMMEDIATES);
for (chan = 0; chan < 4; ++chan) {
imm[chan] = 0.0f;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 3ba20314203..d2cb0a0975f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1295,33 +1295,42 @@ emit_fetch_immediate(
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res = NULL;
- if (reg->Register.Indirect) {
- LLVMValueRef indirect_index;
- LLVMValueRef index_vec; /* index into the immediate register array */
+ if (bld->use_immediates_array || reg->Register.Indirect) {
LLVMValueRef imms_array;
LLVMTypeRef fptr_type;
- indirect_index = get_indirect_index(bld,
- reg->Register.File,
- reg->Register.Index,
- &reg->Indirect);
- /*
- * Unlike for other reg classes, adding pixel offsets is unnecessary -
- * immediates are stored as full vectors (FIXME??? - might be better
- * to store them the same as constants) but all elements are the same
- * in any case.
- */
- index_vec = get_soa_array_offsets(&bld_base->uint_bld,
- indirect_index,
- swizzle,
- FALSE);
-
/* cast imms_array pointer to float* */
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
- /* Gather values from the immediate register array */
- res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
+ if (reg->Register.Indirect) {
+ LLVMValueRef indirect_index;
+ LLVMValueRef index_vec; /* index into the immediate register array */
+
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ &reg->Indirect);
+ /*
+ * Unlike for other reg classes, adding pixel offsets is unnecessary -
+ * immediates are stored as full vectors (FIXME??? - might be better
+ * to store them the same as constants) but all elements are the same
+ * in any case.
+ */
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle,
+ FALSE);
+
+ /* Gather values from the immediate register array */
+ res = build_gather(&bld_base->base, imms_array, index_vec, NULL);
+ } else {
+ LLVMValueRef lindex = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + swizzle);
+ LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
+ bld->imms_array, &lindex, 1, "");
+ res = LLVMBuildLoad(builder, imms_ptr, "");
+ }
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
@@ -2728,51 +2737,71 @@ void lp_emit_immediate_soa(
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
-
- /* simply copy the immediate values into the next immediates[] slot */
+ LLVMValueRef imms[4];
unsigned i;
const uint size = imm->Immediate.NrTokens - 1;
assert(size <= 4);
- assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
for( i = 0; i < size; ++i )
- bld->immediates[bld->num_immediates][i] =
- lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
+ imms[i] =
+ lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
break;
case TGSI_IMM_UINT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
- bld->immediates[bld->num_immediates][i] =
- LLVMConstBitCast(tmp, bld_base->base.vec_type);
+ imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
break;
case TGSI_IMM_INT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
- bld->immediates[bld->num_immediates][i] =
- LLVMConstBitCast(tmp, bld_base->base.vec_type);
+ imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
-
+
break;
}
for( i = size; i < 4; ++i )
- bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
+ imms[i] = bld_base->base.undef;
- if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+ if (bld->use_immediates_array) {
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
for (i = 0; i < 4; ++i ) {
LLVMValueRef lindex = lp_build_const_int32(
- bld->bld_base.base.gallivm, index * 4 + i);
+ bld->bld_base.base.gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
bld->imms_array, &lindex, 1, "");
- LLVMBuildStore(builder,
- bld->immediates[index][i],
- imm_ptr);
+ LLVMBuildStore(builder, imms[i], imm_ptr);
+ }
+ } else {
+ /* simply copy the immediate values into the next immediates[] slot */
+ unsigned i;
+ const uint size = imm->Immediate.NrTokens - 1;
+ assert(size <= 4);
+ assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
+
+ for(i = 0; i < 4; ++i )
+ bld->immediates[bld->num_immediates][i] = imms[i];
+
+ if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+ unsigned index = bld->num_immediates;
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ for (i = 0; i < 4; ++i ) {
+ LLVMValueRef lindex = lp_build_const_int32(
+ bld->bld_base.base.gallivm, index * 4 + i);
+ LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
+ bld->imms_array, &lindex, 1, "");
+ LLVMBuildStore(builder,
+ bld->immediates[index][i],
+ imm_ptr);
+ }
}
}
@@ -3629,6 +3658,17 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
}
+ /*
+ * For performance reasons immediates are normally cached in a static
+ * array, but if there are too many of them to fit, we have to fall
+ * back to using only a dynamically allocated array.
+ */
+ bld.use_immediates_array =
+ (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
+ if (bld.use_immediates_array) {
+ bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
+ }
+
bld.bld_base.soa = TRUE;
bld.bld_base.emit_debug = emit_debug;