summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 158
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 15
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 415
3 files changed, 516 insertions, 72 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index d108f35f719..4947f304a11 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -42,6 +42,7 @@
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
#include "lp_bld_interp.h"
@@ -122,6 +123,33 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
}
+static void
+calc_offsets(struct lp_build_context *coeff_bld,
+ unsigned quad_start_index,
+ LLVMValueRef *pixoffx,
+ LLVMValueRef *pixoffy)
+{
+ unsigned i;
+ unsigned num_pix = coeff_bld->type.length;
+ struct gallivm_state *gallivm = coeff_bld->gallivm;
+ LLVMBuilderRef builder = coeff_bld->gallivm->builder;
+ LLVMValueRef nr, pixxf, pixyf;
+ /* Build the x/y pixel offset vectors returned through *pixoffx and *pixoffy, one element at a time, starting from coeff_bld->undef. */
+ *pixoffx = coeff_bld->undef;
+ *pixoffy = coeff_bld->undef;
+ /* Element i is quad_offset_{x,y}[i] (i % num_pix == i since i < num_pix), plus 2 in x if bit 0 of quad_start_index is set and plus 2 in y if bit 1 is set. */
+ for (i = 0; i < num_pix; i++) {
+ nr = lp_build_const_int32(gallivm, i);
+ pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
+ (quad_start_index & 1) * 2);
+ pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
+ (quad_start_index & 2));
+ *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
+ *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
+ }
+}
+
+
/* Much easier, and significantly less instructions in the per-stamp
* part (less than half) but overall more instructions so a loss if
* most quads are active. Might be a win though with larger vectors.
@@ -210,6 +238,7 @@ static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
int quad_start_index,
+ LLVMValueRef loop_iter,
int start,
int end)
{
@@ -217,22 +246,22 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
struct lp_build_context *coeff_bld = &bld->coeff_bld;
struct lp_build_context *setup_bld = &bld->setup_bld;
LLVMValueRef oow = NULL;
- unsigned attrib, i;
+ unsigned attrib;
LLVMValueRef pixoffx;
LLVMValueRef pixoffy;
- unsigned num_pix = coeff_bld->type.length;
- /* could do this with code-generated passed in pixel offsets */
- pixoffx = coeff_bld->undef;
- pixoffy = coeff_bld->undef;
- for (i = 0; i < coeff_bld->type.length; i++) {
- LLVMValueRef nr = lp_build_const_int32(gallivm, i);
- LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
- (quad_start_index & 1) * 2);
- LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
- (quad_start_index & 2));
- pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
- pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+ /* could do this with code-generated passed in pixel offsets too */
+ if (bld->dynamic_offsets) {
+ LLVMValueRef ptr;
+
+ assert(loop_iter);
+ ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
+ pixoffx = LLVMBuildLoad(builder, ptr, "");
+ ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
+ pixoffy = LLVMBuildLoad(builder, ptr, "");
+ }
+ else {
+ calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy);
}
pixoffx = LLVMBuildFAdd(builder, pixoffx,
@@ -498,7 +527,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
attrib_name(a, attrib, chan, ".a");
attrib_name(dadq, attrib, chan, ".dadq");
- bld->a [attrib][chan] = a;
+ if (bld->dynamic_offsets) {
+ bld->a[attrib][chan] = lp_build_alloca(gallivm,
+ LLVMTypeOf(a), "");
+ LLVMBuildStore(builder, a, bld->a[attrib][chan]);
+ }
+ else {
+ bld->a[attrib][chan] = a;
+ }
bld->dadq[attrib][chan] = dadq;
}
}
@@ -514,6 +550,7 @@ static void
attribs_update(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
int quad_start_index,
+ LLVMValueRef loop_iter,
int start,
int end)
{
@@ -535,6 +572,9 @@ attribs_update(struct lp_build_interp_soa_context *bld,
if (interp == LP_INTERP_CONSTANT ||
interp == LP_INTERP_FACING) {
a = bld->a[attrib][chan];
+ if (bld->dynamic_offsets) {
+ a = LLVMBuildLoad(builder, a, "");
+ }
}
else if (interp == LP_INTERP_POSITION) {
assert(attrib > 0);
@@ -549,8 +589,20 @@ attribs_update(struct lp_build_interp_soa_context *bld,
* Broadcast the attribute value for this quad into all elements
*/
- a = LLVMBuildShuffleVector(builder,
- a, coeff_bld->undef, shuffle, "");
+ if (bld->dynamic_offsets) {
+ /* stored as a vector; load a single float from it */
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
+ gallivm->context), 0);
+ LLVMValueRef ptr;
+ a = LLVMBuildBitCast(builder, a, ptr_type, "");
+ ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
+ a = LLVMBuildLoad(builder, ptr, "");
+ a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
+ }
+ else {
+ a = LLVMBuildShuffleVector(builder,
+ a, coeff_bld->undef, shuffle, "");
+ }
/*
* Get the derivatives.
@@ -639,6 +691,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct lp_shader_input *inputs,
LLVMBuilderRef builder,
struct lp_type type,
+ boolean dynamic_offsets,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
@@ -696,11 +749,42 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
pos_init(bld, x0, y0);
if (coeff_type.length > 4) {
+ bld->simple_interp = TRUE;
+ if (dynamic_offsets) {
+ /* XXX this should use a global static table */
+ unsigned i;
+ unsigned num_loops = 16 / type.length;
+ LLVMValueRef pixoffx, pixoffy, index;
+ LLVMValueRef ptr;
+
+ bld->dynamic_offsets = TRUE;
+ bld->xoffset_store = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm, type),
+ lp_build_const_int32(gallivm, num_loops),
+ "");
+ bld->yoffset_store = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm, type),
+ lp_build_const_int32(gallivm, num_loops),
+ "");
+ for (i = 0; i < num_loops; i++) {
+ index = lp_build_const_int32(gallivm, i);
+ calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
+ ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
+ LLVMBuildStore(builder, pixoffx, ptr);
+ ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
+ LLVMBuildStore(builder, pixoffy, ptr);
+ }
+ }
coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
}
else {
+ bld->simple_interp = FALSE;
+ if (dynamic_offsets) {
+ bld->dynamic_offsets = TRUE;
+ }
coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
}
+
}
@@ -714,26 +798,52 @@ lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
{
assert(quad_start_index < 4);
- if (bld->coeff_bld.type.length > 4) {
- attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+ if (bld->simple_interp) {
+ attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
}
else {
- attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+ attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs);
}
}
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
- struct gallivm_state *gallivm,
- int quad_start_index)
+ struct gallivm_state *gallivm,
+ int quad_start_index)
{
assert(quad_start_index < 4);
- if (bld->coeff_bld.type.length > 4) {
- attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
+ if (bld->simple_interp) {
+ attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1);
+ }
+ else {
+ attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1);
+ }
+}
+/* Variant of lp_build_interp_soa_update_inputs taking the quad index as an LLVM value: it is forwarded as the loop_iter argument (the integer quad_start_index is passed as 0) for attributes 1..num_attribs. */
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
+ LLVMValueRef quad_start_index)
+{
+ if (bld->simple_interp) {
+ attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
+ }
+ else {
+ attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs);
+ }
+}
+/* Variant of lp_build_interp_soa_update_pos taking the quad index as an LLVM value: it is forwarded as the loop_iter argument (the integer quad_start_index is passed as 0) for the attribute range start=0, end=1. */
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
+ LLVMValueRef quad_start_index)
+{
+ if (bld->simple_interp) {
+ attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1);
}
else {
- attribs_update(bld, gallivm, quad_start_index, 0, 1);
+ attribs_update(bld, gallivm, 0, quad_start_index, 0, 1);
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index f293b582318..d273e3f9b99 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -84,6 +84,8 @@ struct lp_build_interp_soa_context
unsigned num_attribs;
unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS];
+ boolean simple_interp;
+ boolean dynamic_offsets;
LLVMValueRef x;
LLVMValueRef y;
@@ -98,6 +100,9 @@ struct lp_build_interp_soa_context
LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+ LLVMValueRef xoffset_store;
+ LLVMValueRef yoffset_store;
+
/*
* Convenience pointers. Callers may access this one.
*/
@@ -113,6 +118,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct lp_shader_input *inputs,
LLVMBuilderRef builder,
struct lp_type type,
+ boolean dynamic_offsets,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
@@ -129,5 +135,14 @@ lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
struct gallivm_state *gallivm,
int quad__start_index);
+void
+lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
+ LLVMValueRef quad_start_index);
+
+void
+lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
+ struct gallivm_state *gallivm,
+ LLVMValueRef quad_start_index);
#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 54f45357fdc..374544fcf70 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -295,7 +295,7 @@ generate_fs(struct gallivm_state *gallivm,
/* Declare the color and z variables */
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
- color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
+ color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color");
}
}
@@ -336,7 +336,7 @@ generate_fs(struct gallivm_state *gallivm,
}
lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4);
-
+
/* Build the actual shader */
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, &system_values,
@@ -436,6 +436,252 @@ generate_fs(struct gallivm_state *gallivm,
/**
+ * Generate the fragment shader, depth/stencil test, and alpha tests as the body of one generated loop (lp_build_for_loop_begin/end) bounded by num_loop; masks, depth and colors are indexed by the loop counter.
+ */
+static void
+generate_fs_loop(struct gallivm_state *gallivm,
+ struct lp_fragment_shader *shader,
+ const struct lp_fragment_shader_variant_key *key,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef context_ptr,
+ LLVMValueRef num_loop,
+ struct lp_build_interp_soa_context *interp,
+ struct lp_build_sampler_soa *sampler,
+ LLVMValueRef mask_store,
+ LLVMValueRef (*out_color)[4],
+ LLVMValueRef depth_ptr,
+ unsigned depth_bits,
+ LLVMValueRef facing,
+ LLVMValueRef counter)
+{
+ const struct util_format_description *zs_format_desc = NULL;
+ const struct tgsi_token *tokens = shader->base.tokens;
+ LLVMTypeRef vec_type;
+ LLVMValueRef mask_ptr, mask_val;
+ LLVMValueRef consts_ptr;
+ LLVMValueRef z;
+ LLVMValueRef zs_value = NULL;
+ LLVMValueRef stencil_refs[2];
+ LLVMValueRef depth_ptr_i;
+ LLVMValueRef depth_offset;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+ struct lp_build_for_loop_state loop_state;
+ struct lp_build_mask_context mask;
+ boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 &&
+ shader->info.base.num_inputs < 3 &&
+ shader->info.base.num_instructions < 8);
+ unsigned attrib;
+ unsigned chan;
+ unsigned cbuf;
+ unsigned depth_mode;
+
+ struct lp_bld_tgsi_system_values system_values;
+
+ memset(&system_values, 0, sizeof(system_values));
+ /* Choose when depth/stencil testing and writing happen; depth_mode is 0 when depth and both stencil faces are disabled. */
+ if (key->depth.enabled ||
+ key->stencil[0].enabled ||
+ key->stencil[1].enabled) {
+
+ zs_format_desc = util_format_description(key->zsbuf_format);
+ assert(zs_format_desc);
+
+ if (!shader->info.base.writes_z) {
+ if (key->alpha.enabled || shader->info.base.uses_kill)
+ /* With alpha test and kill, can do the depth test early
+ * and hopefully eliminate some quads. But need to do a
+ * special deferred depth write once the final mask value
+ * is known.
+ */
+ depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE;
+ else
+ depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE;
+ }
+ else {
+ depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE;
+ }
+
+ if (!(key->depth.enabled && key->depth.writemask) &&
+ !(key->stencil[0].enabled && key->stencil[0].writemask))
+ depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
+ }
+ else {
+ depth_mode = 0;
+ }
+
+ /* Stencil refs and the constants pointer are fetched from the JIT context before the loop is opened. */
+ stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr);
+ stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr);
+
+ vec_type = lp_build_vec_type(gallivm, type);
+
+ consts_ptr = lp_jit_context_constants(gallivm, context_ptr);
+ /* Open the loop: start value 0, compare LLVMIntULT against num_loop, step 1. */
+ lp_build_for_loop_begin(&loop_state, gallivm,
+ lp_build_const_int32(gallivm, 0),
+ LLVMIntULT,
+ num_loop,
+ lp_build_const_int32(gallivm, 1));
+ /* Load this iteration's execution mask from mask_store[counter]. */
+ mask_ptr = LLVMBuildGEP(builder, mask_store,
+ &loop_state.counter, 1, "mask_ptr");
+ mask_val = LLVMBuildLoad(builder, mask_ptr, "");
+ /* Per-iteration depth offset: counter * depth_bits * type.length. NOTE: generate_fragment passes zs block.bits/8 as depth_bits, i.e. a byte count despite the name. */
+ depth_offset = LLVMBuildMul(builder, loop_state.counter,
+ lp_build_const_int32(gallivm, depth_bits * type.length),
+ "");
+
+ depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
+
+ memset(outputs, 0, sizeof outputs);
+ /* One array of num_loop vectors per color buffer channel; the color write below stores into element [counter]. */
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ out_color[cbuf][chan] = lp_build_array_alloca(gallivm,
+ lp_build_vec_type(gallivm,
+ type),
+ num_loop, "color");
+ }
+ }
+
+
+
+ /* 'mask' will control execution based on quad's pixel alive/killed state */
+ lp_build_mask_begin(&mask, gallivm, type, mask_val);
+
+ if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader)
+ lp_build_mask_check(&mask);
+
+ lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter);
+ z = interp->pos[2];
+
+ if (depth_mode & EARLY_DEPTH_TEST) {
+ lp_build_depth_stencil_test(gallivm,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr_i, facing,
+ &zs_value,
+ !simple_shader);
+
+ if (depth_mode & EARLY_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+ }
+ }
+
+ lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter);
+
+ /* Build the actual shader */
+ lp_build_tgsi_soa(gallivm, tokens, type, &mask,
+ consts_ptr, &system_values,
+ interp->pos, interp->inputs,
+ outputs, sampler, &shader->info.base);
+
+ /* Alpha test */
+ if (key->alpha.enabled) {
+ int color0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_COLOR,
+ 0);
+
+ if (color0 != -1 && outputs[color0][3]) {
+ const struct util_format_description *cbuf_format_desc;
+ LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha");
+ LLVMValueRef alpha_ref_value;
+
+ alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr);
+ alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value);
+
+ cbuf_format_desc = util_format_description(key->cbuf_format[0]);
+
+ lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc,
+ &mask, alpha, alpha_ref_value,
+ (depth_mode & LATE_DEPTH_TEST) != 0);
+ }
+ }
+
+ /* Late Z test */
+ if (depth_mode & LATE_DEPTH_TEST) {
+ int pos0 = find_output_by_semantic(&shader->info.base,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ if (pos0 != -1 && outputs[pos0][2]) {
+ z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
+ }
+
+ lp_build_depth_stencil_test(gallivm,
+ &key->depth,
+ key->stencil,
+ type,
+ zs_format_desc,
+ &mask,
+ stencil_refs,
+ z,
+ depth_ptr_i, facing,
+ &zs_value,
+ !simple_shader);
+ /* Late Z write */
+ if (depth_mode & LATE_DEPTH_WRITE) {
+ lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value);
+ }
+ }
+ else if ((depth_mode & EARLY_DEPTH_TEST) &&
+ (depth_mode & LATE_DEPTH_WRITE))
+ {
+ /* Need to apply a reduced mask to the depth write. Reload the
+ * depth value, update from zs_value with the new mask value and
+ * write that out.
+ */
+ lp_build_deferred_depth_write(gallivm,
+ type,
+ zs_format_desc,
+ &mask,
+ depth_ptr_i,
+ zs_value);
+ }
+
+
+ /* Color write */
+ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib)
+ {
+ if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR &&
+ shader->info.base.output_semantic_index[attrib] < key->nr_cbufs)
+ {
+ unsigned cbuf = shader->info.base.output_semantic_index[attrib];
+ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ /* XXX: just initialize outputs to point at colors[] and
+ * skip this.
+ */
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ LLVMValueRef color_ptr;
+ color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan],
+ &loop_state.counter, 1, "");
+ lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]);
+ LLVMBuildStore(builder, out, color_ptr);
+ }
+ }
+ }
+ }
+
+ if (key->occlusion_count) {
+ lp_build_name(counter, "counter");
+ lp_build_occlusion_count(gallivm, type,
+ lp_build_mask_value(&mask), counter);
+ }
+ /* Store the final mask back into mask_store[counter]; generate_fragment reloads it after the loop. */
+ mask_val = lp_build_mask_end(&mask);
+ LLVMBuildStore(builder, mask_val, mask_ptr);
+ lp_build_for_loop_end(&loop_state);
+}
+
+
+/**
* Generate color blending and color output.
* \param rt the render target index (to index blend, colormask state)
* \param type the pixel color type
@@ -554,6 +800,7 @@ generate_fragment(struct llvmpipe_context *lp,
unsigned chan;
unsigned cbuf;
boolean cbuf0_write_all;
+ boolean try_loop = TRUE;
assert(lp_native_vector_width / 32 >= 4);
@@ -671,54 +918,126 @@ generate_fragment(struct llvmpipe_context *lp,
assert(builder);
LLVMPositionBuilderAtEnd(builder, block);
- /*
- * The shader input interpolation info is not explicitely baked in the
- * shader key, but everything it derives from (TGSI, and flatshade) is
- * already included in the shader key.
- */
- lp_build_interp_soa_init(&interp,
- gallivm,
- shader->info.base.num_inputs,
- inputs,
- builder, fs_type,
- a0_ptr, dadx_ptr, dady_ptr,
- x, y);
-
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
- /* loop over quads in the block */
zs_format_desc = util_format_description(key->zsbuf_format);
- for(i = 0; i < num_fs; ++i) {
- LLVMValueRef depth_offset = LLVMConstInt(int32_type,
- i*fs_type.length*zs_format_desc->block.bits/8,
- 0);
- LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
- LLVMValueRef depth_ptr_i;
-
- depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
-
- generate_fs(gallivm,
- shader, key,
- builder,
- fs_type,
- context_ptr,
- i,
- &interp,
- sampler,
- &fs_mask[i], /* output */
- out_color,
- depth_ptr_i,
- facing,
- partial_mask,
- mask_input,
- counter);
-
- for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
- for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
- fs_out_color[cbuf][chan][i] =
- out_color[cbuf * !cbuf0_write_all][chan];
+ if (!try_loop) {
+ /*
+ * The shader input interpolation info is not explicitly baked in the
+ * shader key, but everything it derives from (TGSI, and flatshade) is
+ * already included in the shader key.
+ */
+ lp_build_interp_soa_init(&interp,
+ gallivm,
+ shader->info.base.num_inputs,
+ inputs,
+ builder, fs_type,
+ FALSE,
+ a0_ptr, dadx_ptr, dady_ptr,
+ x, y);
+
+ /* loop over quads in the block */
+ for(i = 0; i < num_fs; ++i) {
+ LLVMValueRef depth_offset = LLVMConstInt(int32_type,
+ i*fs_type.length*zs_format_desc->block.bits/8,
+ 0);
+ LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+ LLVMValueRef depth_ptr_i;
+
+ depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
+
+ generate_fs(gallivm,
+ shader, key,
+ builder,
+ fs_type,
+ context_ptr,
+ i,
+ &interp,
+ sampler,
+ &fs_mask[i], /* output */
+ out_color,
+ depth_ptr_i,
+ facing,
+ partial_mask,
+ mask_input,
+ counter);
+
+ for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
+ fs_out_color[cbuf][chan][i] =
+ out_color[cbuf * !cbuf0_write_all][chan];
+ }
+ }
+ else {
+ unsigned depth_bits = zs_format_desc->block.bits/8;
+ LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
+ LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
+ LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
+ num_loop, "mask_store");
+ LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+
+ /*
+ * The shader input interpolation info is not explicitly baked in the
+ * shader key, but everything it derives from (TGSI, and flatshade) is
+ * already included in the shader key.
+ */
+ lp_build_interp_soa_init(&interp,
+ gallivm,
+ shader->info.base.num_inputs,
+ inputs,
+ builder, fs_type,
+ TRUE,
+ a0_ptr, dadx_ptr, dady_ptr,
+ x, y);
+
+ for (i = 0; i < num_fs; i++) {
+ LLVMValueRef mask;
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+ LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store,
+ &indexi, 1, "mask_ptr");
+
+ if (partial_mask) {
+ mask = generate_quad_mask(gallivm, fs_type,
+ i*fs_type.length/4, mask_input);
+ }
+ else {
+ mask = lp_build_const_int_vec(gallivm, fs_type, ~0);
+ }
+ LLVMBuildStore(builder, mask, mask_ptr);
+ }
+
+ generate_fs_loop(gallivm,
+ shader, key,
+ builder,
+ fs_type,
+ context_ptr,
+ num_loop,
+ &interp,
+ sampler,
+ mask_store, /* output */
+ color_store,
+ depth_ptr,
+ depth_bits,
+ facing,
+ counter);
+
+ for (i = 0; i < num_fs; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+ LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store,
+ &indexi, 1, "");
+ fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask");
+ /* XXX: awkward -- fs_out_color gets pointers into color_store here; need to reorganize things */
+ for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+ ptr = LLVMBuildGEP(builder,
+ color_store[cbuf * !cbuf0_write_all][chan],
+ &indexi, 1, "");
+ fs_out_color[cbuf][chan][i] = ptr;
+ }
+ }
+ }
}
sampler->destroy(sampler);
@@ -732,7 +1051,7 @@ generate_fragment(struct llvmpipe_context *lp,
unsigned rt;
/*
- * Convert the fs's output color and mask to fit to the blending type.
+ * Convert the fs's output color and mask to fit to the blending type.
*/
for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH];
@@ -759,8 +1078,8 @@ generate_fragment(struct llvmpipe_context *lp,
}
color_ptr = LLVMBuildLoad(builder,
- LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
- "");
+ LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
+ "");
lp_build_name(color_ptr, "color_ptr%d", cbuf);
/* which blend/colormask state to use */