diff options
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_interp.c | 158 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_bld_interp.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 415 |
3 files changed, 516 insertions, 72 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index d108f35f719..4947f304a11 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -42,6 +42,7 @@ #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_flow.h" #include "lp_bld_interp.h" @@ -122,6 +123,33 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); } +static void +calc_offsets(struct lp_build_context *coeff_bld, + unsigned quad_start_index, + LLVMValueRef *pixoffx, + LLVMValueRef *pixoffy) +{ + unsigned i; + unsigned num_pix = coeff_bld->type.length; + struct gallivm_state *gallivm = coeff_bld->gallivm; + LLVMBuilderRef builder = coeff_bld->gallivm->builder; + LLVMValueRef nr, pixxf, pixyf; + + *pixoffx = coeff_bld->undef; + *pixoffy = coeff_bld->undef; + + for (i = 0; i < num_pix; i++) { + nr = lp_build_const_int32(gallivm, i); + pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + + (quad_start_index & 1) * 2); + pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + + (quad_start_index & 2)); + *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, ""); + *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, ""); + } +} + + /* Much easier, and significantly less instructions in the per-stamp * part (less than half) but overall more instructions so a loss if * most quads are active. Might be a win though with larger vectors. @@ -210,6 +238,7 @@ static void attribs_update_simple(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, int quad_start_index, + LLVMValueRef loop_iter, int start, int end) { @@ -217,22 +246,22 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld, struct lp_build_context *coeff_bld = &bld->coeff_bld; struct lp_build_context *setup_bld = &bld->setup_bld; LLVMValueRef oow = NULL; - unsigned attrib, i; + unsigned attrib; LLVMValueRef pixoffx; LLVMValueRef pixoffy; - unsigned num_pix = coeff_bld->type.length; - /* could do this with code-generated passed in pixel offsets */ - pixoffx = coeff_bld->undef; - pixoffy = coeff_bld->undef; - for (i = 0; i < coeff_bld->type.length; i++) { - LLVMValueRef nr = lp_build_const_int32(gallivm, i); - LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + - (quad_start_index & 1) * 2); - LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + - (quad_start_index & 2)); - pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); - pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); + /* could do this with code-generated passed in pixel offsets too */ + if (bld->dynamic_offsets) { + LLVMValueRef ptr; + + assert(loop_iter); + ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, ""); + pixoffx = LLVMBuildLoad(builder, ptr, ""); + ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, ""); + pixoffy = LLVMBuildLoad(builder, ptr, ""); + } + else { + calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy); } pixoffx = LLVMBuildFAdd(builder, pixoffx, @@ -498,7 +527,14 @@ coeffs_init(struct lp_build_interp_soa_context *bld, attrib_name(a, attrib, chan, ".a"); attrib_name(dadq, attrib, chan, ".dadq"); - bld->a [attrib][chan] = a; + if (bld->dynamic_offsets) { + bld->a[attrib][chan] = lp_build_alloca(gallivm, + LLVMTypeOf(a), ""); + LLVMBuildStore(builder, a, bld->a[attrib][chan]); + } + else { + bld->a[attrib][chan] = a; + } bld->dadq[attrib][chan] = dadq; } } @@ -514,6 +550,7 @@ static void attribs_update(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, int quad_start_index, + LLVMValueRef loop_iter, int start, int end) { @@ -535,6 +572,9 @@ attribs_update(struct lp_build_interp_soa_context *bld, if (interp == LP_INTERP_CONSTANT || interp == LP_INTERP_FACING) { a = bld->a[attrib][chan]; + if (bld->dynamic_offsets) { + a = LLVMBuildLoad(builder, a, ""); + } } else if (interp == LP_INTERP_POSITION) { assert(attrib > 0); @@ -549,8 +589,20 @@ attribs_update(struct lp_build_interp_soa_context *bld, * Broadcast the attribute value for this quad into all elements */ - a = LLVMBuildShuffleVector(builder, - a, coeff_bld->undef, shuffle, ""); + if (bld->dynamic_offsets) { + /* stored as vector load as float */ + LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext( + gallivm->context), 0); + LLVMValueRef ptr; + a = LLVMBuildBitCast(builder, a, ptr_type, ""); + ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, ""); + a = LLVMBuildLoad(builder, ptr, ""); + a = lp_build_broadcast_scalar(&bld->coeff_bld, a); + } + else { + a = LLVMBuildShuffleVector(builder, + a, coeff_bld->undef, shuffle, ""); + } /* * Get the derivatives. @@ -639,6 +691,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct lp_shader_input *inputs, LLVMBuilderRef builder, struct lp_type type, + boolean dynamic_offsets, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, @@ -696,11 +749,42 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); if (coeff_type.length > 4) { + bld->simple_interp = TRUE; + if (dynamic_offsets) { + /* XXX this should use a global static table */ + unsigned i; + unsigned num_loops = 16 / type.length; + LLVMValueRef pixoffx, pixoffy, index; + LLVMValueRef ptr; + + bld->dynamic_offsets = TRUE; + bld->xoffset_store = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + lp_build_const_int32(gallivm, num_loops), + ""); + bld->yoffset_store = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, type), + lp_build_const_int32(gallivm, num_loops), + ""); + for (i = 0; i < num_loops; i++) { + index = lp_build_const_int32(gallivm, i); + calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy); + ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, ""); + LLVMBuildStore(builder, pixoffx, ptr); + ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, ""); + LLVMBuildStore(builder, pixoffy, ptr); + } + } coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr); } else { + bld->simple_interp = FALSE; + if (dynamic_offsets) { + bld->dynamic_offsets = TRUE; + } coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); } + } @@ -714,26 +798,52 @@ lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, { assert(quad_start_index < 4); - if (bld->coeff_bld.type.length > 4) { - attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs); + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs); } else { - attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs); + attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs); } } void lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, - struct gallivm_state *gallivm, - int quad_start_index) + struct gallivm_state *gallivm, + int quad_start_index) { assert(quad_start_index < 4); - if (bld->coeff_bld.type.length > 4) { - attribs_update_simple(bld, gallivm, quad_start_index, 0, 1); + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1); + } + else { + attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1); + } +} + +void +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) +{ + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs); + } + else { + attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs); + } +} + +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index) +{ + if (bld->simple_interp) { + attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1); } else { - attribs_update(bld, gallivm, quad_start_index, 0, 1); + attribs_update(bld, gallivm, 0, quad_start_index, 0, 1); } } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index f293b582318..d273e3f9b99 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -84,6 +84,8 @@ struct lp_build_interp_soa_context unsigned num_attribs; unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */ enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS]; + boolean simple_interp; + boolean dynamic_offsets; LLVMValueRef x; LLVMValueRef y; @@ -98,6 +100,9 @@ struct lp_build_interp_soa_context LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef xoffset_store; + LLVMValueRef yoffset_store; + /* * Convenience pointers. Callers may access this one. */ @@ -113,6 +118,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, const struct lp_shader_input *inputs, LLVMBuilderRef builder, struct lp_type type, + boolean dynamic_offsets, LLVMValueRef a0_ptr, LLVMValueRef dadx_ptr, LLVMValueRef dady_ptr, @@ -129,5 +135,14 @@ lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, struct gallivm_state *gallivm, int quad__start_index); +void +lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); + +void +lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, + struct gallivm_state *gallivm, + LLVMValueRef quad_start_index); #endif /* LP_BLD_INTERP_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 54f45357fdc..374544fcf70 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -295,7 +295,7 @@ generate_fs(struct gallivm_state *gallivm, /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { - color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color"); + color[cbuf][chan] = lp_build_alloca(gallivm, vec_type, "color"); } } @@ -336,7 +336,7 @@ generate_fs(struct gallivm_state *gallivm, } lp_build_interp_soa_update_inputs(interp, gallivm, i*type.length/4); - + /* Build the actual shader */ lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, &system_values, @@ -436,6 +436,252 @@ generate_fs(struct gallivm_state *gallivm, /** + * Generate the fragment shader, depth/stencil test, and alpha tests. + */ +static void +generate_fs_loop(struct gallivm_state *gallivm, + struct lp_fragment_shader *shader, + const struct lp_fragment_shader_variant_key *key, + LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef context_ptr, + LLVMValueRef num_loop, + struct lp_build_interp_soa_context *interp, + struct lp_build_sampler_soa *sampler, + LLVMValueRef mask_store, + LLVMValueRef (*out_color)[4], + LLVMValueRef depth_ptr, + unsigned depth_bits, + LLVMValueRef facing, + LLVMValueRef counter) +{ + const struct util_format_description *zs_format_desc = NULL; + const struct tgsi_token *tokens = shader->base.tokens; + LLVMTypeRef vec_type; + LLVMValueRef mask_ptr, mask_val; + LLVMValueRef consts_ptr; + LLVMValueRef z; + LLVMValueRef zs_value = NULL; + LLVMValueRef stencil_refs[2]; + LLVMValueRef depth_ptr_i; + LLVMValueRef depth_offset; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + struct lp_build_for_loop_state loop_state; + struct lp_build_mask_context mask; + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); + unsigned attrib; + unsigned chan; + unsigned cbuf; + unsigned depth_mode; + + struct lp_bld_tgsi_system_values system_values; + + memset(&system_values, 0, sizeof(system_values)); + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } + + + stencil_refs[0] = lp_jit_context_stencil_ref_front_value(gallivm, context_ptr); + stencil_refs[1] = lp_jit_context_stencil_ref_back_value(gallivm, context_ptr); + + vec_type = lp_build_vec_type(gallivm, type); + + consts_ptr = lp_jit_context_constants(gallivm, context_ptr); + + lp_build_for_loop_begin(&loop_state, gallivm, + lp_build_const_int32(gallivm, 0), + LLVMIntULT, + num_loop, + lp_build_const_int32(gallivm, 1)); + + mask_ptr = LLVMBuildGEP(builder, mask_store, + &loop_state.counter, 1, "mask_ptr"); + mask_val = LLVMBuildLoad(builder, mask_ptr, ""); + + depth_offset = LLVMBuildMul(builder, loop_state.counter, + lp_build_const_int32(gallivm, depth_bits * type.length), + ""); + + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); + + memset(outputs, 0, sizeof outputs); + + for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + out_color[cbuf][chan] = lp_build_array_alloca(gallivm, + lp_build_vec_type(gallivm, + type), + num_loop, "color"); + } + } + + + + /* 'mask' will control execution based on quad's pixel alive/killed state */ + lp_build_mask_begin(&mask, gallivm, type, mask_val); + + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos_dyn(interp, gallivm, loop_state.counter); + z = interp->pos[2]; + + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(gallivm, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr_i, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value); + } + } + + lp_build_interp_soa_update_inputs_dyn(interp, gallivm, loop_state.counter); + + /* Build the actual shader */ + lp_build_tgsi_soa(gallivm, tokens, type, &mask, + consts_ptr, &system_values, + interp->pos, interp->inputs, + outputs, sampler, &shader->info.base); + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1 && outputs[color0][3]) { + const struct util_format_description *cbuf_format_desc; + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(gallivm, context_ptr); + alpha_ref_value = lp_build_broadcast(gallivm, vec_type, alpha_ref_value); + + cbuf_format_desc = util_format_description(key->cbuf_format[0]); + + lp_build_alpha_test(gallivm, key->alpha.func, type, cbuf_format_desc, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); + } + } + + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info.base, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1 && outputs[pos0][2]) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); + } + + lp_build_depth_stencil_test(gallivm, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr_i, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr_i, zs_value); + } + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(gallivm, + type, + zs_format_desc, + &mask, + depth_ptr_i, + zs_value); + } + + + /* Color write */ + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) + { + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) + { + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; + for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + LLVMValueRef color_ptr; + color_ptr = LLVMBuildGEP(builder, out_color[cbuf][chan], + &loop_state.counter, 1, ""); + lp_build_name(out, "color%u.%c", attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color_ptr); + } + } + } + } + + if (key->occlusion_count) { + lp_build_name(counter, "counter"); + lp_build_occlusion_count(gallivm, type, + lp_build_mask_value(&mask), counter); + } + + mask_val = lp_build_mask_end(&mask); + LLVMBuildStore(builder, mask_val, mask_ptr); + lp_build_for_loop_end(&loop_state); +} + + +/** * Generate color blending and color output. * \param rt the render target index (to index blend, colormask state) * \param type the pixel color type @@ -554,6 +800,7 @@ generate_fragment(struct llvmpipe_context *lp, unsigned chan; unsigned cbuf; boolean cbuf0_write_all; + boolean try_loop = TRUE; assert(lp_native_vector_width / 32 >= 4); @@ -671,54 +918,126 @@ generate_fragment(struct llvmpipe_context *lp, assert(builder); LLVMPositionBuilderAtEnd(builder, block); - /* - * The shader input interpolation info is not explicitely baked in the - * shader key, but everything it derives from (TGSI, and flatshade) is - * already included in the shader key. - */ - lp_build_interp_soa_init(&interp, - gallivm, - shader->info.base.num_inputs, - inputs, - builder, fs_type, - a0_ptr, dadx_ptr, dady_ptr, - x, y); - /* code generated texture sampling */ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); - /* loop over quads in the block */ zs_format_desc = util_format_description(key->zsbuf_format); - for(i = 0; i < num_fs; ++i) { - LLVMValueRef depth_offset = LLVMConstInt(int32_type, - i*fs_type.length*zs_format_desc->block.bits/8, - 0); - LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS]; - LLVMValueRef depth_ptr_i; - - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); - - generate_fs(gallivm, - shader, key, - builder, - fs_type, - context_ptr, - i, - &interp, - sampler, - &fs_mask[i], /* output */ - out_color, - depth_ptr_i, - facing, - partial_mask, - mask_input, - counter); - - for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) - fs_out_color[cbuf][chan][i] = - out_color[cbuf * !cbuf0_write_all][chan]; + if (!try_loop) { + /* + * The shader input interpolation info is not explicitely baked in the + * shader key, but everything it derives from (TGSI, and flatshade) is + * already included in the shader key. + */ + lp_build_interp_soa_init(&interp, + gallivm, + shader->info.base.num_inputs, + inputs, + builder, fs_type, + FALSE, + a0_ptr, dadx_ptr, dady_ptr, + x, y); + + /* loop over quads in the block */ + for(i = 0; i < num_fs; ++i) { + LLVMValueRef depth_offset = LLVMConstInt(int32_type, + i*fs_type.length*zs_format_desc->block.bits/8, + 0); + LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS]; + LLVMValueRef depth_ptr_i; + + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); + + generate_fs(gallivm, + shader, key, + builder, + fs_type, + context_ptr, + i, + &interp, + sampler, + &fs_mask[i], /* output */ + out_color, + depth_ptr_i, + facing, + partial_mask, + mask_input, + counter); + + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) + fs_out_color[cbuf][chan][i] = + out_color[cbuf * !cbuf0_write_all][chan]; + } + } + else { + unsigned depth_bits = zs_format_desc->block.bits/8; + LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs); + LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type); + LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type, + num_loop, "mask_store"); + LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS]; + + /* + * The shader input interpolation info is not explicitely baked in the + * shader key, but everything it derives from (TGSI, and flatshade) is + * already included in the shader key. + */ + lp_build_interp_soa_init(&interp, + gallivm, + shader->info.base.num_inputs, + inputs, + builder, fs_type, + TRUE, + a0_ptr, dadx_ptr, dady_ptr, + x, y); + + for (i = 0; i < num_fs; i++) { + LLVMValueRef mask; + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef mask_ptr = LLVMBuildGEP(builder, mask_store, + &indexi, 1, "mask_ptr"); + + if (partial_mask) { + mask = generate_quad_mask(gallivm, fs_type, + i*fs_type.length/4, mask_input); + } + else { + mask = lp_build_const_int_vec(gallivm, fs_type, ~0); + } + LLVMBuildStore(builder, mask, mask_ptr); + } + + generate_fs_loop(gallivm, + shader, key, + builder, + fs_type, + context_ptr, + num_loop, + &interp, + sampler, + mask_store, /* output */ + color_store, + depth_ptr, + depth_bits, + facing, + counter); + + for (i = 0; i < num_fs; i++) { + LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef ptr = LLVMBuildGEP(builder, mask_store, + &indexi, 1, ""); + fs_mask[i] = LLVMBuildLoad(builder, ptr, "mask"); + /* This is fucked up need to reorganize things */ + for (cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + ptr = LLVMBuildGEP(builder, + color_store[cbuf * !cbuf0_write_all][chan], + &indexi, 1, ""); + fs_out_color[cbuf][chan][i] = ptr; + } + } + } } sampler->destroy(sampler); @@ -732,7 +1051,7 @@ generate_fragment(struct llvmpipe_context *lp, unsigned rt; /* - * Convert the fs's output color and mask to fit to the blending type. + * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; @@ -759,8 +1078,8 @@ generate_fragment(struct llvmpipe_context *lp, } color_ptr = LLVMBuildLoad(builder, - LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), - ""); + LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""), + ""); lp_build_name(color_ptr, "color_ptr%d", cbuf); /* which blend/colormask state to use */ |