diff options
Diffstat (limited to 'src/gallium')
93 files changed, 1478 insertions, 1407 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 2daed382cf3..e0d9b313544 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -113,6 +113,7 @@ C_SOURCES = \ util/u_format_tests.c \ util/u_format_yuv.c \ util/u_format_zs.c \ + util/u_framebuffer.c \ util/u_gen_mipmap.c \ util/u_half.c \ util/u_handle_table.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index a0673df8a8e..29a1bd0ecba 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -162,6 +162,7 @@ source = [ 'util/u_format_tests.c', 'util/u_format_yuv.c', 'util/u_format_zs.c', + 'util/u_framebuffer.c', 'util/u_gen_mipmap.c', 'util/u_half.c', 'util/u_handle_table.c', diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 55e0ded1b30..20a8612dcae 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -36,6 +36,7 @@ */ #include "pipe/p_state.h" +#include "util/u_framebuffer.h" #include "util/u_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -103,10 +104,6 @@ struct cso_context { }; -static void -free_framebuffer_state(struct pipe_framebuffer_state *fb); - - static boolean delete_blend_state(struct cso_context *ctx, void *state) { struct cso_blend *cso = (struct cso_blend *)state; @@ -304,8 +301,8 @@ void cso_release_all( struct cso_context *ctx ) pipe_sampler_view_reference(&ctx->vertex_sampler_views_saved[i], NULL); } - free_framebuffer_state(&ctx->fb); - free_framebuffer_state(&ctx->fb_saved); + util_unreference_framebuffer_state(&ctx->fb); + util_unreference_framebuffer_state(&ctx->fb_saved); if (ctx->cache) { cso_cache_delete( ctx->cache ); @@ -897,42 +894,11 @@ void cso_restore_vertex_shader(struct cso_context *ctx) } -/** - * Copy framebuffer state from src to dst with refcounting of surfaces. - */ -static void -copy_framebuffer_state(struct pipe_framebuffer_state *dst, - const struct pipe_framebuffer_state *src) -{ - uint i; - - dst->width = src->width; - dst->height = src->height; - dst->nr_cbufs = src->nr_cbufs; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); - } - pipe_surface_reference(&dst->zsbuf, src->zsbuf); -} - - -static void -free_framebuffer_state(struct pipe_framebuffer_state *fb) -{ - uint i; - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&fb->cbufs[i], NULL); - } - pipe_surface_reference(&fb->zsbuf, NULL); -} - - enum pipe_error cso_set_framebuffer(struct cso_context *ctx, const struct pipe_framebuffer_state *fb) { if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { - copy_framebuffer_state(&ctx->fb, fb); + util_copy_framebuffer_state(&ctx->fb, fb); ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } return PIPE_OK; @@ -940,15 +906,15 @@ enum pipe_error cso_set_framebuffer(struct cso_context *ctx, void cso_save_framebuffer(struct cso_context *ctx) { - copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); + util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); } void cso_restore_framebuffer(struct cso_context *ctx) { if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { - copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); + util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); - free_framebuffer_state(&ctx->fb_saved); + util_unreference_framebuffer_state(&ctx->fb_saved); } } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 3b2df054c3c..ea9b7c90a51 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -182,6 +182,8 @@ draw_llvm_create(struct draw_context *draw) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); LLVMAddConstantPropagationPass(llvm->pass); if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi @@ -190,9 +192,7 @@ draw_llvm_create(struct draw_context *draw) */ LLVMAddInstructionCombiningPass(llvm->pass); } - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); LLVMAddGVNPass(llvm->pass); - LLVMAddCFGSimplificationPass(llvm->pass); init_globals(llvm); @@ -207,6 +207,8 @@ draw_llvm_create(struct draw_context *draw) void draw_llvm_destroy(struct draw_llvm *llvm) { + LLVMDisposePassManager(llvm->pass); + FREE(llvm); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index d2a492f2b4c..6d00b0fbd44 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -66,7 +66,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vs.vertex_shader; - struct draw_geometry_shader *gs = draw->gs.geometry_shader; struct draw_llvm_variant_key key; struct draw_llvm_variant *variant = NULL; unsigned i; @@ -97,17 +96,6 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - - draw_pt_fetch_prepare( fpme->fetch, - vs->info.num_inputs, - fpme->vertex_size, - instance_id_index ); - if (opt & PT_SHADE) { - vs->prepare(vs, draw); - draw_geometry_shader_prepare(gs, draw); - } - - /* XXX: it's not really gl rasterization rules we care about here, * but gl vs dx9 clip spaces. */ @@ -349,7 +337,31 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle ) static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_llvm_variant *variant = NULL; + variant = fpme->variants; + while(variant) { + struct draw_llvm_variant *next = variant->next; + + if (variant->function_elts) { + if (variant->function_elts) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function_elts); + LLVMDeleteFunction(variant->function_elts); + } + + if (variant->function) { + if (variant->function) + LLVMFreeMachineCodeForFunction(draw->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + FREE(variant); + + variant = next; + } if (fpme->fetch) draw_pt_fetch_destroy( fpme->fetch ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index c2832eefa2a..b9db886a24d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -85,18 +85,22 @@ struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader) { - struct draw_vertex_shader *vs; + struct draw_vertex_shader *vs = NULL; if (draw->dump_vs) { tgsi_dump(shader->tokens, 0); } - vs = draw_create_vs_sse( draw, shader ); - if (!vs) { + if (!draw->pt.middle.llvm) { +#if defined(PIPE_ARCH_X86) + vs = draw_create_vs_sse( draw, shader ); +#elif defined(PIPE_ARCH_PPC) vs = draw_create_vs_ppc( draw, shader ); - if (!vs) { - vs = draw_create_vs_exec( draw, shader ); - } +#endif + } + + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); } if (vs) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 20ae958714b..f372a48846f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1177,9 +1177,34 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) - /* FIXME: improve precision */ + if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + /* + * XXX: Added precision is not always necessary, so only enable this + * when we have a better system in place to track minimum precision. + */ + +#if 0 + /* + * Do one Newton-Raphson step to improve precision: + * + * x1 = (2 - a * rcp(a)) * rcp(a) + */ + + LLVMValueRef two = lp_build_const_vec(bld->type, 2.0); + LLVMValueRef rcp_a; + LLVMValueRef res; + + rcp_a = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); + + res = LLVMBuildMul(bld->builder, a, rcp_a, ""); + res = LLVMBuildSub(bld->builder, two, res, ""); + res = LLVMBuildMul(bld->builder, res, rcp_a, ""); + + return rcp_a; +#else return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a); +#endif + } return LLVMBuildFDiv(bld->builder, bld->one, a, ""); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h new file mode 100644 index 00000000000..e095a0abe3a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef LP_BLD_LIMITS_H_ +#define LP_BLD_LIMITS_H_ + +/* + * TGSI translation limits. + * + * Some are slightly above SM 3.0 requirements to give some wiggle room to + * the state trackers. + */ + +#define LP_MAX_TGSI_TEMPS 256 + +#define LP_MAX_TGSI_ADDRS 16 + +#define LP_MAX_TGSI_IMMEDIATES 256 + +/** + * Maximum control flow nesting + * + * SM3.0 requires 24 + */ +#define LP_MAX_TGSI_NESTING 32 + + +#endif /* LP_BLD_LIMITS_H_ */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 8ceb20473d5..955b1d21ee5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -173,7 +173,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, + LLVMValueRef explicit_lod, LLVMValueRef *texel); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 74dc9e1d812..3f0ea05b795 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -30,6 +30,7 @@ * Texture sampling -- SoA. * * @author Jose Fonseca <jfonseca@vmware.com> + * @author Brian Paul <brianp@vmware.com> */ #include "pipe/p_defines.h" @@ -325,6 +326,18 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, bld->format_desc, x, y, z, y_stride, z_stride); + if (use_border) { + /* If we can sample the border color, it means that texcoords may + * lie outside the bounds of the texture image. We need to do + * something to prevent reading out of bounds and causing a segfault. + * + * Simply AND the texture coords with !use_border. This will cause + * coords which are out of bounds to become zero. Zero's guaranteed + * to be inside the texture image. + */ + offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border); + } + lp_build_fetch_rgba_soa(bld->builder, bld->format_desc, bld->texel_type, @@ -891,17 +904,24 @@ lp_build_minify(struct lp_build_sample_context *bld, * \param s vector of texcoord s values * \param t vector of texcoord t values * \param r vector of texcoord r values - * \param shader_lod_bias vector float with the shader lod bias, + * \param lod_bias optional float vector with the shader lod bias + * \param explicit_lod optional float vector with the explicit lod * \param width scalar int texture width * \param height scalar int texture height * \param depth scalar int texture depth + * + * XXX: The resulting lod is scalar, so ignore all but the first element of + * derivatives, lod_bias, etc that are passed by the shader. */ static LLVMValueRef lp_build_lod_selector(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, - LLVMValueRef shader_lod_bias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth) @@ -914,7 +934,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); } else { - const int dims = texture_dims(bld->static_state->target); struct lp_build_context *float_bld = &bld->float_bld; LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias); @@ -922,83 +941,76 @@ lp_build_lod_selector(struct lp_build_sample_context *bld, bld->static_state->min_lod); LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod); - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - - LLVMValueRef s0, s1, s2; - LLVMValueRef t0, t1, t2; - LLVMValueRef r0, r1, r2; - LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; - LLVMValueRef rho, lod; - - /* - * dsdx = abs(s[1] - s[0]); - * dsdy = abs(s[2] - s[0]); - * dtdx = abs(t[1] - t[0]); - * dtdy = abs(t[2] - t[0]); - * drdx = abs(r[1] - r[0]); - * drdy = abs(r[2] - r[0]); - * XXX we're assuming a four-element quad in 2x2 layout here. - */ - s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0"); - s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1"); - s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2"); - dsdx = LLVMBuildSub(bld->builder, s1, s0, ""); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildSub(bld->builder, s2, s0, ""); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0"); - t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1"); - t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2"); - dtdx = LLVMBuildSub(bld->builder, t1, t0, ""); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildSub(bld->builder, t2, t0, ""); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0"); - r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1"); - r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2"); - drdx = LLVMBuildSub(bld->builder, r1, r0, ""); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildSub(bld->builder, r2, r0, ""); - drdy = lp_build_abs(float_bld, drdy); - } + LLVMValueRef lod; + + if (explicit_lod) { + lod = LLVMBuildExtractElement(bld->builder, explicit_lod, + index0, ""); } + else { + const int dims = texture_dims(bld->static_state->target); + LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy; + LLVMValueRef rho; + + /* + * dsdx = abs(s[1] - s[0]); + * dsdy = abs(s[2] - s[0]); + * dtdx = abs(t[1] - t[0]); + * dtdy = abs(t[2] - t[0]); + * drdx = abs(r[1] - r[0]); + * drdy = abs(r[2] - r[0]); + */ + dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); + dsdx = lp_build_abs(float_bld, dsdx); + dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); + dsdy = lp_build_abs(float_bld, dsdy); + if (dims > 1) { + dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); + dtdx = lp_build_abs(float_bld, dtdx); + dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); + dtdy = lp_build_abs(float_bld, dtdy); + if (dims > 2) { + drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); + drdx = lp_build_abs(float_bld, drdx); + drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); + drdy = lp_build_abs(float_bld, drdy); + } + } - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { + /* Compute rho = max of all partial derivatives scaled by texture size. + * XXX this could be vectorized somewhat + */ + rho = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, dsdx, dsdy), + lp_build_int_to_float(float_bld, width), ""); + if (dims > 1) { + LLVMValueRef max; max = LLVMBuildMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); + lp_build_max(float_bld, dtdx, dtdy), + lp_build_int_to_float(float_bld, height), ""); rho = lp_build_max(float_bld, rho, max); + if (dims > 2) { + max = LLVMBuildMul(bld->builder, + lp_build_max(float_bld, drdx, drdy), + lp_build_int_to_float(float_bld, depth), ""); + rho = lp_build_max(float_bld, rho, max); + } } - } - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); + /* compute lod = log2(rho) */ + lod = lp_build_log2(float_bld, rho); - /* add sampler lod bias */ - lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias"); + /* add shader lod bias */ + if (lod_bias) { + lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, + index0, ""); + lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); + } + } - /* add shader lod bias */ - /* XXX for now we take only the first element since our lod is scalar */ - shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias, - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); - lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias"); + /* add sampler lod bias */ + lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); /* clamp lod */ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); @@ -1584,7 +1596,10 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef width, LLVMValueRef height, LLVMValueRef depth, @@ -1622,7 +1637,9 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, s, t, r, lodbias, width, height, depth); + lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth); } /* @@ -2049,6 +2066,24 @@ lp_build_sample_compare(struct lp_build_sample_context *bld, /** + * Just set texels to white instead of actually sampling the texture. + * For debugging. + */ +static void +lp_build_sample_nop(struct lp_build_sample_context *bld, + LLVMValueRef *texel) +{ + struct lp_build_context *texel_bld = &bld->texel_bld; + unsigned chan; + + for (chan = 0; chan < 4; chan++) { + /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/ + texel[chan] = texel_bld->one; + } +} + + +/** * Build texture sampling code. * 'texel' will return a vector of four LLVMValueRefs corresponding to * R, G, B, A. @@ -2062,7 +2097,10 @@ lp_build_sample_soa(LLVMBuilderRef builder, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel) { struct lp_build_sample_context bld; @@ -2113,19 +2151,24 @@ lp_build_sample_soa(LLVMBuilderRef builder, height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); - if (util_format_is_rgba8_variant(bld.format_desc) && - static_state->target == PIPE_TEXTURE_2D && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { + if (0) { + /* For debug: no-op texture sampling */ + lp_build_sample_nop(&bld, texel); + } + else if (util_format_is_rgba8_variant(bld.format_desc) && + static_state->target == PIPE_TEXTURE_2D && + static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && + static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && + is_simple_wrap_mode(static_state->wrap_s) && + is_simple_wrap_mode(static_state->wrap_t)) { /* special case */ lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, row_stride_array, data_array, texel); } else { - lp_build_sample_general(&bld, unit, s, t, r, lodbias, + lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, + lod_bias, explicit_lod, width, height, depth, width_vec, height_vec, depth_vec, row_stride_array, img_stride_array, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 2eac5da6c69..5ce1385c480 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -65,7 +65,10 @@ struct lp_build_sampler_soa unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel); }; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index d3c769e28b8..0b1a28a7ab7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -54,13 +54,10 @@ #include "lp_bld_swizzle.h" #include "lp_bld_flow.h" #include "lp_bld_tgsi.h" +#include "lp_bld_limits.h" #include "lp_bld_debug.h" -#define LP_MAX_TEMPS 256 -#define LP_MAX_IMMEDIATES 256 - - #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -84,7 +81,6 @@ #define QUAD_BOTTOM_LEFT 2 #define QUAD_BOTTOM_RIGHT 3 -#define LP_TGSI_MAX_NESTING 16 struct lp_exec_mask { struct lp_build_context *bld; @@ -93,19 +89,19 @@ struct lp_exec_mask { LLVMTypeRef int_vec_type; - LLVMValueRef cond_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; int cond_stack_size; LLVMValueRef cond_mask; - LLVMValueRef break_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef break_stack[LP_MAX_TGSI_NESTING]; int break_stack_size; LLVMValueRef break_mask; - LLVMValueRef cont_stack[LP_TGSI_MAX_NESTING]; + LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING]; int cont_stack_size; LLVMValueRef cont_mask; - LLVMBasicBlockRef loop_stack[LP_TGSI_MAX_NESTING]; + LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING]; int loop_stack_size; LLVMBasicBlockRef loop_block; @@ -124,9 +120,9 @@ struct lp_build_tgsi_soa_context struct lp_build_sampler_soa *sampler; - LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; - LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; - LLVMValueRef addr[LP_MAX_TEMPS][NUM_CHANNELS]; + LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; + LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; + LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; /* we allocate an array of temps if we have indirect * addressing and then the temps above is unused */ @@ -198,6 +194,7 @@ static void lp_exec_mask_update(struct lp_exec_mask *mask) static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, LLVMValueRef val) { + assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, mask->int_vec_type, ""); @@ -239,6 +236,10 @@ static void lp_exec_bgnloop(struct lp_exec_mask *mask) if (mask->cond_stack_size == 0) mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); + assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); + assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING); + assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); + mask->break_stack[mask->break_stack_size++] = mask->break_mask; mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; @@ -255,16 +256,9 @@ static void lp_exec_break(struct lp_exec_mask *mask) mask->exec_mask, "break"); - /* mask->break_stack_size > 1 implies that we encountered a break - * statemant already and if that's the case we want to make sure - * our mask is a combination of the previous break and the current - * execution mask */ - if (mask->break_stack_size > 1) { - mask->break_mask = LLVMBuildAnd(mask->bld->builder, - mask->break_mask, - exec_mask, "break_full"); - } else - mask->break_mask = exec_mask; + mask->break_mask = LLVMBuildAnd(mask->bld->builder, + mask->break_mask, + exec_mask, "break_full"); lp_exec_mask_update(mask); } @@ -275,12 +269,9 @@ static void lp_exec_continue(struct lp_exec_mask *mask) mask->exec_mask, ""); - if (mask->cont_stack_size > 1) { - mask->cont_mask = LLVMBuildAnd(mask->bld->builder, - mask->cont_mask, - exec_mask, ""); - } else - mask->cont_mask = exec_mask; + mask->cont_mask = LLVMBuildAnd(mask->bld->builder, + mask->cont_mask, + exec_mask, ""); lp_exec_mask_update(mask); } @@ -589,7 +580,6 @@ emit_store( case TGSI_FILE_PREDICATE: /* FIXME */ - assert(0); break; default: @@ -602,21 +592,37 @@ emit_store( * High-level instruction translators. */ +enum tex_modifier { + TEX_MODIFIER_NONE = 0, + TEX_MODIFIER_PROJECTED, + TEX_MODIFIER_LOD_BIAS, + TEX_MODIFIER_EXPLICIT_LOD, + TEX_MODIFIER_EXPLICIT_DERIV +}; static void emit_tex( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_instruction *inst, - boolean apply_lodbias, - boolean projected, + enum tex_modifier modifier, LLVMValueRef *texel) { - const uint unit = inst->Src[1].Register.Index; - LLVMValueRef lodbias; + unsigned unit; + LLVMValueRef lod_bias, explicit_lod; LLVMValueRef oow = NULL; LLVMValueRef coords[3]; + LLVMValueRef ddx[3]; + LLVMValueRef ddy[3]; unsigned num_coords; unsigned i; + if (!bld->sampler) { + _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); + for (i = 0; i < 4; i++) { + texel[i] = bld->base.undef; + } + return; + } + switch (inst->Texture.Texture) { case TGSI_TEXTURE_1D: num_coords = 1; @@ -637,29 +643,57 @@ emit_tex( struct lp_build_tgsi_soa_context *bld, return; } - if(apply_lodbias) - lodbias = emit_fetch( bld, inst, 0, 3 ); - else - lodbias = bld->base.zero; + if (modifier == TEX_MODIFIER_LOD_BIAS) { + lod_bias = emit_fetch( bld, inst, 0, 3 ); + explicit_lod = NULL; + } + else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { + lod_bias = NULL; + explicit_lod = emit_fetch( bld, inst, 0, 3 ); + } + else { + lod_bias = NULL; + explicit_lod = NULL; + } - if (projected) { + if (modifier == TEX_MODIFIER_PROJECTED) { oow = emit_fetch( bld, inst, 0, 3 ); oow = lp_build_rcp(&bld->base, oow); } for (i = 0; i < num_coords; i++) { coords[i] = emit_fetch( bld, inst, 0, i ); - if (projected) + if (modifier == TEX_MODIFIER_PROJECTED) coords[i] = lp_build_mul(&bld->base, coords[i], oow); } for (i = num_coords; i < 3; i++) { coords[i] = bld->base.undef; } + if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { + for (i = 0; i < num_coords; i++) { + ddx[i] = emit_fetch( bld, inst, 1, i ); + ddy[i] = emit_fetch( bld, inst, 2, i ); + } + unit = inst->Src[3].Register.Index; + } else { + for (i = 0; i < num_coords; i++) { + ddx[i] = emit_ddx( bld, coords[i] ); + ddy[i] = emit_ddy( bld, coords[i] ); + } + unit = inst->Src[1].Register.Index; + } + for (i = num_coords; i < 3; i++) { + ddx[i] = bld->base.undef; + ddy[i] = bld->base.undef; + } + bld->sampler->emit_fetch_texel(bld->sampler, bld->base.builder, bld->base.type, - unit, num_coords, coords, lodbias, + unit, num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, texel); } @@ -739,7 +773,7 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, lp_build_mask_update(bld->mask, mask); } -static int +static void emit_declaration( struct lp_build_tgsi_soa_context *bld, const struct tgsi_full_declaration *decl) @@ -753,6 +787,7 @@ emit_declaration( for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: + assert(idx < LP_MAX_TGSI_TEMPS); if (bld->has_indirect_addressing) { LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), last*4 + 4, 0); @@ -772,18 +807,21 @@ emit_declaration( break; case TGSI_FILE_ADDRESS: + assert(idx < LP_MAX_TGSI_ADDRS); for (i = 0; i < NUM_CHANNELS; i++) bld->addr[idx][i] = lp_build_alloca(bld->base.builder, vec_type, ""); break; + case TGSI_FILE_PREDICATE: + _debug_printf("warning: predicate registers not yet implemented\n"); + break; + default: /* don't need to declare other vars */ break; } } - - return TRUE; } @@ -1359,12 +1397,11 @@ emit_instruction( break; case TGSI_OPCODE_TEX: - emit_tex( bld, inst, FALSE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); break; case TGSI_OPCODE_TXD: - /* FIXME */ - return FALSE; + emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); break; case TGSI_OPCODE_UP2H: @@ -1466,7 +1503,7 @@ emit_instruction( break; case TGSI_OPCODE_TXB: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); break; case TGSI_OPCODE_NRM: @@ -1571,11 +1608,11 @@ emit_instruction( break; case TGSI_OPCODE_TXL: - emit_tex( bld, inst, TRUE, FALSE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); break; case TGSI_OPCODE_TXP: - emit_tex( bld, inst, FALSE, TRUE, dst0 ); + emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); break; case TGSI_OPCODE_BRK: @@ -1765,10 +1802,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: /* Inputs already interpolated */ - { - if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration )) - _debug_printf("warning: failed to define LLVM variable\n"); - } + emit_declaration( &bld, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: @@ -1787,7 +1821,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder, { const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert(size <= 4); - assert(num_immediates < LP_MAX_IMMEDIATES); + assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); for( i = 0; i < size; ++i ) bld.immediates[num_immediates][i] = lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 83000200189..1ad183b8dd4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -100,7 +100,6 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP", "PRED", "SV" }; @@ -494,6 +493,30 @@ iter_instruction( TXT( " " ); ctx->indent += info->post_indent; + if (inst->Instruction.Predicate) { + CHR( '(' ); + + if (inst->Predicate.Negate) + CHR( '!' ); + + TXT( "PRED[" ); + SID( inst->Predicate.Index ); + CHR( ']' ); + + if (inst->Predicate.SwizzleX != TGSI_SWIZZLE_X || + inst->Predicate.SwizzleY != TGSI_SWIZZLE_Y || + inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z || + inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) { + CHR( '.' ); + ENM( inst->Predicate.SwizzleX, swizzle_names ); + ENM( inst->Predicate.SwizzleY, swizzle_names ); + ENM( inst->Predicate.SwizzleZ, swizzle_names ); + ENM( inst->Predicate.SwizzleW, swizzle_names ); + } + + TXT( ") " ); + } + TXT( info->mnemonic ); switch (inst->Instruction.Saturate) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 82eac05dc4d..9b1ca7fa851 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1346,13 +1346,6 @@ store_dest(struct tgsi_exec_machine *mach, dst = &mach->Addrs[index].xyzw[chan_index]; break; - case TGSI_FILE_LOOP: - assert(reg->Register.Index == 0); - assert(mach->LoopCounterStackTop > 0); - assert(chan_index == CHAN_X); - dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; - break; - case TGSI_FILE_PREDICATE: index = reg->Register.Index; assert(index < TGSI_EXEC_NUM_PREDS); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index a22873e4c2b..11bbaf6722e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -299,10 +299,6 @@ struct tgsi_exec_machine uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING]; int LoopLabelStackTop; - /** Loop counter stack (x = index, y = counter, z = step) */ - struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopCounterStackTop; - /** Loop continue mask stack (see comments in tgsi_exec.c) */ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; int ContStackTop; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 76b7564cc36..ce0a92f7fb3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -235,8 +235,8 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP", - "PRED" + "PRED", + "SV" }; static boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 0b468a9184e..71a0d96331a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -278,7 +278,6 @@ static const char *file_names[TGSI_FILE_COUNT] = "SAMP", "ADDR", "IMM", - "LOOP", "PRED", "SV" }; @@ -816,6 +815,45 @@ parse_instruction( struct tgsi_full_instruction inst; uint advance; + inst = tgsi_default_full_instruction(); + + /* Parse predicate. + */ + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '(') { + uint file; + int index; + uint swizzle[4]; + boolean parsed_swizzle; + + inst.Instruction.Predicate = 1; + + ctx->cur++; + if (*ctx->cur == '!') { + ctx->cur++; + inst.Predicate.Negate = 1; + } + + if (!parse_register_dst( ctx, &file, &index )) + return FALSE; + + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) { + if (parsed_swizzle) { + inst.Predicate.SwizzleX = swizzle[0]; + inst.Predicate.SwizzleY = swizzle[1]; + inst.Predicate.SwizzleZ = swizzle[2]; + inst.Predicate.SwizzleW = swizzle[3]; + } + } + + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + + ctx->cur++; + } + /* Parse instruction name. */ eat_opt_white( &ctx->cur ); @@ -849,7 +887,6 @@ parse_instruction( return FALSE; } - inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = i; inst.Instruction.Saturate = saturate; inst.Instruction.NumDstRegs = info->num_dst; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index f725405ade1..49b854b1234 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -74,7 +74,6 @@ struct ureg_tokens { #define UREG_MAX_IMMEDIATE 32 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 -#define UREG_MAX_LOOP 1 #define UREG_MAX_PRED 1 struct const_decl { @@ -151,7 +150,6 @@ struct ureg_program unsigned nr_addrs; unsigned nr_preds; - unsigned nr_loops; unsigned nr_instructions; struct ureg_tokens domain[2]; @@ -537,19 +535,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg ) return ureg_dst_register( TGSI_FILE_ADDRESS, 0 ); } -/* Allocate a new loop register. - */ -struct ureg_dst -ureg_DECL_loop(struct ureg_program *ureg) -{ - if (ureg->nr_loops < UREG_MAX_LOOP) { - return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++); - } - - assert(0); - return ureg_dst_register(TGSI_FILE_LOOP, 0); -} - /* Allocate a new predicate register. */ struct ureg_dst @@ -1356,13 +1341,6 @@ static void emit_decls( struct ureg_program *ureg ) 0, ureg->nr_addrs ); } - if (ureg->nr_loops) { - emit_decl_range(ureg, - TGSI_FILE_LOOP, - 0, - ureg->nr_loops); - } - if (ureg->nr_preds) { emit_decl_range(ureg, TGSI_FILE_PREDICATE, diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 0130a77aadb..f32420dd872 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -231,9 +231,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program * ); struct ureg_dst -ureg_DECL_loop( struct ureg_program * ); - -struct ureg_dst ureg_DECL_predicate(struct ureg_program *); /* Supply an index to the sampler declaration as this is the hook to diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index a9272fbb491..f8dbd2b36a1 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -31,6 +31,7 @@ */ #include "util/u_memory.h" +#include "util/u_format.h" #include "util/u_math.h" #include "pipe/p_state.h" #include "translate.h" @@ -38,7 +39,9 @@ #define DRAW_DBG 0 -typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*fetch_func)(float *dst, + const uint8_t *src, + unsigned i, unsigned j); typedef void (*emit_func)(const float *attrib, void *ptr); @@ -57,7 +60,7 @@ struct translate_generic { emit_func emit; unsigned output_offset; - char *input_ptr; + const uint8_t *input_ptr; unsigned input_stride; unsigned max_index; @@ -79,22 +82,7 @@ static struct translate_generic *translate_generic( struct translate *translate * This is probably needed/dupliocated elsewhere, eg format * conversion, texture sampling etc. */ -#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \ -static void \ -fetch_##NAME(const void *ptr, float *attrib) \ -{ \ - const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \ - unsigned i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib[i] = FROM(i); \ - } \ - \ - for (; i < 4; i++) { \ - attrib[i] = defaults[i]; \ - } \ -} \ - \ +#define ATTRIB( NAME, SZ, TYPE, TO ) \ static void \ emit_##NAME(const float *attrib, void *ptr) \ { \ @@ -107,27 +95,6 @@ emit_##NAME(const float *attrib, void *ptr) \ } -#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i]) -#define FROM_32_FLOAT(i) (((float *) ptr)[i]) - -#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i]) -#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i]) -#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i]) - -#define FROM_8_SSCALED(i) ((float) ((char *) ptr)[i]) -#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i]) -#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i]) - -#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f) -#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f) -#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f) - -#define FROM_8_SNORM(i) ((float) ((char *) ptr)[i] / 127.0f) -#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) -#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) - -#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0f) - #define TO_64_FLOAT(x) ((double) x) #define TO_32_FLOAT(x) (x) @@ -150,95 +117,79 @@ emit_##NAME(const float *attrib, void *ptr) \ #define TO_32_FIXED(x) ((int) (x * 65536.0f)) +ATTRIB( R64G64B64A64_FLOAT, 4, double, TO_64_FLOAT ) +ATTRIB( R64G64B64_FLOAT, 3, double, TO_64_FLOAT ) +ATTRIB( R64G64_FLOAT, 2, double, TO_64_FLOAT ) +ATTRIB( R64_FLOAT, 1, double, TO_64_FLOAT ) + +ATTRIB( R32G32B32A32_FLOAT, 4, float, TO_32_FLOAT ) +ATTRIB( R32G32B32_FLOAT, 3, float, TO_32_FLOAT ) +ATTRIB( R32G32_FLOAT, 2, float, TO_32_FLOAT ) +ATTRIB( R32_FLOAT, 1, float, TO_32_FLOAT ) + +ATTRIB( R32G32B32A32_USCALED, 4, unsigned, TO_32_USCALED ) +ATTRIB( R32G32B32_USCALED, 3, unsigned, TO_32_USCALED ) +ATTRIB( R32G32_USCALED, 2, unsigned, TO_32_USCALED ) +ATTRIB( R32_USCALED, 1, unsigned, TO_32_USCALED ) + +ATTRIB( R32G32B32A32_SSCALED, 4, int, TO_32_SSCALED ) +ATTRIB( R32G32B32_SSCALED, 3, int, TO_32_SSCALED ) +ATTRIB( R32G32_SSCALED, 2, int, TO_32_SSCALED ) +ATTRIB( R32_SSCALED, 1, int, TO_32_SSCALED ) + +ATTRIB( R32G32B32A32_UNORM, 4, unsigned, TO_32_UNORM ) +ATTRIB( R32G32B32_UNORM, 3, unsigned, TO_32_UNORM ) +ATTRIB( R32G32_UNORM, 2, unsigned, TO_32_UNORM ) +ATTRIB( R32_UNORM, 1, unsigned, TO_32_UNORM ) + +ATTRIB( R32G32B32A32_SNORM, 4, int, TO_32_SNORM ) +ATTRIB( R32G32B32_SNORM, 3, int, TO_32_SNORM ) +ATTRIB( R32G32_SNORM, 2, int, TO_32_SNORM ) +ATTRIB( R32_SNORM, 1, int, TO_32_SNORM ) + +ATTRIB( R16G16B16A16_USCALED, 4, ushort, TO_16_USCALED ) +ATTRIB( R16G16B16_USCALED, 3, ushort, TO_16_USCALED ) +ATTRIB( R16G16_USCALED, 2, ushort, TO_16_USCALED ) +ATTRIB( R16_USCALED, 1, ushort, TO_16_USCALED ) + +ATTRIB( R16G16B16A16_SSCALED, 4, short, TO_16_SSCALED ) +ATTRIB( R16G16B16_SSCALED, 3, short, TO_16_SSCALED ) +ATTRIB( R16G16_SSCALED, 2, short, TO_16_SSCALED ) +ATTRIB( R16_SSCALED, 1, short, TO_16_SSCALED ) + +ATTRIB( R16G16B16A16_UNORM, 4, ushort, TO_16_UNORM ) +ATTRIB( R16G16B16_UNORM, 3, ushort, TO_16_UNORM ) +ATTRIB( R16G16_UNORM, 2, ushort, TO_16_UNORM ) +ATTRIB( R16_UNORM, 1, ushort, TO_16_UNORM ) + +ATTRIB( R16G16B16A16_SNORM, 4, short, TO_16_SNORM ) +ATTRIB( R16G16B16_SNORM, 3, short, TO_16_SNORM ) +ATTRIB( R16G16_SNORM, 2, short, TO_16_SNORM ) +ATTRIB( R16_SNORM, 1, short, TO_16_SNORM ) + +ATTRIB( R8G8B8A8_USCALED, 4, ubyte, TO_8_USCALED ) +ATTRIB( R8G8B8_USCALED, 3, ubyte, TO_8_USCALED ) +ATTRIB( R8G8_USCALED, 2, ubyte, TO_8_USCALED ) +ATTRIB( R8_USCALED, 1, ubyte, TO_8_USCALED ) + +ATTRIB( R8G8B8A8_SSCALED, 4, char, TO_8_SSCALED ) +ATTRIB( R8G8B8_SSCALED, 3, char, TO_8_SSCALED ) +ATTRIB( R8G8_SSCALED, 2, char, TO_8_SSCALED ) +ATTRIB( R8_SSCALED, 1, char, TO_8_SSCALED ) + +ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM ) +ATTRIB( R8G8B8_UNORM, 3, ubyte, TO_8_UNORM ) +ATTRIB( R8G8_UNORM, 2, ubyte, TO_8_UNORM ) +ATTRIB( R8_UNORM, 1, ubyte, TO_8_UNORM ) + +ATTRIB( R8G8B8A8_SNORM, 4, char, TO_8_SNORM ) +ATTRIB( R8G8B8_SNORM, 3, char, TO_8_SNORM ) +ATTRIB( R8G8_SNORM, 2, char, TO_8_SNORM ) +ATTRIB( R8_SNORM, 1, char, TO_8_SNORM ) + +ATTRIB( A8R8G8B8_UNORM, 4, ubyte, TO_8_UNORM ) +/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, TO_8_UNORM )*/ -ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT ) -ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT ) -ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT ) -ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT ) - -ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT ) -ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT ) -ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT ) -ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT ) - -ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED ) -ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED ) -ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED ) -ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED ) - -ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED ) -ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED ) -ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED ) -ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED ) - -ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM ) -ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM ) -ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM ) -ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM ) - -ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM ) -ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM ) -ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM ) -ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM ) - -ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED ) -ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED ) -ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED ) -ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED ) - -ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED ) -ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED ) -ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED ) -ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED ) - -ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM ) -ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM ) -ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM ) -ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM ) - -ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM ) -ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM ) -ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM ) -ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM ) - -ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED ) -ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED ) -ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED ) -ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED ) - -ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED ) -ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED ) -ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED ) -ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED ) - -ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) -ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM ) -ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM ) -ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM ) - -ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM ) -ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM ) -ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM ) -ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) - -ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) -/*ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )*/ - -ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED ) -ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED ) -ATTRIB( R32G32_FIXED, 2, int, FROM_32_FIXED, TO_32_FIXED ) -ATTRIB( R32_FIXED, 1, int, FROM_32_FIXED, TO_32_FIXED ) - - - -static void -fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) -{ - attrib[2] = FROM_8_UNORM(0); - attrib[1] = FROM_8_UNORM(1); - attrib[0] = FROM_8_UNORM(2); - attrib[3] = FROM_8_UNORM(3); -} static void emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) @@ -251,181 +202,13 @@ emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) } static void -fetch_NULL( const void *ptr, float *attrib ) -{ - attrib[0] = 0; - attrib[1] = 0; - attrib[2] = 0; - attrib[3] = 1; -} - -static void emit_NULL( const float *attrib, void *ptr ) { /* do nothing is the only sensible option */ } -static fetch_func get_fetch_func( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return &fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return &fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return &fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return &fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return &fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return &fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return &fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return &fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return &fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return &fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return &fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return &fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return &fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return &fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return &fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return &fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return &fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return &fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return &fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return &fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return &fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return &fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return &fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return &fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return &fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return &fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return &fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return &fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return &fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return &fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return &fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return &fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return &fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return &fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return &fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return &fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return &fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return &fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return &fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return &fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return &fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return &fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return &fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return &fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return &fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return &fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return &fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return &fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return &fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return &fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return &fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return &fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return &fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return &fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return &fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return &fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return &fetch_B8G8R8A8_UNORM; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return &fetch_A8R8G8B8_UNORM; - - case PIPE_FORMAT_R32_FIXED: - return &fetch_R32_FIXED; - case PIPE_FORMAT_R32G32_FIXED: - return &fetch_R32G32_FIXED; - case PIPE_FORMAT_R32G32B32_FIXED: - return &fetch_R32G32B32_FIXED; - case PIPE_FORMAT_R32G32B32A32_FIXED: - return &fetch_R32G32B32A32_FIXED; - - default: - assert(0); - return &fetch_NULL; - } -} - - - - static emit_func get_emit_func( enum pipe_format format ) { - /* silence warnings */ - (void) emit_R32G32B32A32_FIXED; - (void) emit_R32G32B32_FIXED; - (void) emit_R32G32_FIXED; - (void) emit_R32_FIXED; - switch (format) { case PIPE_FORMAT_R64_FLOAT: return &emit_R64_FLOAT; @@ -589,7 +372,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, for (attr = 0; attr < nr_attrs; attr++) { float data[4]; - const char *src; + const uint8_t *src; unsigned index; char *dst = (vert + @@ -606,7 +389,7 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate, src = tg->attrib[attr].input_ptr + tg->attrib[attr].input_stride * index; - tg->attrib[attr].fetch( src, data ); + tg->attrib[attr].fetch( data, src, 0, 0 ); if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", i, elt, attr, data[0], data[1], data[2], data[3]); @@ -644,7 +427,7 @@ static void PIPE_CDECL generic_run( struct translate *translate, tg->attrib[attr].output_offset); if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) { - const char *src; + const uint8_t *src; if (tg->attrib[attr].instance_divisor) { src = tg->attrib[attr].input_ptr + @@ -655,7 +438,7 @@ static void PIPE_CDECL generic_run( struct translate *translate, tg->attrib[attr].input_stride * elt; } - tg->attrib[attr].fetch( src, data ); + tg->attrib[attr].fetch( data, src, 0, 0 ); } else { data[0] = (float)instance_id; } @@ -683,7 +466,7 @@ static void generic_set_buffer( struct translate *translate, for (i = 0; i < tg->nr_attrib; i++) { if (tg->attrib[i].buffer == buf) { - tg->attrib[i].input_ptr = ((char *)ptr + + tg->attrib[i].input_ptr = ((const uint8_t *)ptr + tg->attrib[i].input_offset); tg->attrib[i].input_stride = stride; tg->attrib[i].max_index = max_index; @@ -714,9 +497,15 @@ struct translate *translate_generic_create( const struct translate_key *key ) tg->translate.run = generic_run; for (i = 0; i < key->nr_elements; i++) { + const struct util_format_description *format_desc = + util_format_description(key->element[i].input_format); + + assert(format_desc); + assert(format_desc->fetch_rgba_float); + tg->attrib[i].type = key->element[i].type; - tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); + tg->attrib[i].fetch = format_desc->fetch_rgba_float; tg->attrib[i].buffer = key->element[i].input_buffer; tg->attrib[i].input_offset = key->element[i].input_offset; tg->attrib[i].instance_divisor = key->element[i].instance_divisor; diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 956aedc8a15..1abe31db466 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -47,6 +47,7 @@ #include "util/u_rect.h" #include "util/u_sampler.h" #include "util/u_simple_shaders.h" +#include "util/u_surface.h" #include "util/u_texture.h" #define INVALID_PTR ((void*)~0) diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c index 048bd5c34da..c7c1e830e01 100644 --- a/src/gallium/auxiliary/util/u_caps.c +++ b/src/gallium/auxiliary/util/u_caps.c @@ -73,6 +73,7 @@ util_check_caps_out(struct pipe_screen *screen, const unsigned *list, int *out) *out = i - 2; return FALSE; } + break; case UTIL_CAPS_CHECK_UNIMPLEMENTED: *out = i - 1; return FALSE; diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h index 2c32db61756..40da2d75a72 100644 --- a/src/gallium/auxiliary/util/u_clear.h +++ b/src/gallium/auxiliary/util/u_clear.h @@ -33,6 +33,7 @@ #include "pipe/p_state.h" #include "util/u_pack_color.h" #include "util/u_rect.h" +#include "util/u_surface.h" /** diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h index fb6ade5c06b..8ba076949b6 100644 --- a/src/gallium/auxiliary/util/u_format.h +++ b/src/gallium/auxiliary/util/u_format.h @@ -338,6 +338,34 @@ util_format_name(enum pipe_format format) return desc->name; } +static INLINE const char * +util_format_short_name(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + assert(desc); + if (!desc) { + return "???"; + } + + return desc->short_name; +} + +/** + * Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info. + */ +static INLINE boolean +util_format_is_plain(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + if (!format) { + return FALSE; + } + + return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE; +} + static INLINE boolean util_format_is_s3tc(enum pipe_format format) { diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c index 5b279b8fe26..bb989c29d81 100644 --- a/src/gallium/auxiliary/util/u_format_s3tc.c +++ b/src/gallium/auxiliary/util/u_format_s3tc.c @@ -120,7 +120,7 @@ util_format_s3tc_init(void) library = util_dl_open(DXTN_LIBNAME); if (!library) { debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " - "compression/decompression unavailable"); + "compression/decompression unavailable\n"); return; } @@ -142,7 +142,7 @@ util_format_s3tc_init(void) !util_format_dxtn_pack) { debug_printf("couldn't reference all symbols in " DXTN_LIBNAME ", software DXTn compression/decompression " - "unavailable"); + "unavailable\n"); util_dl_close(library); return; } diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c new file mode 100644 index 00000000000..bdac12dbca2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Framebuffer utility functions. + * + * @author Brian Paul + */ + + +#include "pipe/p_screen.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "util/u_memory.h" +#include "util/u_framebuffer.h" + + +/** + * Compare pipe_framebuffer_state objects. + * \return TRUE if same, FALSE if different + */ +boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + if (dst->width != src->width || + dst->height != src->height) + return FALSE; + + for (i = 0; i < Elements(src->cbufs); i++) { + if (dst->cbufs[i] != src->cbufs[i]) { + return FALSE; + } + } + + if (dst->nr_cbufs != src->nr_cbufs) { + return FALSE; + } + + if (dst->zsbuf != src->zsbuf) { + return FALSE; + } + + return TRUE; +} + + +/** + * Copy framebuffer state from src to dst, updating refcounts. + */ +void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + unsigned i; + + dst->width = src->width; + dst->height = src->height; + + for (i = 0; i < Elements(src->cbufs); i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + + dst->nr_cbufs = src->nr_cbufs; + + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} + + +void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); + + fb->width = fb->height = 0; + fb->nr_cbufs = 0; +} diff --git a/src/gallium/auxiliary/util/u_framebuffer.h b/src/gallium/auxiliary/util/u_framebuffer.h new file mode 100644 index 00000000000..adf1254e1b6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_framebuffer.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_FRAMEBUFFER_H +#define U_FRAMEBUFFER_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + +extern boolean +util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + +extern void +util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src); + + +extern void +util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb); + + +#endif /* U_FRAMEBUFFER_H */ diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 098cdfd58b1..9bbcf1c8c49 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -30,12 +30,7 @@ */ -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "pipe/p_context.h" -#include "pipe/p_screen.h" #include "util/u_format.h" -#include "util/u_inlines.h" #include "util/u_rect.h" @@ -152,156 +147,3 @@ util_fill_rect(ubyte * dst, break; } } - - - -/** - * Fallback function for pipe->surface_copy(). - * Note: (X,Y)=(0,0) is always the upper-left corner. - * if do_flip, flip the image vertically on its way from src rect to dst rect. - * XXX should probably put this in new u_surface.c file... - */ -void -util_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dst_x, unsigned dst_y, - struct pipe_surface *src, - unsigned src_x, unsigned src_y, - unsigned w, unsigned h) -{ - struct pipe_transfer *src_trans, *dst_trans; - void *dst_map; - const void *src_map; - enum pipe_format src_format, dst_format; - - assert(src->texture && dst->texture); - if (!src->texture || !dst->texture) - return; - - src_format = src->texture->format; - dst_format = dst->texture->format; - - src_trans = pipe_get_transfer(pipe, - src->texture, - src->face, - src->level, - src->zslice, - PIPE_TRANSFER_READ, - src_x, src_y, w, h); - - dst_trans = pipe_get_transfer(pipe, - dst->texture, - dst->face, - dst->level, - dst->zslice, - PIPE_TRANSFER_WRITE, - dst_x, dst_y, w, h); - - assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); - assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); - assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); - - src_map = pipe->transfer_map(pipe, src_trans); - dst_map = pipe->transfer_map(pipe, dst_trans); - - assert(src_map); - assert(dst_map); - - if (src_map && dst_map) { - /* If do_flip, invert src_y position and pass negative src stride */ - util_copy_rect(dst_map, - dst_format, - dst_trans->stride, - 0, 0, - w, h, - src_map, - do_flip ? -(int) src_trans->stride : src_trans->stride, - 0, - do_flip ? h - 1 : 0); - } - - pipe->transfer_unmap(pipe, src_trans); - pipe->transfer_unmap(pipe, dst_trans); - - pipe->transfer_destroy(pipe, src_trans); - pipe->transfer_destroy(pipe, dst_trans); -} - - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fallback for pipe->surface_fill() function. - * XXX should probably put this in new u_surface.c file... - */ -void -util_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - struct pipe_transfer *dst_trans; - void *dst_map; - - assert(dst->texture); - if (!dst->texture) - return; - dst_trans = pipe_get_transfer(pipe, - dst->texture, - dst->face, - dst->level, - dst->zslice, - PIPE_TRANSFER_WRITE, - dstx, dsty, width, height); - - dst_map = pipe->transfer_map(pipe, dst_trans); - - assert(dst_map); - - if (dst_map) { - assert(dst_trans->stride > 0); - - switch (util_format_get_blocksize(dst->texture->format)) { - case 1: - case 2: - case 4: - util_fill_rect(dst_map, dst->texture->format, - dst_trans->stride, - 0, 0, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) dst_map; - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - unsigned i, j; - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst_trans->stride/2; - } - } - break; - default: - assert(0); - break; - } - } - - pipe->transfer_unmap(pipe, dst_trans); - pipe->transfer_destroy(pipe, dst_trans); -} diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index b44d821904b..40d57e662d7 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -37,9 +37,6 @@ #include "pipe/p_format.h" -struct pipe_context; -struct pipe_surface; - extern void util_copy_rect(ubyte * dst, enum pipe_format format, @@ -53,20 +50,4 @@ util_fill_rect(ubyte * dst, enum pipe_format format, unsigned width, unsigned height, uint32_t value); -extern void -util_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dst_x, unsigned dst_y, - struct pipe_surface *src, - unsigned src_x, unsigned src_y, - unsigned w, unsigned h); - -extern void -util_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value); - - #endif /* U_RECT_H */ diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c index 42440d0d673..b47c92ca28f 100644 --- a/src/gallium/auxiliary/util/u_surface.c +++ b/src/gallium/auxiliary/util/u_surface.c @@ -32,12 +32,14 @@ */ +#include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_inlines.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "util/u_surface.h" @@ -118,70 +120,150 @@ util_destroy_rgba_surface(struct pipe_resource *texture, /** - * Compare pipe_framebuffer_state objects. - * \return TRUE if same, FALSE if different + * Fallback function for pipe->surface_copy(). + * Note: (X,Y)=(0,0) is always the upper-left corner. + * if do_flip, flip the image vertically on its way from src rect to dst rect. */ -boolean -util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, - const struct pipe_framebuffer_state *src) +void +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h) { - unsigned i; - - if (dst->width != src->width || - dst->height != src->height) - return FALSE; - - for (i = 0; i < Elements(src->cbufs); i++) { - if (dst->cbufs[i] != src->cbufs[i]) { - return FALSE; - } - } - - if (dst->nr_cbufs != src->nr_cbufs) { - return FALSE; + struct pipe_transfer *src_trans, *dst_trans; + void *dst_map; + const void *src_map; + enum pipe_format src_format, dst_format; + + assert(src->texture && dst->texture); + if (!src->texture || !dst->texture) + return; + + src_format = src->texture->format; + dst_format = dst->texture->format; + + src_trans = pipe_get_transfer(pipe, + src->texture, + src->face, + src->level, + src->zslice, + PIPE_TRANSFER_READ, + src_x, src_y, w, h); + + dst_trans = pipe_get_transfer(pipe, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_TRANSFER_WRITE, + dst_x, dst_y, w, h); + + assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format)); + assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format)); + assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format)); + + src_map = pipe->transfer_map(pipe, src_trans); + dst_map = pipe->transfer_map(pipe, dst_trans); + + assert(src_map); + assert(dst_map); + + if (src_map && dst_map) { + /* If do_flip, invert src_y position and pass negative src stride */ + util_copy_rect(dst_map, + dst_format, + dst_trans->stride, + 0, 0, + w, h, + src_map, + do_flip ? -(int) src_trans->stride : src_trans->stride, + 0, + do_flip ? h - 1 : 0); } - if (dst->zsbuf != src->zsbuf) { - return FALSE; - } + pipe->transfer_unmap(pipe, src_trans); + pipe->transfer_unmap(pipe, dst_trans); - return TRUE; + pipe->transfer_destroy(pipe, src_trans); + pipe->transfer_destroy(pipe, dst_trans); } -/** - * Copy framebuffer state from src to dst, updating refcounts. - */ -void -util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, - const struct pipe_framebuffer_state *src) -{ - unsigned i; - - dst->width = src->width; - dst->height = src->height; - for (i = 0; i < Elements(src->cbufs); i++) { - pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); - } - - dst->nr_cbufs = src->nr_cbufs; - - pipe_surface_reference(&dst->zsbuf, src->zsbuf); -} +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) +/** + * Fallback for pipe->surface_fill() function. + */ void -util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) { - unsigned i; - - for (i = 0; i < fb->nr_cbufs; i++) { - pipe_surface_reference(&fb->cbufs[i], NULL); + struct pipe_transfer *dst_trans; + void *dst_map; + + assert(dst->texture); + if (!dst->texture) + return; + dst_trans = pipe_get_transfer(pipe, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_TRANSFER_WRITE, + dstx, dsty, width, height); + + dst_map = pipe->transfer_map(pipe, dst_trans); + + assert(dst_map); + + if (dst_map) { + assert(dst_trans->stride > 0); + + switch (util_format_get_blocksize(dst->texture->format)) { + case 1: + case 2: + case 4: + util_fill_rect(dst_map, dst->texture->format, + dst_trans->stride, + 0, 0, width, height, value); + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) dst_map; + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + unsigned i, j; + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst_trans->stride/2; + } + } + break; + default: + assert(0); + break; + } } - pipe_surface_reference(&fb->zsbuf, NULL); - - fb->width = fb->height = 0; - fb->nr_cbufs = 0; + pipe->transfer_unmap(pipe, dst_trans); + pipe->transfer_destroy(pipe, dst_trans); } diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h index 119fcd4ce8e..c43169b5278 100644 --- a/src/gallium/auxiliary/util/u_surface.h +++ b/src/gallium/auxiliary/util/u_surface.h @@ -62,17 +62,22 @@ util_destroy_rgba_surface(struct pipe_resource *texture, struct pipe_surface *surface); -extern boolean -util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, - const struct pipe_framebuffer_state *src); extern void -util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, - const struct pipe_framebuffer_state *src); - +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h); extern void -util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb); +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value); + #endif /* U_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index ffb8595d828..6696a4591c1 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "util/u_rect.h" +#include "util/u_surface.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 3d11fa074cc..e2767264e7e 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -202,7 +202,8 @@ static const char *file_strings[TGSI_FILE_COUNT+1] = { "SAMPLER", "ADDR", "IMM", - "LOOP", + "PRED", + "SV", "PAYLOAD" }; diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 6c47415cac5..bd8b9174a89 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -40,6 +40,7 @@ #include <stdint.h> #include <string.h> +#include "util/u_string.h" #include "intel_decode.h" /*#include "intel_chipset.h"*/ @@ -478,7 +479,7 @@ i915_get_instruction_src0(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -496,7 +497,7 @@ i915_get_instruction_src1(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -513,7 +514,7 @@ i915_get_instruction_src2(const uint32_t *data, int i, char *srcname) char swizzle[100]; i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); - sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + util_snprintf(swizzle, sizeof(swizzle), ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); if (strcmp(swizzle, ".xyzw") != 0) strcat(srcname, swizzle); } @@ -642,7 +643,7 @@ i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_pre switch ((d0 >> 19) & 0x3) { case 1: - sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + util_snprintf(dcl_mask, sizeof(dcl_mask), ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); if (strcmp(dcl_mask, ".") == 0) fprintf(out, "bad (empty) dcl mask\n"); @@ -976,7 +977,7 @@ decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, if (i + 3 >= count) BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM"); - sprintf(instr_prefix, "PS%03d", instr); + util_snprintf(instr_prefix, sizeof(instr_prefix), "PS%03d", instr); i915_decode_instruction(data, hw_offset, i, instr_prefix); i += 3; } diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 0bc8bf21966..7e62213597c 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -684,79 +684,67 @@ identity_flush(struct pipe_context *_pipe, static unsigned int identity_is_resource_referenced(struct pipe_context *_pipe, - struct pipe_resource *_resource, - unsigned face, - unsigned level) + struct pipe_resource *_resource, + unsigned face, + unsigned level) { struct identity_context *id_pipe = identity_context(_pipe); struct identity_resource *id_resource = identity_resource(_resource); struct pipe_context *pipe = id_pipe->pipe; - struct pipe_resource *texture = id_resource->resource; + struct pipe_resource *resource = id_resource->resource; return pipe->is_resource_referenced(pipe, - texture, - face, - level); + resource, + face, + level); } static struct pipe_sampler_view * -identity_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) +identity_context_create_sampler_view(struct pipe_context *_pipe, + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) { - struct identity_context *id_pipe = identity_context(pipe); - struct identity_resource *id_resource = identity_resource(texture); - struct pipe_context *pipe_unwrapped = id_pipe->pipe; - struct pipe_resource *texture_unwrapped = id_resource->resource; - struct identity_sampler_view *view = MALLOC(sizeof(struct identity_sampler_view)); - - view->sampler_view = pipe_unwrapped->create_sampler_view(pipe_unwrapped, - texture_unwrapped, - templ); + struct identity_context *id_context = identity_context(_pipe); + struct identity_resource *id_resource = identity_resource(_resource); + struct pipe_context *pipe = id_context->pipe; + struct pipe_resource *resource = id_resource->resource; + struct pipe_sampler_view *result; - view->base = *templ; - view->base.reference.count = 1; - view->base.texture = NULL; - pipe_resource_reference(&view->base.texture, texture); - view->base.context = pipe; + result = pipe->create_sampler_view(pipe, + resource, + templ); - return &view->base; + if (result) + return identity_sampler_view_create(id_context, id_resource, result); + return NULL; } static void -identity_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) +identity_context_sampler_view_destroy(struct pipe_context *_pipe, + struct pipe_sampler_view *_view) { - struct identity_context *id_pipe = identity_context(pipe); - struct identity_sampler_view *id_view = identity_sampler_view(view); - struct pipe_context *pipe_unwrapped = id_pipe->pipe; - struct pipe_sampler_view *view_unwrapped = id_view->sampler_view; - - pipe_unwrapped->sampler_view_destroy(pipe_unwrapped, - view_unwrapped); - - pipe_resource_reference(&view->texture, NULL); - FREE(view); + identity_sampler_view_destroy(identity_context(_pipe), + identity_sampler_view(_view)); } static struct pipe_transfer * identity_context_get_transfer(struct pipe_context *_context, - struct pipe_resource *_resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box) + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box) { struct identity_context *id_context = identity_context(_context); struct identity_resource *id_resource = identity_resource(_resource); struct pipe_context *context = id_context->pipe; - struct pipe_resource *texture = id_resource->resource; + struct pipe_resource *resource = id_resource->resource; struct pipe_transfer *result; result = context->get_transfer(context, - texture, - sr, - usage, - box); + resource, + sr, + usage, + box); if (result) return identity_transfer_create(id_context, id_resource, result); @@ -765,7 +753,7 @@ identity_context_get_transfer(struct pipe_context *_context, static void identity_context_transfer_destroy(struct pipe_context *_pipe, - struct pipe_transfer *_transfer) + struct pipe_transfer *_transfer) { identity_transfer_destroy(identity_context(_pipe), identity_transfer(_transfer)); @@ -773,7 +761,7 @@ identity_context_transfer_destroy(struct pipe_context *_pipe, static void * identity_context_transfer_map(struct pipe_context *_context, - struct pipe_transfer *_transfer) + struct pipe_transfer *_transfer) { struct identity_context *id_context = identity_context(_context); struct identity_transfer *id_transfer = identity_transfer(_transfer); @@ -781,15 +769,15 @@ identity_context_transfer_map(struct pipe_context *_context, struct pipe_transfer *transfer = id_transfer->transfer; return context->transfer_map(context, - transfer); + transfer); } static void -identity_context_transfer_flush_region( struct pipe_context *_context, - struct pipe_transfer *_transfer, - const struct pipe_box *box) +identity_context_transfer_flush_region(struct pipe_context *_context, + struct pipe_transfer *_transfer, + const struct pipe_box *box) { struct identity_context *id_context = identity_context(_context); struct identity_transfer *id_transfer = identity_transfer(_transfer); @@ -797,14 +785,14 @@ identity_context_transfer_flush_region( struct pipe_context *_context, struct pipe_transfer *transfer = id_transfer->transfer; context->transfer_flush_region(context, - transfer, - box); + transfer, + box); } static void identity_context_transfer_unmap(struct pipe_context *_context, - struct pipe_transfer *_transfer) + struct pipe_transfer *_transfer) { struct identity_context *id_context = identity_context(_context); struct identity_transfer *id_transfer = identity_transfer(_transfer); @@ -812,33 +800,33 @@ identity_context_transfer_unmap(struct pipe_context *_context, struct pipe_transfer *transfer = id_transfer->transfer; context->transfer_unmap(context, - transfer); + transfer); } static void -identity_context_transfer_inline_write( struct pipe_context *_context, - struct pipe_resource *_resource, - struct pipe_subresource sr, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned slice_stride) +identity_context_transfer_inline_write(struct pipe_context *_context, + struct pipe_resource *_resource, + struct pipe_subresource sr, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned slice_stride) { struct identity_context *id_context = identity_context(_context); struct identity_resource *id_resource = identity_resource(_resource); struct pipe_context *context = id_context->pipe; - struct pipe_resource *texture = id_resource->resource; + struct pipe_resource *resource = id_resource->resource; context->transfer_inline_write(context, - texture, - sr, - usage, - box, - data, - stride, - slice_stride); + resource, + sr, + usage, + box, + data, + stride, + slice_stride); } @@ -905,8 +893,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.clear = identity_clear; id_pipe->base.flush = identity_flush; id_pipe->base.is_resource_referenced = identity_is_resource_referenced; - id_pipe->base.create_sampler_view = identity_create_sampler_view; - id_pipe->base.sampler_view_destroy = identity_sampler_view_destroy; + id_pipe->base.create_sampler_view = identity_context_create_sampler_view; + id_pipe->base.sampler_view_destroy = identity_context_sampler_view_destroy; id_pipe->base.get_transfer = identity_context_get_transfer; id_pipe->base.transfer_destroy = identity_context_transfer_destroy; id_pipe->base.transfer_map = identity_context_transfer_map; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index d50914e7d5d..ca4743f9ef7 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -108,9 +108,45 @@ identity_surface_destroy(struct identity_surface *id_surface) } +struct pipe_sampler_view * +identity_sampler_view_create(struct identity_context *id_context, + struct identity_resource *id_resource, + struct pipe_sampler_view *view) +{ + struct identity_sampler_view *id_view; + + if (!view) + goto error; + + assert(view->texture == id_resource->resource); + + id_view = MALLOC(sizeof(struct identity_sampler_view)); + + id_view->base = *view; + id_view->base.reference.count = 1; + id_view->base.texture = NULL; + pipe_resource_reference(&id_view->base.texture, id_resource->resource); + id_view->base.context = id_context->pipe; + + return &id_view->base; +error: + return NULL; +} + +void +identity_sampler_view_destroy(struct identity_context *id_context, + struct identity_sampler_view *id_view) +{ + pipe_resource_reference(&id_view->base.texture, NULL); + id_context->pipe->sampler_view_destroy(id_context->pipe, + id_view->sampler_view); + FREE(id_view); +} + + struct pipe_transfer * identity_transfer_create(struct identity_context *id_context, - struct identity_resource *id_resource, + struct identity_resource *id_resource, struct pipe_transfer *transfer) { struct identity_transfer *id_transfer; @@ -144,8 +180,8 @@ identity_transfer_destroy(struct identity_context *id_context, struct identity_transfer *id_transfer) { pipe_resource_reference(&id_transfer->base.resource, NULL); - id_context->pipe->transfer_destroy(id_context->pipe, - id_transfer->transfer); + id_transfer->pipe->transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index 058cf3009df..5eea10b0b5a 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -142,7 +142,7 @@ identity_transfer_unwrap(struct pipe_transfer *_transfer) struct pipe_resource * identity_resource_create(struct identity_screen *id_screen, - struct pipe_resource *resource); + struct pipe_resource *resource); void identity_resource_destroy(struct identity_resource *id_resource); @@ -154,9 +154,18 @@ identity_surface_create(struct identity_resource *id_resource, void identity_surface_destroy(struct identity_surface *id_surface); +struct pipe_sampler_view * +identity_sampler_view_create(struct identity_context *id_context, + struct identity_resource *id_resource, + struct pipe_sampler_view *view); + +void +identity_sampler_view_destroy(struct identity_context *id_context, + struct identity_sampler_view *id_sampler_view); + struct pipe_transfer * identity_transfer_create(struct identity_context *id_context, - struct identity_resource *id_resource, + struct identity_resource *id_resource, struct pipe_transfer *transfer); void diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index 52573b211fb..7671bded981 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -120,14 +120,14 @@ identity_screen_context_create(struct pipe_screen *_screen, static struct pipe_resource * identity_screen_resource_create(struct pipe_screen *_screen, - const struct pipe_resource *templat) + const struct pipe_resource *templat) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; struct pipe_resource *result; result = screen->resource_create(screen, - templat); + templat); if (result) return identity_resource_create(id_screen, result); @@ -136,8 +136,8 @@ identity_screen_resource_create(struct pipe_screen *_screen, static struct pipe_resource * identity_screen_resource_from_handle(struct pipe_screen *_screen, - const struct pipe_resource *templ, - struct winsys_handle *handle) + const struct pipe_resource *templ, + struct winsys_handle *handle) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; @@ -154,44 +154,44 @@ identity_screen_resource_from_handle(struct pipe_screen *_screen, static boolean identity_screen_resource_get_handle(struct pipe_screen *_screen, - struct pipe_resource *_texture, - struct winsys_handle *handle) + struct pipe_resource *_resource, + struct winsys_handle *handle) { struct identity_screen *id_screen = identity_screen(_screen); - struct identity_resource *id_resource = identity_resource(_texture); + struct identity_resource *id_resource = identity_resource(_resource); struct pipe_screen *screen = id_screen->screen; - struct pipe_resource *texture = id_resource->resource; + struct pipe_resource *resource = id_resource->resource; /* TODO trace call */ - return screen->resource_get_handle(screen, texture, handle); + return screen->resource_get_handle(screen, resource, handle); } static void identity_screen_resource_destroy(struct pipe_screen *screen, - struct pipe_resource *_texture) + struct pipe_resource *_resource) { - identity_resource_destroy(identity_resource(_texture)); + identity_resource_destroy(identity_resource(_resource)); } static struct pipe_surface * identity_screen_get_tex_surface(struct pipe_screen *_screen, - struct pipe_resource *_texture, + struct pipe_resource *_resource, unsigned face, unsigned level, unsigned zslice, unsigned usage) { struct identity_screen *id_screen = identity_screen(_screen); - struct identity_resource *id_resource = identity_resource(_texture); + struct identity_resource *id_resource = identity_resource(_resource); struct pipe_screen *screen = id_screen->screen; - struct pipe_resource *texture = id_resource->resource; + struct pipe_resource *resource = id_resource->resource; struct pipe_surface *result; result = screen->get_tex_surface(screen, - texture, + resource, face, level, zslice, @@ -214,7 +214,7 @@ static struct pipe_resource * identity_screen_user_buffer_create(struct pipe_screen *_screen, void *ptr, unsigned bytes, - unsigned usage) + unsigned usage) { struct identity_screen *id_screen = identity_screen(_screen); struct pipe_screen *screen = id_screen->screen; @@ -223,7 +223,7 @@ identity_screen_user_buffer_create(struct pipe_screen *_screen, result = screen->user_buffer_create(screen, ptr, bytes, - usage); + usage); if (result) return identity_resource_create(id_screen, result); diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore index 257b72d7b2b..a1b6f56e0d2 100644 --- a/src/gallium/drivers/llvmpipe/.gitignore +++ b/src/gallium/drivers/llvmpipe/.gitignore @@ -1 +1,5 @@ lp_tile_soa.c +lp_test_blend +lp_test_conv +lp_test_format +lp_test_printf diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 466a2f54fbe..30e206a2b42 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -185,6 +185,8 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(screen->pass); + LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddConstantPropagationPass(screen->pass); if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi @@ -193,9 +195,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) */ LLVMAddInstructionCombiningPass(screen->pass); } - LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddGVNPass(screen->pass); - LLVMAddCFGSimplificationPass(screen->pass); } lp_jit_init_globals(screen); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 1482a777ff8..887f2dbad91 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "util/u_framebuffer.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_inlines.h" diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 21509560084..1a2cd55b164 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -33,6 +33,7 @@ */ #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_pack_color.h" diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 965777b6411..5f861d6ca4d 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -618,7 +618,6 @@ generate_fragment(struct llvmpipe_context *lp, struct lp_type fs_type; struct lp_type blend_type; LLVMTypeRef fs_elem_type; - LLVMTypeRef fs_vec_type; LLVMTypeRef fs_int_vec_type; LLVMTypeRef blend_vec_type; LLVMTypeRef arg_types[15]; @@ -675,7 +674,6 @@ generate_fragment(struct llvmpipe_context *lp, */ fs_elem_type = lp_build_elem_type(fs_type); - fs_vec_type = lp_build_vec_type(fs_type); fs_int_vec_type = lp_build_int_vec_type(fs_type); blend_vec_type = lp_build_vec_type(blend_type); @@ -1064,6 +1062,34 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe, /** + * Return the blend factor equivalent to a destination alpha of one. + */ +static INLINE unsigned +force_dst_alpha_one(unsigned factor, boolean alpha) +{ + switch(factor) { + case PIPE_BLENDFACTOR_DST_ALPHA: + return PIPE_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return PIPE_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_ZERO; + } + + if (alpha) { + switch(factor) { + case PIPE_BLENDFACTOR_DST_COLOR: + return PIPE_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return PIPE_BLENDFACTOR_ZERO; + } + } + + return factor; +} + + +/** * We need to generate several variants of the fragment pipeline to match * all the combinations of the contributing state atoms. * @@ -1104,6 +1130,7 @@ make_variant_key(struct llvmpipe_context *lp, key->nr_cbufs = lp->framebuffer.nr_cbufs; for (i = 0; i < lp->framebuffer.nr_cbufs; i++) { + struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i]; const struct util_format_description *format_desc; unsigned chan; @@ -1111,7 +1138,7 @@ make_variant_key(struct llvmpipe_context *lp, assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB); - key->blend.rt[i].colormask = lp->blend->rt[i].colormask; + blend_rt->colormask = lp->blend->rt[i].colormask; /* mask out color channels not present in the color buffer. * Should be simple to incorporate per-cbuf writemasks: @@ -1120,7 +1147,27 @@ make_variant_key(struct llvmpipe_context *lp, enum util_format_swizzle swizzle = format_desc->swizzle[chan]; if(swizzle > UTIL_FORMAT_SWIZZLE_W) - key->blend.rt[i].colormask &= ~(1 << chan); + blend_rt->colormask &= ~(1 << chan); + } + + /* + * Our swizzled render tiles always have an alpha channel, but the linear + * render target format often does not, so force here the dst alpha to be + * one. + * + * This is not a mere optimization. Wrong results will be produced if the + * dst alpha is used, the dst format does not have alpha, and the previous + * rendering was not flushed from the swizzled to linear buffer. For + * example, NonPowTwo DCT. + * + * TODO: This should be generalized to all channels for better + * performance, but only alpha causes correctness issues. + */ + if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) { + blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor, FALSE); + blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor, FALSE); + blend_rt->alpha_src_factor = force_dst_alpha_one(blend_rt->alpha_src_factor, TRUE); + blend_rt->alpha_dst_factor = force_dst_alpha_one(blend_rt->alpha_dst_factor, TRUE); } } diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c index 63b8f27b39c..4b135aaf8ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c @@ -30,6 +30,7 @@ #include "pipe/p_state.h" #include "util/u_inlines.h" +#include "util/u_framebuffer.h" #include "util/u_surface.h" #include "lp_context.h" #include "lp_scene.h" diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 8bd83f576f4..245171120dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "util/u_rect.h" +#include "util/u_surface.h" #include "lp_context.h" #include "lp_flush.h" #include "lp_limits.h" diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c index 74b7393e4ec..d3a9d39f616 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c @@ -170,7 +170,10 @@ lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, unsigned unit, unsigned num_coords, const LLVMValueRef *coords, - LLVMValueRef lodbias, + const LLVMValueRef *ddx, + const LLVMValueRef *ddy, + LLVMValueRef lod_bias, /* optional */ + LLVMValueRef explicit_lod, /* optional */ LLVMValueRef *texel) { struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base; @@ -182,9 +185,9 @@ lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, &sampler->dynamic_state.base, type, unit, - num_coords, - coords, - lodbias, + num_coords, coords, + ddx, ddy, + lod_bias, explicit_lod, texel); } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index b8b6b12120b..0156ff95ff9 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -3169,15 +3169,16 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (pc->p->type == PIPE_SHADER_FRAGMENT) nv50_fp_move_results(pc); - /* last insn must be long so it can have the exit bit set */ - if (!is_long(pc->p->exec_tail)) - convert_to_long(pc, pc->p->exec_tail); - else - if (is_immd(pc->p->exec_tail) || + if (!pc->p->exec_tail || + is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail) || is_control_flow(pc->p->exec_tail)) emit_nop(pc); + /* last insn must be long so it can have the exit bit set */ + if (!is_long(pc->p->exec_tail)) + convert_to_long(pc, pc->p->exec_tail); + pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ terminate_mbb(pc); @@ -4162,7 +4163,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) struct pipe_transfer *transfer; if (!p->data[0] && p->immd_nr) { - struct nouveau_resource *heap = nv50->screen->immd_heap[0]; + struct nouveau_resource *heap = nv50->screen->immd_heap; if (nouveau_resource_alloc(heap, p->immd_nr, p, &p->data[0])) { while (heap->next && heap->size < p->immd_nr) { @@ -4180,7 +4181,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) p->immd_nr, NV50_CB_PMISC); } - assert(p->param_nr <= 512); + assert(p->param_nr <= 16384); if (p->param_nr) { unsigned cb; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ad17991be9a..2dd10424245 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -190,9 +190,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) nouveau_grobj_free(&screen->tesla); nouveau_grobj_free(&screen->eng2d); nouveau_grobj_free(&screen->m2mf); - nouveau_resource_destroy(&screen->immd_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[0]); - nouveau_resource_destroy(&screen->parm_heap[1]); + nouveau_resource_destroy(&screen->immd_heap); nouveau_screen_fini(&screen->base); FREE(screen); } @@ -242,7 +240,7 @@ nv50_screen_relocs(struct nv50_screen *screen) OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOC (chan, screen->constbuf_parm[i], - ((NV50_CB_PVP + i) << 16) | 0x0800, rl, 0, 0); + ((NV50_CB_PVP + i) << 16) | 0x0000, rl, 0, 0); } } @@ -411,7 +409,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200); for (i = 0; i < 3; i++) { - ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4, + ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (4096 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { nv50_screen_destroy(pscreen); @@ -420,14 +418,12 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl); OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl); - OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0800); + /* CB_DEF_SET_SIZE value of 0x0000 means 65536 */ + OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0000); } - if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) || - nouveau_resource_init(&screen->parm_heap[0], 0, 512) || - nouveau_resource_init(&screen->parm_heap[1], 0, 512)) - { - NOUVEAU_ERR("Error initialising constant buffers.\n"); + if (nouveau_resource_init(&screen->immd_heap, 0, 128)) { + NOUVEAU_ERR("Error initialising shader immediates heap.\n"); nv50_screen_destroy(pscreen); return NULL; } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 40ebbee72e2..fbf15a75967 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -20,8 +20,7 @@ struct nv50_screen { struct nouveau_bo *constbuf_misc[1]; struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; - struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; + struct nouveau_resource *immd_heap; struct pipe_resource *strm_vbuf[16]; diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index c2d9e835260..d905d95354f 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -31,6 +31,27 @@ #include "util/u_tile.h" #include "util/u_format.h" +/* return TRUE for formats that can be converted among each other by NV50_2D */ +static INLINE boolean +nv50_2d_format_faithful(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B10G10R10A2_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + return TRUE; + default: + return FALSE; + } +} + static INLINE int nv50_format(enum pipe_format format) { @@ -47,9 +68,12 @@ nv50_format(enum pipe_format format) return NV50_2D_DST_FORMAT_R5G6B5_UNORM; case PIPE_FORMAT_B5G5R5A1_UNORM: return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM; + case PIPE_FORMAT_B10G10R10A2_UNORM: + return NV50_2D_DST_FORMAT_A2R10G10B10_UNORM; case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: return NV50_2D_DST_FORMAT_R8_UNORM; case PIPE_FORMAT_R32G32B32A32_FLOAT: return NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT; @@ -178,7 +202,9 @@ nv50_surface_copy(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); struct nv50_screen *screen = nv50->screen; - assert(src->format == dest->format); + assert((src->format == dest->format) || + (nv50_2d_format_faithful(src->format) && + nv50_2d_format_faithful(dest->format))); nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, srcy, width, height); diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 928ad300eee..85c2c149016 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -129,15 +129,16 @@ void r300_surface_copy(struct pipe_context* pipe, if (dst->texture->format != src->texture->format) { debug_printf("r300: Implementation error: Format mismatch in %s\n" " : src: %s dst: %s\n", __FUNCTION__, - util_format_name(src->texture->format), - util_format_name(dst->texture->format)); + util_format_short_name(src->texture->format), + util_format_short_name(dst->texture->format)); debug_assert(0); } if (!pipe->screen->is_format_supported(pipe->screen, old_format, src->texture->target, PIPE_BIND_RENDER_TARGET | - PIPE_BIND_SAMPLER_VIEW, 0)) { + PIPE_BIND_SAMPLER_VIEW, 0) && + util_format_is_plain(old_format)) { switch (util_format_get_blocksize(old_format)) { case 1: new_format = PIPE_FORMAT_I8_UNORM; @@ -154,7 +155,7 @@ void r300_surface_copy(struct pipe_context* pipe, default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" "r300: surface_copy: Software fallback doesn't work for tiled textures.\n", - util_format_name(old_format)); + util_format_short_name(old_format)); } } diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 4f721ebb599..e84bce0010f 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -76,7 +76,6 @@ static void r300_destroy_context(struct pipe_context* context) FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); FREE(r300->textures_state.state); - FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300->fs_constants.state); @@ -125,7 +124,6 @@ static void r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(viewport_state, 9); R300_INIT_ATOM(rs_block_state, 0); R300_INIT_ATOM(vertex_stream_state, 0); - R300_INIT_ATOM(vap_output_state, 6); R300_INIT_ATOM(pvs_flush, 2); R300_INIT_ATOM(vs_state, 0); R300_INIT_ATOM(vs_constants, 0); @@ -149,7 +147,6 @@ static void r300_setup_atoms(struct r300_context* r300) r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); - r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index a05bf3ce099..e9c8fcdc157 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -119,6 +119,10 @@ struct r300_rs_state { }; struct r300_rs_block { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ + uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */ uint32_t count; /* R300_RS_COUNT */ uint32_t inst_count; /* R300_RS_INST_COUNT */ @@ -190,12 +194,6 @@ struct r300_vertex_stream_state { unsigned count; }; -struct r300_vap_output_state { - uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ - uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ - uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ -}; - struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -381,7 +379,7 @@ struct r300_context { struct r300_atom query_start; /* Rasterizer state. */ struct r300_atom rs_state; - /* RS block state. */ + /* RS block state + VAP (vertex shader) output mapping state. */ struct r300_atom rs_block_state; /* Scissor state. */ struct r300_atom scissor_state; @@ -389,8 +387,6 @@ struct r300_context { struct r300_atom textures_state; /* Vertex stream formatting state. */ struct r300_atom vertex_stream_state; - /* VAP (vertex shader) output mapping state. */ - struct r300_atom vap_output_state; /* Vertex shader. */ struct r300_atom vs_state; /* Vertex shader constant buffer. */ @@ -424,6 +420,9 @@ struct r300_context { struct pipe_viewport_state viewport; + /* Stream locations for SWTCL. */ + int stream_loc_notcl[16]; + /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; /* Whether polygon offset is enabled. */ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 4c2836f36a8..8eb321fa08a 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,7 @@ static struct debug_option debug_options[] = { { "cs", DBG_CS, "Command submissions (for debugging)" }, { "draw", DBG_DRAW, "Draw and emit (for debugging)" }, { "tex", DBG_TEX, "Textures (for debugging)" }, + { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" }, { "fall", DBG_FALL, "Fallbacks (for debugging)" }, { "rs", DBG_RS, "Rasterizer (for debugging)" }, { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 92b7517b8d6..23bbc6a99c8 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -717,6 +717,13 @@ void r300_emit_rs_block_state(struct r300_context* r300, DBG(r300, DBG_DRAW, "r300: RS emit:\n"); BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(rs->vap_vtx_state_cntl); + OUT_CS(rs->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(rs->vap_out_vtx_fmt[0]); + OUT_CS(rs->vap_out_vtx_fmt[1]); + if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { @@ -901,25 +908,6 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, END_CS; } -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state) -{ - struct r300_vap_output_state *vap_out_state = - (struct r300_vap_output_state*)state; - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); - - BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vap_out_state->vap_vtx_state_cntl); - OUT_CS(vap_out_state->vap_vsm_vtx_assm); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); - OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); - END_CS; -} - void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index c3eb195d4e7..3c0edf6fdca 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -81,9 +81,6 @@ void r300_emit_vertex_buffer(struct r300_context* r300); void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_vap_output_state(struct r300_context* r300, - unsigned size, void* state); - void r300_emit_vs_constants(struct r300_context* r300, unsigned size, void *state); diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 735c233c9e2..d58aa138a70 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -73,6 +73,7 @@ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { #define DBG_NO_IMMD 0x0000200 #define DBG_STATS 0x0000400 #define DBG_RS 0x0000800 +#define DBG_TEXALLOC 0x0001000 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 9319dadfd1e..446422ca0f0 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -538,46 +538,12 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, } /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ -static void r300_fb_update_tiling_flags(struct r300_context *r300, +static void r300_fb_set_tiling_flags(struct r300_context *r300, const struct pipe_framebuffer_state *old_state, const struct pipe_framebuffer_state *new_state) { struct r300_texture *tex; - unsigned i, j, level; - - /* Reset tiling flags for old surfaces to default values. */ - for (i = 0; i < old_state->nr_cbufs; i++) { - for (j = 0; j < new_state->nr_cbufs; j++) { - if (old_state->cbufs[i]->texture == new_state->cbufs[j]->texture) { - break; - } - } - /* If not binding the surface again... */ - if (j != new_state->nr_cbufs) { - continue; - } - - tex = r300_texture(old_state->cbufs[i]->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } - if (old_state->zsbuf && - (!new_state->zsbuf || - old_state->zsbuf->texture != new_state->zsbuf->texture)) { - tex = r300_texture(old_state->zsbuf->texture); - - if (tex) { - r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[0], - tex->microtile, - tex->macrotile); - } - } + unsigned i, level; /* Set tiling flags for new surfaces. */ for (i = 0; i < new_state->nr_cbufs; i++) { @@ -585,7 +551,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->cbufs[i]->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -594,7 +560,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, level = new_state->zsbuf->level; r300->rws->buffer_set_tiling(r300->rws, tex->buffer, - tex->pitch[level], + tex->pitch[0], tex->microtile, tex->mip_macrotile[level]); } @@ -644,7 +610,8 @@ static void r300->dsa_state.dirty = TRUE; } - r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); + /* The tiling flags are dependent on the surface miplevel, unfortunately. */ + r300_fb_set_tiling_flags(r300, r300->fb_state.state, state); memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); @@ -719,10 +686,6 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300_mark_fs_code_dirty(r300); r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ - - if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { - r300->vap_output_state.dirty = TRUE; - } } /* Delete fragment shader state. */ @@ -853,11 +816,8 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF; - /* XXX Disable point sprites until we know what's wrong with them. */ - rs->rs.sprite_coord_enable = 0; - /* Point sprites */ - if (rs->rs.sprite_coord_enable) { + if (state->sprite_coord_enable) { rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE; for (i = 0; i < 8; i++) { if (state->sprite_coord_enable & (1 << i)) @@ -1357,7 +1317,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe, /* XXX Shouldn't we align the format? */ fprintf(stderr, "r300_create_vertex_elements_state: " "Unaligned format %s:%i isn't supported\n", - util_format_name(*format), size); + util_format_short_name(*format), size); assert(0); abort(); } @@ -1428,14 +1388,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs_state.state = vs; - // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block - if (r300->fs.state) { - r300_vertex_shader_setup_wpos(r300); - } - memcpy(r300->vap_output_state.state, &vs->vap_out, - sizeof(struct r300_vap_output_state)); - r300->vap_output_state.dirty = TRUE; - /* The majority of the RS block bits is dependent on the vertex shader. */ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index ccc4b583a6a..e3adace0faa 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -115,12 +115,11 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300) static void r300_swtcl_vertex_psc(struct r300_context *r300) { struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state; - struct r300_vertex_shader* vs = r300->vs_state.state; - struct vertex_info* vinfo = &r300->vertex_info; + struct vertex_info* vinfo = &r300->vertex_info; uint16_t type, swizzle; enum pipe_format format; unsigned i, attrib_count; - int* vs_output_tab = vs->stream_loc_notcl; + int* vs_output_tab = r300->stream_loc_notcl; /* XXX hax */ memset(vstream, 0, sizeof(struct r300_vertex_stream_state)); @@ -269,21 +268,29 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset) /* Set up the RS block. * - * This is the part of the chipset that actually does the rasterization - * of vertices into fragments. This is also the part of the chipset that - * locks up if any part of it is even slightly wrong. */ -static void r300_update_rs_block(struct r300_context* r300, - struct r300_shader_semantics* vs_outputs, - struct r300_shader_semantics* fs_inputs) + * This is the part of the chipset that is responsible for linking vertex + * and fragment shaders and stuffed texture coordinates. + * + * The rasterizer reads data from VAP, which produces vertex shader outputs, + * and GA, which produces stuffed texture coordinates. VAP outputs have + * precedence over GA. All outputs must be rasterized otherwise it locks up. + * If there are more outputs rasterized than is set in VAP/GA, it locks up + * too. The funky part is that this info has been pretty much obtained by trial + * and error. */ +static void r300_update_rs_block(struct r300_context *r300) { - struct r300_rs_block rs = { { 0 } }; - int i, col_count = 0, tex_count = 0, fp_offset = 0, count; + struct r300_vertex_shader *vs = r300->vs_state.state; + struct r300_shader_semantics *vs_outputs = &vs->outputs; + struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs; + struct r300_rs_block rs = {0}; + int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0; void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; + int *stream_loc_notcl = r300->stream_loc_notcl; if (r300->screen->caps.is_r500) { rX00_rs_col = r500_rs_col; @@ -297,15 +304,31 @@ static void r300_update_rs_block(struct r300_context* r300, rX00_rs_tex_write = r300_rs_tex_write; } - /* Rasterize colors. */ + /* The position is always present in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + stream_loc_notcl[loc++] = 0; + + /* Set up the point size in VAP. */ + if (vs_outputs->psize != ATTR_UNUSED) { + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + stream_loc_notcl[loc++] = 1; + } + + /* Set up and rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + /* Set up the color in VAP. */ + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc_notcl[loc++] = 2 + i; + + /* Rasterize it. */ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->color[i] != ATTR_UNUSED) { rX00_rs_col_write(&rs, col_count, fp_offset); fp_offset++; @@ -328,17 +351,33 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Set up back-face colors. The rasterizer will do the color selection + * automatically. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc_notcl[loc++] = 4 + i; + } + } + /* Rasterize texture coordinates. */ - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (!sprite_coord) { + /* Set up the texture coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + } + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, sprite_coord ? SWIZ_XY01 : SWIZ_XYZW); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->generic[i] != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; @@ -365,12 +404,16 @@ static void r300_update_rs_block(struct r300_context* r300, } /* Rasterize fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - /* Always rasterize if it's written by the VS, - * otherwise it locks up. */ + if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) { + /* Set up the fog coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001); - /* Write it to the FS input register if it's used by the FS. */ + /* Write it to the FS input register if it's needed by the FS. */ if (fs_inputs->fog != ATTR_UNUSED) { rX00_rs_tex_write(&rs, tex_count, fp_offset); fp_offset++; @@ -391,9 +434,17 @@ static void r300_update_rs_block(struct r300_context* r300, } /* Rasterize WPOS. */ - /* If the FS doesn't need it, it's not written by the VS. */ - if (vs_outputs->wpos != ATTR_UNUSED && fs_inputs->wpos != ATTR_UNUSED) { + /* Don't set it in VAP if the FS doesn't need it. */ + if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) { + /* Set up the WPOS coordinates in VAP. */ + rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count); + rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count)); + stream_loc_notcl[loc++] = 6 + tex_count; + + /* Rasterize it. */ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW); + + /* Write it to the FS input register. */ rX00_rs_tex_write(&rs, tex_count, fp_offset); DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n"); @@ -402,6 +453,11 @@ static void r300_update_rs_block(struct r300_context* r300, tex_count++; } + /* Invalidate the rest of the no-TCL (GA) stream locations. */ + for (; loc < 16;) { + stream_loc_notcl[loc++] = -1; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(&rs, 0, 0, SWIZ_0001); @@ -422,18 +478,10 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count*2; + r300->rs_block_state.size = 11 + count*2; } } -/* Update the shader-dependant states. */ -static void r300_update_derived_shader_state(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - - r300_update_rs_block(r300, &vs->outputs, &r300_fs(r300)->shader->inputs); -} - static void r300_merge_textures_and_samplers(struct r300_context* r300) { struct r300_textures_state *state = @@ -529,7 +577,7 @@ void r300_update_derived_state(struct r300_context* r300) } if (r300->rs_block_state.dirty) { - r300_update_derived_shader_state(r300); + r300_update_rs_block(r300); } if (r300->draw) { diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index fcbdb91b67e..715601042fa 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -371,7 +371,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { desc = util_format_description(format); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { - fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_name(format), + fprintf(stderr, "r300: Bad format %s in %s:%d\n", util_format_short_name(format), __FUNCTION__, __LINE__); assert(0); abort(); @@ -394,7 +394,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_name(format), __FUNCTION__, __LINE__); + util_format_short_name(format), __FUNCTION__, __LINE__); assert(0); abort(); } @@ -416,7 +416,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_name(format), __FUNCTION__, __LINE__); + util_format_short_name(format), __FUNCTION__, __LINE__); fprintf(stderr, "r300: desc->channel[0].size == %d\n", desc->channel[0].size); assert(0); @@ -425,7 +425,7 @@ r300_translate_vertex_data_type(enum pipe_format format) { break; default: fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_name(format), __FUNCTION__, __LINE__); + util_format_short_name(format), __FUNCTION__, __LINE__); assert(0); abort(); } @@ -449,7 +449,7 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) { if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { fprintf(stderr, "r300: Bad format %s in %s:%d\n", - util_format_name(format), __FUNCTION__, __LINE__); + util_format_short_name(format), __FUNCTION__, __LINE__); return 0; } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 69e6a124458..d6f629cf9c6 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -46,18 +46,6 @@ static const unsigned microblock_table[5][3][2] = { {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ }; -/* Return true for non-compressed and non-YUV formats. */ -static boolean r300_format_is_plain(enum pipe_format format) -{ - const struct util_format_description *desc = util_format_description(format); - - if (!format) { - return FALSE; - } - - return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN; -} - /* Translate a pipe_format into a useful texture format for sampling. * * Some special formats are translated directly using R300_EASY_TX_FORMAT, @@ -585,9 +573,6 @@ static void r300_texture_setup_immutable_state(struct r300_screen* screen, f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | R300_TXO_MICRO_TILE(tex->microtile); - - SCREEN_DBG(screen, DBG_TEX, "r300: Set texture state (%dx%d, %d levels)\n", - pt->width0, pt->height0, pt->last_level); } static void r300_texture_setup_fb_state(struct r300_screen* screen, @@ -622,8 +607,8 @@ void r300_texture_reinterpret_format(struct pipe_screen *screen, { struct r300_screen *r300screen = r300_screen(screen); - SCREEN_DBG(r300screen, DBG_TEX, "r300: Reinterpreting format: %s -> %s\n", - util_format_name(tex->format), util_format_name(new_format)); + SCREEN_DBG(r300screen, DBG_TEX, "r300: texture_reinterpret_format: %s -> %s\n", + util_format_short_name(tex->format), util_format_short_name(new_format)); tex->format = new_format; @@ -714,7 +699,7 @@ unsigned r300_texture_get_stride(struct r300_screen* screen, width = u_minify(tex->b.b.width0, level); - if (r300_format_is_plain(tex->b.b.format)) { + if (util_format_is_plain(tex->b.b.format)) { tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH, tex->mip_macrotile[level]); width = align(width, tile_width); @@ -732,7 +717,7 @@ static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, height = u_minify(tex->b.b.height0, level); - if (r300_format_is_plain(tex->b.b.format)) { + if (util_format_is_plain(tex->b.b.format)) { tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT, tex->mip_macrotile[level]); height = align(height, tile_height); @@ -774,8 +759,8 @@ static void r300_setup_miptree(struct r300_screen* screen, unsigned stride, size, layer_size, nblocksy, i; boolean rv350_mode = screen->caps.is_rv350; - SCREEN_DBG(screen, DBG_TEX, "r300: Making miptree for texture, format %s\n", - util_format_name(base->format)); + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Making miptree for texture, format %s\n", + util_format_short_name(base->format)); for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ @@ -801,7 +786,7 @@ static void r300_setup_miptree(struct r300_screen* screen, tex->hwpitch[i] = tex->pitch[i] * util_format_get_blockwidth(base->format); - SCREEN_DBG(screen, DBG_TEX, "r300: Texture miptree: Level %d " + SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d " "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n", i, u_minify(base->width0, i), u_minify(base->height0, i), u_minify(base->depth0, i), stride, tex->size, @@ -825,7 +810,7 @@ static void r300_setup_tiling(struct pipe_screen *screen, boolean is_zb = util_format_is_depth_or_stencil(format); boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING); - if (!r300_format_is_plain(format)) { + if (!util_format_is_plain(format)) { return; } @@ -927,6 +912,17 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, return NULL; } + /* Refuse to create a texture with size 0. */ + if (!base->width0 || + (!base->height0 && (base->target == PIPE_TEXTURE_2D || + base->target == PIPE_TEXTURE_CUBE)) || + (!base->depth0 && base->target == PIPE_TEXTURE_3D)) { + fprintf(stderr, "r300: texture_create: " + "Got invalid texture dimensions: %ix%ix%i\n", + base->width0, base->height0, base->depth0); + return NULL; + } + tex->b.b = *base; tex->b.vtbl = &r300_texture_vtbl; pipe_reference_init(&tex->b.b.reference, 1); @@ -942,6 +938,15 @@ struct pipe_resource* r300_texture_create(struct pipe_screen* screen, r300_texture_setup_immutable_state(rscreen, tex); r300_texture_setup_fb_state(rscreen, tex); + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, " + "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n", + tex->macrotile ? "YES" : " NO", + tex->microtile ? "YES" : " NO", + tex->hwpitch[0], + base->width0, base->height0, base->depth0, base->last_level, + util_format_short_name(base->format)); + tex->buffer = rws->buffer_create(rws, 2048, PIPE_BIND_SAMPLER_VIEW, /* XXX */ tex->size); @@ -1039,6 +1044,14 @@ r300_texture_from_handle(struct pipe_screen* screen, rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile); r300_setup_flags(tex); + SCREEN_DBG(rscreen, DBG_TEX, + "r300: texture_from_handle: Macro: %s, Micro: %s, " + "Pitch: % 4i, Dim: %ix%i, Format: %s\n", + tex->macrotile ? "YES" : " NO", + tex->microtile ? "YES" : " NO", + stride / util_format_get_blocksize(base->format), + base->width0, base->height0, + util_format_short_name(base->format)); /* Enforce microtiled zbuffer. */ override_zb_flags = util_format_is_depth_or_stencil(base->format) && diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index bfab9c3b014..f3186431e1d 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,7 +22,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_vs.h" -#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -94,94 +93,6 @@ static void r300_shader_read_vs_outputs( vs_outputs->wpos = i; } -/* This function sets up: - * - VAP mapping, which maps VS registers to output semantics and - * at the same time it indicates which attributes are enabled and should - * be rasterized. - * - Stream mapping to VS outputs if TCL is not present. */ -static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) -{ - struct r300_shader_semantics* vs_outputs = &vs->outputs; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int *stream_loc = vs->stream_loc_notcl; - int i, gen_count, tabi = 0; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - - stream_loc[tabi++] = 0; - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - - stream_loc[tabi++] = 1; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - - stream_loc[tabi++] = 2 + i; - } - } - - /* Back-face colors. */ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; - vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - - stream_loc[tabi++] = 4 + i; - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - } - - /* Fog coordinates. */ - if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) { - vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); - - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } - - /* WPOS. */ - if (gen_count < 8) { - vs->wpos_tex_output = gen_count; - stream_loc[tabi++] = 6 + gen_count; - } else { - vs_outputs->wpos = ATTR_UNUSED; - } - - for (; tabi < 16;) { - stream_loc[tabi++] = -1; - } -} - static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) { struct r300_vertex_shader * vs = c->UserData; @@ -246,9 +157,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } /* WPOS. */ - if (outputs->wpos != ATTR_UNUSED) { - c->code->outputs[outputs->wpos] = reg++; - } + c->code->outputs[outputs->wpos] = reg++; } static void r300_dummy_vertex_shader( @@ -286,7 +195,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, tgsi_scan_shader(tokens, &vs->info); r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_init_vs_output_mapping(vs); /* Setup the compiler */ rc_init(&compiler.Base); @@ -307,16 +215,11 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, tokens); - compiler.RequiredOutputs = - ~(~0 << (vs->info.num_outputs + - (vs->outputs.wpos != ATTR_UNUSED ? 1 : 0))); - + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - if (vs->outputs.wpos != ATTR_UNUSED) { - rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); - } + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -343,32 +246,3 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); } - -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) -{ - struct r300_vertex_shader* vs = r300->vs_state.state; - struct r300_vap_output_state *vap_out = &vs->vap_out; - int tex_output = vs->wpos_tex_output; - uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - - if (vs->outputs.wpos == ATTR_UNUSED) { - return FALSE; - } - - if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) { - /* Enable WPOS in VAP. */ - if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { - vap_out->vap_vsm_vtx_assm |= tex_fmt; - vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); - return TRUE; - } - } else { - /* Disable WPOS in VAP. */ - if (vap_out->vap_vsm_vtx_assm & tex_fmt) { - vap_out->vap_vsm_vtx_assm &= ~tex_fmt; - vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); - return TRUE; - } - } - return FALSE; -} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 56bcc3b70b8..57b3fbca0bb 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -39,7 +39,6 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - struct r300_vap_output_state vap_out; /* Whether the shader was replaced by a dummy one due to a shader * compilation failure. */ @@ -49,12 +48,6 @@ struct r300_vertex_shader { unsigned externals_count; unsigned immediates_count; - /* Stream locations for SWTCL or if TCL is bypassed. */ - int stream_loc_notcl[16]; - - /* Output stream location for WPOS. */ - int wpos_tex_output; - /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; @@ -67,7 +60,4 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs, const struct tgsi_token *tokens); -/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ -boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); - #endif /* R300_VS_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index ead489393ef..00187febf0c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_format.h" #include "sp_context.h" #include "sp_quad.h" #include "sp_tile_cache.h" @@ -224,7 +225,8 @@ static void blend_quad(struct quad_stage *qs, float (*quadColor)[4], float (*dest)[4], - unsigned cbuf) + unsigned cbuf, + boolean has_dst_alpha) { static const float zero[4] = { 0, 0, 0, 0 }; static const float one[4] = { 1, 1, 1, 1 }; @@ -259,24 +261,34 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ break; case PIPE_BLENDFACTOR_DST_ALPHA: - { - const float *alpha = dest[3]; - VEC4_MUL(source[0], quadColor[0], alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], alpha); /* B */ - } - break; + if (has_dst_alpha) { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + else { + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + } + break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(temp, alpha, diff); - VEC4_MUL(source[0], quadColor[0], temp); /* R */ - VEC4_MUL(source[1], quadColor[1], temp); /* G */ - VEC4_MUL(source[2], quadColor[2], temp); /* B */ - } - break; + if (has_dst_alpha) { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ + } + else { + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + } + break; case PIPE_BLENDFACTOR_CONST_COLOR: { float comp[4]; @@ -329,14 +341,19 @@ blend_quad(struct quad_stage *qs, } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ - VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ - VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ - } - break; + if (has_dst_alpha) { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + else { + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + } + break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { float inv_comp[4]; @@ -399,7 +416,10 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + if (has_dst_alpha) + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + else + VEC4_COPY(source[3], quadColor[3]); /* A */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* multiply alpha by 1.0 */ @@ -429,12 +449,15 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_alpha[4]; - VEC4_SUB(inv_alpha, one, dest[3]); - VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ - } - break; + if (has_dst_alpha) { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + else { + VEC4_COPY(source[3], zero); /* A */ + } + break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: @@ -468,9 +491,14 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ break; case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ - VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ - VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + if (has_dst_alpha) { + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + } + else { + /* dest = dest * 1 NO-OP, leave dest as-is */ + } break; case PIPE_BLENDFACTOR_DST_COLOR: VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ @@ -478,15 +506,20 @@ blend_quad(struct quad_stage *qs, VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - { - const float *alpha = quadColor[3]; - float diff[4], temp[4]; - VEC4_SUB(diff, one, dest[3]); - VEC4_MIN(temp, alpha, diff); - VEC4_MUL(dest[0], quadColor[0], temp); /* R */ - VEC4_MUL(dest[1], quadColor[1], temp); /* G */ - VEC4_MUL(dest[2], quadColor[2], temp); /* B */ - } + if (has_dst_alpha) { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(dest[0], quadColor[0], temp); /* R */ + VEC4_MUL(dest[1], quadColor[1], temp); /* G */ + VEC4_MUL(dest[2], quadColor[2], temp); /* B */ + } + else { + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + } break; case PIPE_BLENDFACTOR_CONST_COLOR: { @@ -539,13 +572,18 @@ blend_quad(struct quad_stage *qs, } break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ - VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ - VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ - } + if (has_dst_alpha) { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + else { + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + } break; case PIPE_BLENDFACTOR_INV_DST_COLOR: { @@ -605,7 +643,12 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_DST_ALPHA: - VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + if (has_dst_alpha) { + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + } + else { + /* dest = dest * 1 NO-OP, leave dest as-is */ + } break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* dest = dest * 1 NO-OP, leave dest as-is */ @@ -634,12 +677,15 @@ blend_quad(struct quad_stage *qs, case PIPE_BLENDFACTOR_INV_DST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_INV_DST_ALPHA: - { - float inv_comp[4]; - VEC4_SUB(inv_comp, one, dest[3]); /* A */ - VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ - } - break; + if (has_dst_alpha) { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + else { + VEC4_COPY(dest[3], zero); /* A */ + } + break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: /* fall-through */ case PIPE_BLENDFACTOR_INV_CONST_ALPHA: @@ -751,6 +797,8 @@ blend_fallback(struct quad_stage *qs, = sp_get_cached_tile(softpipe->cbuf_cache[cbuf], quads[0]->input.x0, quads[0]->input.y0); + boolean has_dst_alpha + = util_format_has_alpha(softpipe->framebuffer.cbufs[cbuf]->format); uint q, i, j; for (q = 0; q < nr; q++) { @@ -774,7 +822,7 @@ blend_fallback(struct quad_stage *qs, logicop_quad( qs, quadColor, dest ); } else if (blend->rt[blend_buf].blend_enable) { - blend_quad( qs, quadColor, dest, cbuf ); + blend_quad( qs, quadColor, dest, cbuf, has_dst_alpha ); } if (blend->rt[blend_buf].colormask != 0xf) diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index b04c2a63ad6..32cab06004f 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "util/u_rect.h" +#include "util/u_surface.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index fbce9e042ba..b3e1c494069 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -115,6 +115,20 @@ sp_tex_tile_cache_validate_texture(struct softpipe_tex_tile_cache *tc) } } +static boolean +sp_tex_tile_is_compat_view(struct softpipe_tex_tile_cache *tc, + struct pipe_sampler_view *view) +{ + if (!view) + return FALSE; + return (tc->texture == view->texture && + tc->format == view->format && + tc->swizzle_r == view->swizzle_r && + tc->swizzle_g == view->swizzle_g && + tc->swizzle_b == view->swizzle_b && + tc->swizzle_a == view->swizzle_a); +} + /** * Specify the sampler view to cache. */ @@ -127,7 +141,7 @@ sp_tex_tile_cache_set_sampler_view(struct softpipe_tex_tile_cache *tc, assert(!tc->transfer); - if (tc->texture != texture) { + if (!sp_tex_tile_is_compat_view(tc, view)) { pipe_resource_reference(&tc->texture, texture); if (tc->tex_trans) { diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index 3228a6d3d7f..75d8afb2ea8 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -194,6 +194,7 @@ void svga_context_flush( struct svga_context *svga, struct pipe_fence_handle **pfence ) { struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + struct pipe_fence_handle *fence = NULL; svga->curr.nr_fbs = 0; @@ -202,21 +203,26 @@ void svga_context_flush( struct svga_context *svga, u_upload_flush(svga->upload_vb); u_upload_flush(svga->upload_ib); - /* Flush screen, to ensure that texture dma uploads are processed + /* Ensure that texture dma uploads are processed * before submitting commands. */ - svga_screen_flush(svgascreen, NULL); - svga_context_flush_buffers(svga); /* Flush pending commands to hardware: */ - svga->swc->flush(svga->swc, pfence); + svga->swc->flush(svga->swc, &fence); + + svga_screen_cache_flush(svgascreen, fence); if (SVGA_DEBUG & DEBUG_SYNC) { - if (pfence && *pfence) - svga->pipe.screen->fence_finish( svga->pipe.screen, *pfence, 0); + if (fence) + svga->pipe.screen->fence_finish( svga->pipe.screen, fence, 0); } + + if(pfence) + *pfence = fence; + else + svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); } diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c index 005996d05d3..da33fae62f1 100644 --- a/src/gallium/drivers/svga/svga_draw_arrays.c +++ b/src/gallium/drivers/svga/svga_draw_arrays.c @@ -277,12 +277,13 @@ svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl, ret = svga_hwtnl_simple_draw_range_elements( hwtnl, gen_buf, gen_size, + start, 0, count - 1, gen_prim, 0, - gen_nr, - start ); + gen_nr ); + if (ret) goto done; diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 9c6f5858ba4..96fb4b8e536 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -68,6 +68,7 @@ static boolean svga_get_query_result(struct pipe_context *pipe, static struct pipe_query *svga_create_query( struct pipe_context *pipe, unsigned query_type ) { + struct svga_context *svga = svga_context( pipe ); struct svga_screen *svgascreen = svga_screen(pipe->screen); struct svga_winsys_screen *sws = svgascreen->sws; struct svga_query *sq; @@ -80,7 +81,7 @@ static struct pipe_query *svga_create_query( struct pipe_context *pipe, sq->type = SVGA3D_QUERYTYPE_OCCLUSION; - sq->hwbuf = svga_winsys_buffer_create(svgascreen, + sq->hwbuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED, sizeof *sq->queryResult); diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index 18eeff9c67e..198d4013328 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -125,8 +125,6 @@ svga_buffer_map_range( struct pipe_screen *screen, } if(map) { - pipe_mutex_lock(ss->swc_mutex); - ++sbuf->map.count; if (usage & PIPE_TRANSFER_WRITE) { @@ -135,8 +133,6 @@ svga_buffer_map_range( struct pipe_screen *screen, if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) sbuf->map.flush_explicit = TRUE; } - - pipe_mutex_unlock(ss->swc_mutex); } return map; diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h index 55e7321184f..d3ec11bfd52 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.h +++ b/src/gallium/drivers/svga/svga_resource_buffer.h @@ -238,7 +238,7 @@ void svga_context_flush_buffers(struct svga_context *svga); struct svga_winsys_buffer * -svga_winsys_buffer_create(struct svga_screen *ss, +svga_winsys_buffer_create(struct svga_context *svga, unsigned alignment, unsigned usage, unsigned size); diff --git a/src/gallium/drivers/svga/svga_resource_buffer_host.c b/src/gallium/drivers/svga/svga_resource_buffer_host.c deleted file mode 100644 index 139597f9cb0..00000000000 --- a/src/gallium/drivers/svga/svga_resource_buffer_host.c +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index acef60f4647..3de5216a949 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -40,15 +40,21 @@ #include "svga_debug.h" -/* Allocate a winsys_buffer (ie. DMA, aka GMR memory). +/** + * Allocate a winsys_buffer (ie. DMA, aka GMR memory). + * + * It will flush and retry in case the first attempt to create a DMA buffer + * fails, so it should not be called from any function involved in flushing + * to avoid recursion. */ struct svga_winsys_buffer * -svga_winsys_buffer_create( struct svga_screen *ss, +svga_winsys_buffer_create( struct svga_context *svga, unsigned alignment, unsigned usage, unsigned size ) { - struct svga_winsys_screen *sws = ss->sws; + struct svga_screen *svgascreen = svga_screen(svga->pipe.screen); + struct svga_winsys_screen *sws = svgascreen->sws; struct svga_winsys_buffer *buf; /* Just try */ @@ -59,9 +65,8 @@ svga_winsys_buffer_create( struct svga_screen *ss, size); /* Try flushing all pending DMAs */ - svga_screen_flush(ss, NULL); + svga_context_flush(svga, NULL); buf = sws->buffer_create(sws, alignment, usage, size); - } return buf; @@ -95,11 +100,12 @@ svga_buffer_create_hw_storage(struct svga_screen *ss, assert(!sbuf->user); if(!sbuf->hwbuf) { + struct svga_winsys_screen *sws = ss->sws; unsigned alignment = 16; unsigned usage = 0; unsigned size = sbuf->b.b.width0; - sbuf->hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + sbuf->hwbuf = sws->buffer_create(sws, alignment, usage, size); if(!sbuf->hwbuf) return PIPE_ERROR_OUT_OF_MEMORY; @@ -476,12 +482,12 @@ svga_buffer_upload_piecewise(struct svga_screen *ss, if (offset + size > range->end) size = range->end - offset; - hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + hwbuf = sws->buffer_create(sws, alignment, usage, size); while (!hwbuf) { size /= 2; if (!size) return PIPE_ERROR_OUT_OF_MEMORY; - hwbuf = svga_winsys_buffer_create(ss, alignment, usage, size); + hwbuf = sws->buffer_create(sws, alignment, usage, size); } SVGA_DBG(DEBUG_DMA, " bytes %u - %u\n", diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index f06b0323d8c..ff83c750aaf 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -163,12 +163,12 @@ svga_translate_format_render(enum pipe_format format) static INLINE void -svga_transfer_dma_band(struct svga_transfer *st, +svga_transfer_dma_band(struct svga_context *svga, + struct svga_transfer *st, SVGA3dTransferType transfer, unsigned y, unsigned h, unsigned srcy) { struct svga_texture *texture = svga_texture(st->base.resource); - struct svga_screen *screen = svga_screen(texture->b.b.screen); SVGA3dCopyBox box; enum pipe_error ret; @@ -195,20 +195,19 @@ svga_transfer_dma_band(struct svga_transfer *st, box.srcy = srcy; box.srcz = 0; - pipe_mutex_lock(screen->swc_mutex); - ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); if(ret != PIPE_OK) { - screen->swc->flush(screen->swc, NULL); - ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1); + svga->swc->flush(svga->swc, NULL); + ret = SVGA3D_SurfaceDMA(svga->swc, st, transfer, &box, 1); assert(ret == PIPE_OK); } - pipe_mutex_unlock(screen->swc_mutex); } static INLINE void -svga_transfer_dma(struct svga_transfer *st, - SVGA3dTransferType transfer) +svga_transfer_dma(struct svga_context *svga, + struct svga_transfer *st, + SVGA3dTransferType transfer) { struct svga_texture *texture = svga_texture(st->base.resource); struct svga_screen *screen = svga_screen(texture->b.b.screen); @@ -223,10 +222,10 @@ svga_transfer_dma(struct svga_transfer *st, if(!st->swbuf) { /* Do the DMA transfer in a single go */ - svga_transfer_dma_band(st, transfer, st->base.box.y, st->base.box.height, 0); + svga_transfer_dma_band(svga, st, transfer, st->base.box.y, st->base.box.height, 0); if(transfer == SVGA3D_READ_HOST_VRAM) { - svga_screen_flush(screen, &fence); + svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); sws->fence_reference(sws, &fence, NULL); } @@ -256,7 +255,7 @@ svga_transfer_dma(struct svga_transfer *st, /* Wait for the previous DMAs to complete */ /* TODO: keep one DMA (at half the size) in the background */ if(y) { - svga_screen_flush(screen, &fence); + svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); sws->fence_reference(sws, &fence, NULL); } @@ -269,10 +268,10 @@ svga_transfer_dma(struct svga_transfer *st, } } - svga_transfer_dma_band(st, transfer, y, h, srcy); + svga_transfer_dma_band(svga, st, transfer, y, h, srcy); if(transfer == SVGA3D_READ_HOST_VRAM) { - svga_screen_flush(screen, &fence); + svga_context_flush(svga, &fence); sws->fence_finish(sws, fence, 0); hw = sws->buffer_map(sws, st->hwbuf, PIPE_TRANSFER_READ); @@ -342,6 +341,7 @@ svga_texture_get_transfer(struct pipe_context *pipe, unsigned usage, const struct pipe_box *box) { + struct svga_context *svga = svga_context(pipe); struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st; @@ -365,12 +365,12 @@ svga_texture_get_transfer(struct pipe_context *pipe, st->hw_nblocksy = nblocksy; - st->hwbuf = svga_winsys_buffer_create(ss, + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, st->hw_nblocksy*st->base.stride); while(!st->hwbuf && (st->hw_nblocksy /= 2)) { - st->hwbuf = svga_winsys_buffer_create(ss, + st->hwbuf = svga_winsys_buffer_create(svga, 1, 0, st->hw_nblocksy*st->base.stride); @@ -393,7 +393,7 @@ svga_texture_get_transfer(struct pipe_context *pipe, } if (usage & PIPE_TRANSFER_READ) - svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM); + svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM); return &st->base; @@ -445,13 +445,14 @@ static void svga_texture_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *transfer) { + struct svga_context *svga = svga_context(pipe); struct svga_texture *tex = svga_texture(transfer->resource); struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); if (st->base.usage & PIPE_TRANSFER_WRITE) { - svga_transfer_dma(st, SVGA3D_WRITE_HOST_VRAM); + svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM); ss->texture_timestamp++; tex->view_age[transfer->sr.level] = ++(tex->age); tex->defined[transfer->sr.face][transfer->sr.level] = TRUE; diff --git a/src/gallium/drivers/svga/svga_sampler_view.c b/src/gallium/drivers/svga/svga_sampler_view.c index fbae552f78f..6911f13f778 100644 --- a/src/gallium/drivers/svga/svga_sampler_view.c +++ b/src/gallium/drivers/svga/svga_sampler_view.c @@ -169,7 +169,7 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view * for (i = v->min_lod; i <= v->max_lod; i++) { for (k = 0; k < numFaces; k++) { if (v->age < tex->view_age[i]) - svga_texture_copy_handle(svga, NULL, + svga_texture_copy_handle(svga, tex->handle, 0, 0, 0, i, k, v->handle, 0, 0, 0, i - v->min_lod, k, u_minify(tex->b.b.width0, i), diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index aeda3dcad51..27ac09e275d 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -34,6 +34,7 @@ #include "svga_resource_texture.h" #include "svga_resource.h" #include "svga_debug.h" +#include "svga_surface.h" #include "svga3d_shaderdefs.h" @@ -346,8 +347,6 @@ svga_destroy_screen( struct pipe_screen *screen ) pipe_mutex_destroy(svgascreen->swc_mutex); pipe_mutex_destroy(svgascreen->tex_mutex); - svgascreen->swc->destroy(svgascreen->swc); - svgascreen->sws->destroy(svgascreen->sws); FREE(svgascreen); @@ -397,6 +396,7 @@ svga_screen_create(struct svga_winsys_screen *sws) screen->fence_finish = svga_fence_finish; svgascreen->sws = sws; + svga_screen_init_surface_functions(screen); svga_init_screen_resource_functions(svgascreen); svgascreen->use_ps30 = @@ -416,10 +416,6 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE; #endif - svgascreen->swc = sws->context_create(sws); - if(!svgascreen->swc) - goto error2; - pipe_mutex_init(svgascreen->tex_mutex); pipe_mutex_init(svgascreen->swc_mutex); @@ -432,25 +428,6 @@ error1: return NULL; } -void svga_screen_flush( struct svga_screen *svgascreen, - struct pipe_fence_handle **pfence ) -{ - struct pipe_fence_handle *fence = NULL; - - SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__); - - pipe_mutex_lock(svgascreen->swc_mutex); - svgascreen->swc->flush(svgascreen->swc, &fence); - pipe_mutex_unlock(svgascreen->swc_mutex); - - svga_screen_cache_flush(svgascreen, fence); - - if(pfence) - *pfence = fence; - else - svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL); -} - struct svga_winsys_screen * svga_winsys_screen(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h index 9dc229b0a87..86ec89d88c1 100644 --- a/src/gallium/drivers/svga/svga_screen.h +++ b/src/gallium/drivers/svga/svga_screen.h @@ -60,14 +60,11 @@ struct svga_screen boolean no_sampler_view; } debug; - /* The screen needs its own context */ - struct svga_winsys_context *swc; - struct SVGACmdMemory *fifo; - unsigned texture_timestamp; pipe_mutex tex_mutex; - pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */ - + + pipe_mutex swc_mutex; /* Used for buffer uploads */ + struct svga_host_surface_cache cache; }; @@ -83,7 +80,4 @@ struct svga_screen * svga_screen(struct pipe_screen *screen); #endif -void svga_screen_flush( struct svga_screen *svga_screen, - struct pipe_fence_handle **pfence ); - #endif /* SVGA_SCREEN_H */ diff --git a/src/gallium/drivers/svga/svga_surface.c b/src/gallium/drivers/svga/svga_surface.c index dc1d9a850df..bd2cc380048 100644 --- a/src/gallium/drivers/svga/svga_surface.c +++ b/src/gallium/drivers/svga/svga_surface.c @@ -43,7 +43,6 @@ void svga_texture_copy_handle(struct svga_context *svga, - struct svga_screen *ss, struct svga_winsys_surface *src_handle, unsigned src_x, unsigned src_y, unsigned src_z, unsigned src_level, unsigned src_face, @@ -56,7 +55,7 @@ svga_texture_copy_handle(struct svga_context *svga, enum pipe_error ret; SVGA3dCopyBox box, *boxes; - assert(svga || ss); + assert(svga); src.handle = src_handle; src.real_level = src_level; @@ -84,39 +83,20 @@ svga_texture_copy_handle(struct svga_context *svga, dst_handle, dst_level, dst_x, dst_y, dst_z); */ - if (svga) { + ret = SVGA3D_BeginSurfaceCopy(svga->swc, + &src.base, + &dst.base, + &boxes, 1); + if(ret != PIPE_OK) { + svga_context_flush(svga, NULL); ret = SVGA3D_BeginSurfaceCopy(svga->swc, &src.base, &dst.base, &boxes, 1); - if(ret != PIPE_OK) { - svga_context_flush(svga, NULL); - ret = SVGA3D_BeginSurfaceCopy(svga->swc, - &src.base, - &dst.base, - &boxes, 1); - assert(ret == PIPE_OK); - } - *boxes = box; - SVGA_FIFOCommitAll(svga->swc); - } else { - pipe_mutex_lock(ss->swc_mutex); - ret = SVGA3D_BeginSurfaceCopy(ss->swc, - &src.base, - &dst.base, - &boxes, 1); - if(ret != PIPE_OK) { - ss->swc->flush(ss->swc, NULL); - ret = SVGA3D_BeginSurfaceCopy(ss->swc, - &src.base, - &dst.base, - &boxes, 1); - assert(ret == PIPE_OK); - } - *boxes = box; - SVGA_FIFOCommitAll(ss->swc); - pipe_mutex_unlock(ss->swc_mutex); + assert(ret == PIPE_OK); } + *boxes = box; + SVGA_FIFOCommitAll(svga->swc); } @@ -183,7 +163,6 @@ svga_texture_view_surface(struct pipe_context *pipe, 1); svga_texture_copy_handle(svga_context(pipe), - ss, tex->handle, 0, 0, z_offset, i + start_mip, @@ -346,7 +325,7 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf) if (s->handle != tex->handle) { SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf); - svga_texture_copy_handle(svga_context(pipe), ss, + svga_texture_copy_handle(svga_context(pipe), s->handle, 0, 0, 0, s->real_level, s->real_face, tex->handle, 0, 0, surf->zslice, surf->level, surf->face, u_minify(tex->b.b.width0, surf->level), diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h index b50ecdc9942..13bd5b19b61 100644 --- a/src/gallium/drivers/svga/svga_surface.h +++ b/src/gallium/drivers/svga/svga_surface.h @@ -74,7 +74,6 @@ svga_texture_view_surface(struct pipe_context *pipe, void svga_texture_copy_handle(struct svga_context *svga, - struct svga_screen *ss, struct svga_winsys_surface *src_handle, unsigned src_x, unsigned src_y, unsigned src_z, unsigned src_level, unsigned src_face, diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 3892addafd1..c155f5dae2f 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -85,7 +85,7 @@ struct svga_winsys_context /** * Emit a relocation for a host surface. * - * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * @param flags bitmask of SVGA_RELOC_* flags * * NOTE: Order of this call does matter. It should be the same order * as relocations appear in the command buffer. @@ -99,7 +99,7 @@ struct svga_winsys_context /** * Emit a relocation for a guest memory region. * - * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE + * @param flags bitmask of SVGA_RELOC_* flags * * NOTE: Order of this call does matter. It should be the same order * as relocations appear in the command buffer. @@ -230,7 +230,7 @@ struct svga_winsys_screen /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * - * XXX usage seems to be a bitmask of SVGA_BUFFER_USAGE_* flags. + * @param usage bitmask of SVGA_BUFFER_USAGE_* flags. * * alignment indicates the client's alignment requirements, eg for * SSE instructions. @@ -243,15 +243,12 @@ struct svga_winsys_screen /** * Map the entire data store of a buffer object into the client's address. - * flags is a bitmask of: - * - PB_USAGE_CPU_READ/WRITE - * - PB_USAGE_DONTBLOCK - * - PB_USAGE_UNSYNCHRONIZED + * flags is a bitmaks of PIPE_TRANSFER_* */ void * (*buffer_map)( struct svga_winsys_screen *sws, struct svga_winsys_buffer *buf, - unsigned usage ); + unsigned flags ); void (*buffer_unmap)( struct svga_winsys_screen *sws, diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 71ba1e909df..3d1fb96b91e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -812,7 +812,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - trace_dump_call_begin("pipe_context", "delete_verte_elements_state"); + trace_dump_call_begin("pipe_context", "delete_vertex_elements_state"); trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, state); diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index e21aaacc18a..0a70237f260 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -73,9 +73,8 @@ enum tgsi_file_type { TGSI_FILE_SAMPLER =5, TGSI_FILE_ADDRESS =6, TGSI_FILE_IMMEDIATE =7, - TGSI_FILE_LOOP =8, - TGSI_FILE_PREDICATE =9, - TGSI_FILE_SYSTEM_VALUE =10, + TGSI_FILE_PREDICATE =8, + TGSI_FILE_SYSTEM_VALUE =9, TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ }; diff --git a/src/gallium/state_trackers/dri/common/dri_context.c b/src/gallium/state_trackers/dri/common/dri_context.c index a808d2d9ddf..203682ef330 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.c +++ b/src/gallium/state_trackers/dri/common/dri_context.c @@ -49,7 +49,7 @@ dri_init_extensions(struct dri_context *ctx) } GLboolean -dri_create_context(const __GLcontextModes * visual, +dri_create_context(gl_api api, const __GLcontextModes * visual, __DRIcontext * cPriv, void *sharedContextPrivate) { __DRIscreen *sPriv = cPriv->driScreenPriv; diff --git a/src/gallium/state_trackers/dri/common/dri_context.h b/src/gallium/state_trackers/dri/common/dri_context.h index 9fe6b581016..54e56c64998 100644 --- a/src/gallium/state_trackers/dri/common/dri_context.h +++ b/src/gallium/state_trackers/dri/common/dri_context.h @@ -34,6 +34,7 @@ #include "pipe/p_compiler.h" #include "dri_wrapper.h" +#include "main/mtypes.h" struct pipe_context; struct pipe_fence; @@ -83,7 +84,8 @@ struct dri_context * dri_get_current(__DRIscreen * driScreenPriv); boolean -dri_create_context(const __GLcontextModes * visual, +dri_create_context(gl_api api, + const __GLcontextModes * visual, __DRIcontext * driContextPriv, void *sharedContextPrivate); diff --git a/src/gallium/state_trackers/dri/drm/dri1.c b/src/gallium/state_trackers/dri/drm/dri1.c index 23c21ed8398..326ff8bcada 100644 --- a/src/gallium/state_trackers/dri/drm/dri1.c +++ b/src/gallium/state_trackers/dri/drm/dri1.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_surface.h" #include "util/u_inlines.h" #include "pipe/p_context.h" #include "state_tracker/dri1_api.h" diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c index 48fbc3b330e..e6aea482a76 100644 --- a/src/gallium/state_trackers/vega/renderer.c +++ b/src/gallium/state_trackers/vega/renderer.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "util/u_rect.h" #include "util/u_sampler.h" +#include "util/u_surface.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c index d5a1be81747..65be8c332a5 100644 --- a/src/gallium/state_trackers/xorg/xorg_exa.c +++ b/src/gallium/state_trackers/xorg/xorg_exa.c @@ -46,6 +46,7 @@ #include "util/u_math.h" #include "util/u_debug.h" #include "util/u_format.h" +#include "util/u_surface.h" #define DEBUG_PRINT 0 #define ROUND_UP_TEXTURES 1 diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c index 13fa561390f..583493116d5 100644 --- a/src/gallium/state_trackers/xorg/xorg_renderer.c +++ b/src/gallium/state_trackers/xorg/xorg_renderer.c @@ -9,6 +9,7 @@ #include "util/u_memory.h" #include "util/u_rect.h" #include "util/u_sampler.h" +#include "util/u_surface.h" #include "util/u_inlines.h" diff --git a/src/gallium/targets/dri-swrast/Makefile b/src/gallium/targets/dri-swrast/Makefile index fcfd690e438..3db9781c209 100644 --- a/src/gallium/targets/dri-swrast/Makefile +++ b/src/gallium/targets/dri-swrast/Makefile @@ -8,7 +8,8 @@ DRIVER_DEFINES = -D__NOT_HAVE_DRM_H -DGALLIUM_SOFTPIPE PIPE_DRIVERS = \ $(TOP)/src/gallium/state_trackers/dri/sw/libdrisw.a \ $(TOP)/src/gallium/winsys/sw/dri/libswdri.a \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a SWRAST_COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ diff --git a/src/gallium/targets/dri-swrast/swrast_drm_api.c b/src/gallium/targets/dri-swrast/swrast_drm_api.c index e8d6d8069cc..84142be80c8 100644 --- a/src/gallium/targets/dri-swrast/swrast_drm_api.c +++ b/src/gallium/targets/dri-swrast/swrast_drm_api.c @@ -31,6 +31,7 @@ #include "state_tracker/drm_api.h" #include "state_tracker/sw_winsys.h" #include "dri_sw_winsys.h" +#include "trace/tr_public.h" /* Copied from targets/libgl-xlib */ @@ -80,7 +81,7 @@ swrast_create_screen(struct sw_winsys *winsys) screen = softpipe_create_screen( winsys ); #endif - return screen; + return trace_screen_create(screen);; } struct pipe_screen * diff --git a/src/gallium/tests/python/retrace/interpreter.py b/src/gallium/tests/python/retrace/interpreter.py index 7118ff85ed8..e58a69322ea 100755 --- a/src/gallium/tests/python/retrace/interpreter.py +++ b/src/gallium/tests/python/retrace/interpreter.py @@ -432,6 +432,9 @@ class Context(Object): swizzle_b = templ.swizzle_g, swizzle_a = templ.swizzle_a) + def sampler_view_destroy(self, view): + pass + def set_fragment_sampler_views(self, num, views): for i in range(num): self.real.set_fragment_sampler_view(i, views[i]) @@ -456,6 +459,10 @@ class Context(Object): return elements[0:num_elements] def bind_vertex_elements_state(self, state): + if state is None: + self.real.set_vertex_elements(0) + return + elements = state num_elements = len(elements) self.velems = elements diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index 9824ada5b33..b8366498922 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -22,6 +22,8 @@ struct radeon_drm_buffer { boolean flinked; uint32_t flink; + uint32_t tileflags; + uint32_t pitch; struct radeon_drm_buffer *next, *prev; }; @@ -318,6 +320,9 @@ void radeon_drm_bufmgr_get_tiling(struct pb_buffer *_buf, radeon_bo_get_tiling(buf->bo, &flags, &pitch); + buf->tileflags = flags; + buf->pitch = pitch; + *microtiled = R300_BUFFER_LINEAR; *macrotiled = R300_BUFFER_LINEAR; if (flags & RADEON_BO_FLAGS_MICRO_TILE) @@ -333,7 +338,7 @@ void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, uint32_t pitch) { struct radeon_drm_buffer *buf = get_drm_buffer(_buf); - uint32_t flags = 0, old_flags, old_pitch; + uint32_t flags = 0; if (microtiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MICRO_TILE; /* XXX Remove this ifdef when libdrm version 2.4.19 becomes mandatory. */ @@ -344,17 +349,15 @@ void radeon_drm_bufmgr_set_tiling(struct pb_buffer *_buf, if (macrotiled == R300_BUFFER_TILED) flags |= RADEON_BO_FLAGS_MACRO_TILE; - radeon_bo_get_tiling(buf->bo, &old_flags, &old_pitch); - - if (flags != old_flags || pitch != old_pitch) { + if (flags != buf->tileflags || pitch != buf->pitch) { /* Tiling determines how DRM treats the buffer data. * We must flush CS when changing it if the buffer is referenced. */ if (radeon_bo_is_referenced_by_cs(buf->bo, buf->mgr->rws->cs)) { buf->mgr->rws->flush_cb(buf->mgr->rws->flush_data); } - } - radeon_bo_set_tiling(buf->bo, flags, pitch); + radeon_bo_set_tiling(buf->bo, flags, pitch); + } } boolean radeon_drm_bufmgr_add_buffer(struct pb_buffer *_buf, diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index 80923de9373..fb779e4033b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -181,7 +181,7 @@ static boolean radeon_check_cs(struct r300_winsys_screen *rws, int size) struct radeon_libdrm_winsys *ws = radeon_winsys_screen(rws); struct radeon_cs *cs = ws->cs; - return radeon_validate(rws) && cs->cdw + size <= cs->ndw; + return cs->cdw + size <= cs->ndw; } static void radeon_begin_cs(struct r300_winsys_screen *rws, |