From bdfbeb9633eb3f8cf1ad76723f6c3839e57a08a3 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 3 Apr 2013 02:49:56 +0200 Subject: gallivm: minor rho calculation optimization for 1 or 3 coords Using a different packing for the single coord case should save a shuffle. Plus some minor style fixes. Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_quad.c | 20 ++++++++--------- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 31 +++++++++++---------------- 2 files changed, 22 insertions(+), 29 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 1955add8883..f2a762aec2a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -81,7 +81,8 @@ lp_build_ddy(struct lp_build_context *bld, /* * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): - * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____ + * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____ + * This only requires one shuffle instead of two for more straightforward packing. */ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, @@ -91,19 +92,15 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMBuilderRef builder = gallivm->builder; LLVMValueRef vec1, vec2; - /* same packing as _twocoord, but can use aos swizzle helper */ + /* use aos swizzle helper */ - /* - * XXX could make swizzle1 a noop swizzle by using right top/bottom - * pair for ddy - */ - static const unsigned char swizzle1[] = { - LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + static const unsigned char swizzle1[] = { /* no-op swizzle */ + LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { - LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT, - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE + LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE, + LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE }; vec1 = lp_build_swizzle_aos(bld, a, swizzle1); @@ -120,6 +117,7 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, * Helper for building packed ddx/ddy vector for one coord (scalar per quad * values). The vector will look like this (8-wide): * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy + * This only needs 2 (v)shufps. */ LLVMValueRef lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index fc8bae70152..9a008971c04 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -226,7 +226,6 @@ lp_build_rho(struct lp_build_sample_context *bld, LLVMValueRef int_size, float_size; LLVMValueRef rho; LLVMValueRef first_level, first_level_vec; - LLVMValueRef abs_ddx_ddy[2]; unsigned length = coord_bld->type.length; unsigned num_quads = length / 4; unsigned i; @@ -279,32 +278,28 @@ lp_build_rho(struct lp_build_sample_context *bld, ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s); } else if (dims >= 2) { - ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, - s, t); + ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); if (dims > 2) { ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); } } - abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); + ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]); if (dims > 2) { - abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); - } - else { - abs_ddx_ddy[1] = NULL; + ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]); } - if (dims == 1) { - static const unsigned char swizzle1[] = { + if (dims < 2) { + static const unsigned char swizzle1[] = { /* no-op swizzle */ 0, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; static const unsigned char swizzle2[] = { - 1, LP_BLD_SWIZZLE_DONTCARE, + 2, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); + rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1); + rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2); } else if (dims == 2) { static const unsigned char swizzle1[] = { @@ -315,8 +310,8 @@ lp_build_rho(struct lp_build_sample_context *bld, 1, 3, LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE }; - rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1); - rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2); + rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1); + rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2); } else { LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH]; @@ -329,12 +324,12 @@ lp_build_rho(struct lp_build_sample_context *bld, shuffles1[4*i + 3] = i32undef; shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1); shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3); - shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1); + shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2); shuffles2[4*i + 3] = i32undef; } - rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], + rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1], LLVMConstVector(shuffles1, length), ""); - rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1], + rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1], LLVMConstVector(shuffles2, length), ""); } -- cgit v1.2.3