From 5b4c43d98556c5a4806757513bcb196a724518c5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 5 Sep 2010 13:17:43 +0100 Subject: llvmpipe: use llvm for attribute interpolant calculation Basically no change relative to hard-coded version, but this will be useful for other changes later. --- src/gallium/drivers/llvmpipe/SConscript | 5 +- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 26 +- src/gallium/drivers/llvmpipe/lp_context.c | 3 + src/gallium/drivers/llvmpipe/lp_context.h | 10 +- src/gallium/drivers/llvmpipe/lp_flush.h | 1 + src/gallium/drivers/llvmpipe/lp_limits.h | 10 + src/gallium/drivers/llvmpipe/lp_setup.c | 19 +- src/gallium/drivers/llvmpipe/lp_setup.h | 29 +- src/gallium/drivers/llvmpipe/lp_setup_coef.c | 279 -------- src/gallium/drivers/llvmpipe/lp_setup_coef.h | 64 -- .../drivers/llvmpipe/lp_setup_coef_intrin.c | 228 ------ src/gallium/drivers/llvmpipe/lp_setup_context.h | 12 +- src/gallium/drivers/llvmpipe/lp_setup_line.c | 20 +- src/gallium/drivers/llvmpipe/lp_setup_point.c | 13 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 42 +- src/gallium/drivers/llvmpipe/lp_state.h | 3 + src/gallium/drivers/llvmpipe/lp_state_derived.c | 69 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 80 ++- src/gallium/drivers/llvmpipe/lp_state_fs.h | 4 + src/gallium/drivers/llvmpipe/lp_state_setup.c | 768 +++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_state_setup.h | 81 +++ .../drivers/llvmpipe/lp_state_setup_fallback.c | 265 +++++++ 22 files changed, 1315 insertions(+), 716 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef.c delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef.h delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c create mode 100644 src/gallium/drivers/llvmpipe/lp_state_setup.c create mode 100644 src/gallium/drivers/llvmpipe/lp_state_setup.h create mode 100644 src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 650435f0f19..6ddce659206 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -61,16 +61,17 @@ llvmpipe = env.ConvenienceLibrary( 'lp_scene_queue.c', 'lp_screen.c', 'lp_setup.c', + 'lp_setup_debug.c', 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', - 'lp_setup_coef.c', - 'lp_setup_coef_intrin.c', 'lp_setup_vbuf.c', 'lp_state_blend.c', 'lp_state_clip.c', 'lp_state_derived.c', 'lp_state_fs.c', + 'lp_state_setup.c', + 'lp_state_setup_fallback.c', 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 3054030f739..37479fca9dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -46,7 +46,31 @@ #include "tgsi/tgsi_exec.h" -#include "lp_setup.h" +/** + * Describes how to compute the interpolation coefficients (a0, dadx, dady) + * from the vertices passed into our triangle/line/point functions by the + * draw module. + * + * Vertices are treated as an array of float[4] values, indexed by + * src_index. + * + * LP_INTERP_COLOR is translated to either LP_INTERP_CONSTANT or + * LINEAR depending on flatshade state. + */ +enum lp_interp { + LP_INTERP_CONSTANT, + LP_INTERP_COLOR, + LP_INTERP_LINEAR, + LP_INTERP_PERSPECTIVE, + LP_INTERP_POSITION, + LP_INTERP_FACING +}; + +struct lp_shader_input { + ushort interp:4; /* enum lp_interp */ + ushort usage_mask:4; /* bitmask of TGSI_WRITEMASK_x flags */ + ushort src_index:8; /* where to find values in incoming vertices */ +}; struct lp_build_interp_soa_context diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 39f2c6085ef..763432ed712 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -82,6 +82,8 @@ static void llvmpipe_destroy( struct pipe_context *pipe ) } } + lp_delete_setup_variants(llvmpipe); + align_free( llvmpipe ); } @@ -108,6 +110,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) memset(llvmpipe, 0, sizeof *llvmpipe); make_empty_list(&llvmpipe->fs_variants_list); + make_empty_list(&llvmpipe->setup_variants_list); llvmpipe->pipe.winsys = screen->winsys; llvmpipe->pipe.screen = screen; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 34fa20e204a..db09c95b272 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -39,6 +39,7 @@ #include "lp_jit.h" #include "lp_setup.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" struct llvmpipe_vbuf_render; @@ -48,6 +49,7 @@ struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; struct lp_setup_context; +struct lp_setup_variant; struct lp_velems_state; struct llvmpipe_context { @@ -105,12 +107,9 @@ struct llvmpipe_context { /** Which vertex shader output slot contains point size */ int psize_slot; - /** Fragment shader input interpolation info */ - unsigned num_inputs; - struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; - /** The tiling engine */ struct lp_setup_context *setup; + struct lp_setup_variant setup_variant; /** The primitive drawing context */ struct draw_context *draw; @@ -120,6 +119,9 @@ struct llvmpipe_context { struct lp_fs_variant_list_item fs_variants_list; unsigned nr_fs_variants; + + struct lp_setup_variant_list_item setup_variants_list; + unsigned nr_setup_variants; }; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index bb538b2bd83..3626ce4a86c 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -32,6 +32,7 @@ struct pipe_context; struct pipe_fence_handle; +struct pipe_resource; void llvmpipe_flush(struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h index d1c431475d8..2538164ffaa 100644 --- a/src/gallium/drivers/llvmpipe/lp_limits.h +++ b/src/gallium/drivers/llvmpipe/lp_limits.h @@ -72,4 +72,14 @@ */ #define LP_MAX_SHADER_VARIANTS 1024 +/** + * Max number of setup variants that will be kept around. + * + * These are determined by the combination of the fragment shader + * input signature and a small amount of rasterization state (eg + * flatshading). It is likely that many active fragment shaders will + * share the same setup variant. + */ +#define LP_MAX_SETUP_VARIANTS 64 + #endif /* LP_LIMITS_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 6674d281d1e..3854fd70af7 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -500,14 +500,12 @@ lp_setup_set_point_state( struct lp_setup_context *setup, } void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *input, - unsigned nr ) +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant) { - LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr); - - memcpy( setup->fs.input, input, nr * sizeof input[0] ); - setup->fs.nr_inputs = nr; + LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + + setup->setup.variant = variant; } void @@ -863,6 +861,13 @@ lp_setup_update_state( struct lp_setup_context *setup, setup->psize = lp->psize_slot; assert(lp->dirty == 0); + + assert(lp->setup_variant.key.size == + setup->setup.variant->key.size); + + assert(memcmp(&lp->setup_variant.key, + &setup->setup.variant->key, + setup->setup.variant->key.size) == 0); } if (update_scene) diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index b94061b7d49..19078ebbcb4 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -33,28 +33,6 @@ struct draw_context; struct vertex_info; -enum lp_interp { - LP_INTERP_CONSTANT, - LP_INTERP_LINEAR, - LP_INTERP_PERSPECTIVE, - LP_INTERP_POSITION, - LP_INTERP_FACING -}; - - -/** - * Describes how to compute the interpolation coefficients (a0, dadx, dady) - * from the vertices passed into our triangle/line/point functions by the - * draw module. - * - * Vertices are treated as an array of float[4] values, indexed by - * src_index. - */ -struct lp_shader_input { - enum lp_interp interp; /* how to interpolate values */ - unsigned src_index; /* where to find values in incoming vertices */ - unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */ -}; struct pipe_resource; struct pipe_query; @@ -66,7 +44,7 @@ struct lp_fragment_shader_variant; struct lp_jit_context; struct llvmpipe_query; struct pipe_fence_handle; - +struct lp_setup_variant; struct lp_setup_context * lp_setup_create( struct pipe_context *pipe, @@ -110,9 +88,8 @@ lp_setup_set_point_state( struct lp_setup_context *setup, uint sprite); void -lp_setup_set_fs_inputs( struct lp_setup_context *setup, - const struct lp_shader_input *interp, - unsigned nr ); +lp_setup_set_setup_variant( struct lp_setup_context *setup, + const struct lp_setup_variant *variant ); void lp_setup_set_fs_variant( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c deleted file mode 100644 index 8dc2688ddb6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ /dev/null @@ -1,279 +0,0 @@ -/************************************************************************** - * - * Copyright 2010, VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" -#include "lp_state_fs.h" - -#if !defined(PIPE_ARCH_SSE) - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - const float value, - unsigned i ) -{ - inputs->a0[slot][i] = value; - inputs->dadx[slot][i] = 0.0f; - inputs->dady[slot][i] = 0.0f; -} - - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - float a0 = info->v0[vert_attr][i]; - float a1 = info->v1[vert_attr][i]; - float a2 = info->v2[vert_attr][i]; - - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); - float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a0 = info->v0[vert_attr][i] * info->v0[0][3]; - float a1 = info->v1[vert_attr][i] * info->v1[0][3]; - float a2 = info->v2[vert_attr][i] * info->v2[0][3]; - float da01 = a0 - a1; - float da20 = a2 - a0; - float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; - float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; - - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + - dady * info->y0_center); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned usage_mask) -{ - /*X*/ - if (usage_mask & TGSI_WRITEMASK_X) { - inputs->a0[slot][0] = 0.0; - inputs->dadx[slot][0] = 1.0; - inputs->dady[slot][0] = 0.0; - } - - /*Y*/ - if (usage_mask & TGSI_WRITEMASK_Y) { - inputs->a0[slot][1] = 0.0; - inputs->dadx[slot][1] = 0.0; - inputs->dady[slot][1] = 1.0; - } - - /*Z*/ - if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(inputs, info, slot, 0, 2); - } - - /*W*/ - if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(inputs, info, slot, 0, 3); - } -} - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - unsigned slot, - boolean frontface, - unsigned usage_mask) -{ - /* convert TRUE to 1.0 and FALSE to -1.0 */ - if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); - - if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ - - if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ -} - - -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; - unsigned slot; - unsigned i; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v0[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v2[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - linear_coef(inputs, &info, slot+1, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (usage_mask & (1 << i)) - perspective_coef(inputs, &info, slot+1, vert_attr, i); - fragcoord_usage_mask |= TGSI_WRITEMASK_W; - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0, so all need to ensure that the usage mask is covers all - * usages. - */ - fragcoord_usage_mask |= usage_mask; - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask); - break; - - default: - assert(0); - } - } - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask); -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h deleted file mode 100644 index 87a3255ccc6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * The setup code is concerned with point/line/triangle setup and - * putting commands/data into the bins. - */ - - -#ifndef LP_SETUP_COEF_H -#define LP_SETUP_COEF_H - - -struct lp_tri_info { - - float x0_center; - float y0_center; - - /* turn these into an aligned float[4] */ - float dy01_ooa; - float dy20_ooa; - float dx01_ooa; - float dx20_ooa; - - const float (*v0)[4]; - const float (*v1)[4]; - const float (*v2)[4]; - - boolean frontfacing; /* remove eventually */ -}; - -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing); - -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c deleted file mode 100644 index 3742fd672b2..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ /dev/null @@ -1,228 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Binning code for triangles - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_perf.h" -#include "lp_setup_context.h" -#include "lp_setup_coef.h" -#include "lp_rast.h" - -#if defined(PIPE_ARCH_SSE) -#include - - -static void constant_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - const float *attr) -{ - *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot ) -{ - /* XXX: just pass frontface directly to the shader, don't bother - * treating it as an input. - */ - __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, - 0, 0, 0); - - *(__m128 *)inputs->a0[slot] = a0; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); -} - - - -static void calc_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - __m128 a0, - __m128 a1, - __m128 a2) -{ - __m128 da01 = _mm_sub_ps(a0, a1); - __m128 da20 = _mm_sub_ps(a2, a0); - - __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa)); - __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa)); - __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); - - __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa)); - __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa)); - __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); - - __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center)); - __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center)); - __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); - __m128 attr_0 = _mm_sub_ps(a0, attr_v0); - - *(__m128 *)inputs->a0[slot] = attr_0; - *(__m128 *)inputs->dadx[slot] = dadx; - *(__m128 *)inputs->dady[slot] = dady; -} - - -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - calc_coef4(inputs, info, slot, a0, a1, a2); -} - - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, - unsigned slot, - unsigned vert_attr) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - __m128 a0 = *(const __m128 *)info->v0[vert_attr]; - __m128 a1 = *(const __m128 *)info->v1[vert_attr]; - __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - - __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3])); - __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); - __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); - - calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); -} - - - - - -/** - * Compute the inputs-> dadx, dady, a0 values. - */ -void lp_setup_tri_coef( struct lp_setup_context *setup, - struct lp_rast_shader_inputs *inputs, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean frontfacing) -{ - unsigned slot; - struct lp_tri_info info; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - - info.v0 = v0; - info.v1 = v1; - info.v2 = v2; - info.frontfacing = frontfacing; - info.x0_center = v0[0][0] - setup->pixel_offset; - info.y0_center = v0[0][1] - setup->pixel_offset; - info.dx01_ooa = dx01 * oneoverarea; - info.dx20_ooa = dx20 * oneoverarea; - info.dy01_ooa = dy01 * oneoverarea; - info.dy20_ooa = dy20 * oneoverarea; - - - /* The internal position input is in slot zero: - */ - linear_coef(inputs, &info, 0, 0); - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]); - } - else { - constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]); - } - break; - - case LP_INTERP_LINEAR: - linear_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_PERSPECTIVE: - perspective_coef(inputs, &info, slot+1, vert_attr); - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0. - */ - break; - - case LP_INTERP_FACING: - setup_facing_coef(inputs, &info, slot+1); - break; - - default: - assert(0); - } - } -} - -#else -extern void lp_setup_coef_dummy(void); -void lp_setup_coef_dummy(void) -{ -} -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 80b356476ab..ff3b69afa83 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -39,6 +39,7 @@ #include "lp_rast.h" #include "lp_tile_soa.h" /* for TILE_SIZE */ #include "lp_scene.h" +#include "lp_bld_interp.h" /* for struct lp_shader_input */ #include "draw/draw_vbuf.h" #include "util/u_rect.h" @@ -49,6 +50,8 @@ #define LP_SETUP_NEW_SCISSOR 0x08 +struct lp_setup_variant; + /** Max number of scenes */ #define MAX_SCENES 2 @@ -118,9 +121,6 @@ struct lp_setup_context } state; struct { - struct lp_shader_input input[PIPE_MAX_ATTRIBS]; - unsigned nr_inputs; - const struct lp_rast_state *stored; /**< what's in the scene */ struct lp_rast_state current; /**< currently set state */ struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS]; @@ -139,6 +139,10 @@ struct lp_setup_context } blend_color; + struct { + const struct lp_setup_variant *variant; + } setup; + unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ void (*point)( struct lp_setup_context *, @@ -181,7 +185,7 @@ lp_setup_print_vertex(struct lp_setup_context *setup, struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, + unsigned num_inputs, unsigned nr_planes, unsigned *tri_size); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 9f090d1992e..928ffdc5cb5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -35,6 +35,7 @@ #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 @@ -162,19 +163,20 @@ static void setup_line_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, struct lp_line_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; unsigned i; - switch (setup->fs.input[slot].interp) { + switch (key->inputs[slot].interp) { case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { + if (key->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); @@ -235,14 +237,15 @@ print_line(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; uint i; debug_printf("llvmpipe line\n"); - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v1[%d]: %f %f %f %f\n", i, v1[i][0], v1[i][1], v1[i][2], v1[i][3]); } - for (i = 0; i < 1 + setup->fs.nr_inputs; i++) { + for (i = 0; i < 1 + key->num_inputs; i++) { debug_printf(" v2[%d]: %f %f %f %f\n", i, v2[i][0], v2[i][1], v2[i][2], v2[i][3]); } @@ -269,6 +272,7 @@ try_setup_line( struct lp_setup_context *setup, const float (*v2)[4]) { struct lp_scene *scene = setup->scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *line; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); @@ -548,7 +552,7 @@ try_setup_line( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); line = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!line) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 55389871518..c98966022ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -36,6 +36,7 @@ #include "lp_setup_context.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@ -152,17 +153,18 @@ setup_point_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *point, const struct point_info *info) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; unsigned slot; /* setup interpolation for all the remaining attributes: */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned usage_mask = setup->fs.input[slot].usage_mask; + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + unsigned usage_mask = key->inputs[slot].usage_mask; unsigned i; - switch (setup->fs.input[slot].interp) { + switch (key->inputs[slot].interp) { case LP_INTERP_POSITION: /* * The generated pixel interpolators will pick up the coeffs from @@ -215,6 +217,7 @@ try_setup_point( struct lp_setup_context *setup, const float (*v0)[4] ) { /* x/y positions in fixed point */ + const struct lp_setup_variant_key *key = &setup->setup.variant->key; const int sizeAttr = setup->psize; const float size = (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0] @@ -266,7 +269,7 @@ try_setup_point( struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); point = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &bytes); if (!point) diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 5090f82ab5f..43617a6b672 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -34,9 +34,9 @@ #include "util/u_rect.h" #include "lp_perf.h" #include "lp_setup_context.h" -#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" +#include "lp_state_setup.h" #define NUM_CHANNELS 4 @@ -65,16 +65,16 @@ fixed_to_float(int a) * immediately after it. * The memory is allocated from the per-scene pool, not per-tile. * \param tri_size returns number of bytes allocated - * \param nr_inputs number of fragment shader inputs + * \param num_inputs number of fragment shader inputs * \return pointer to triangle space */ struct lp_rast_triangle * lp_setup_alloc_triangle(struct lp_scene *scene, - unsigned nr_inputs, + unsigned num_inputs, unsigned nr_planes, unsigned *tri_size) { - unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned input_array_sz = NUM_CHANNELS * (num_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; unsigned tri_bytes, bytes; char *inputs; @@ -101,25 +101,26 @@ lp_setup_print_vertex(struct lp_setup_context *setup, const char *name, const float (*v)[4]) { + const struct lp_setup_variant_key *key = &setup->setup.variant->key; int i, j; debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", name, v[0][0], v[0][1], v[0][2], v[0][3]); - for (i = 0; i < setup->fs.nr_inputs; i++) { - const float *in = v[setup->fs.input[i].src_index]; + for (i = 0; i < key->num_inputs; i++) { + const float *in = v[key->inputs[i].src_index]; debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", i, - name, setup->fs.input[i].src_index, - (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", - (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", - (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", - (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + name, key->inputs[i].src_index, + (key->inputs[i].usage_mask & 0x1) ? "x" : " ", + (key->inputs[i].usage_mask & 0x2) ? "y" : " ", + (key->inputs[i].usage_mask & 0x4) ? "z" : " ", + (key->inputs[i].usage_mask & 0x8) ? "w" : " "); for (j = 0; j < 4; j++) - if (setup->fs.input[i].usage_mask & (1<inputs[i].usage_mask & (1<scene; + const struct lp_setup_variant_key *key = &setup->setup.variant->key; struct lp_rast_triangle *tri; int x[3]; int y[3]; @@ -288,7 +290,7 @@ do_triangle_ccw(struct lp_setup_context *setup, u_rect_find_intersection(&setup->draw_region, &bbox); tri = lp_setup_alloc_triangle(scene, - setup->fs.nr_inputs, + key->num_inputs, nr_planes, &tri_bytes); if (!tri) @@ -328,13 +330,25 @@ do_triangle_ccw(struct lp_setup_context *setup, /* Setup parameter interpolants: */ - lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); + setup->setup.variant->jit_function( v0, + v1, + v2, + frontfacing, + tri->inputs.a0, + tri->inputs.dadx, + tri->inputs.dady, + &setup->setup.variant->key ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; tri->inputs.state = setup->fs.stored; + if (0) + lp_dump_setup_coef(&setup->setup.variant->key, + (const float (*)[4])tri->inputs.a0, + (const float (*)[4])tri->inputs.dadx, + (const float (*)[4])tri->inputs.dady); for (i = 0; i < 3; i++) { struct lp_rast_plane *plane = &tri->plane[i]; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 86313e1c484..7893e9cdc0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -97,6 +97,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *, void llvmpipe_update_fs(struct llvmpipe_context *lp); +void +llvmpipe_update_setup(struct llvmpipe_context *lp); + void llvmpipe_update_derived(struct llvmpipe_context *llvmpipe); diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index edd723f65f2..de242aa93ca 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -50,12 +50,13 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) { const struct lp_fragment_shader *lpfs = llvmpipe->fs; struct vertex_info *vinfo = &llvmpipe->vertex_info; - struct lp_shader_input *inputs = llvmpipe->inputs; unsigned vs_index; uint i; /* - * Match FS inputs against VS outputs, emitting the necessary attributes. + * Match FS inputs against VS outputs, emitting the necessary + * attributes. Could cache these structs and look them up with a + * combination of fragment shader, vertex shader ids. */ vinfo->num_attribs = 0; @@ -74,64 +75,10 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) vs_index = draw_find_shader_output(llvmpipe->draw, lpfs->info.input_semantic_name[i], lpfs->info.input_semantic_index[i]); - if (vs_index < 0) { - /* - * This can happen with sprite coordinates - the vertex - * shader doesn't need to provide an output as we generate - * them internally. However, lets keep pretending that there - * is something there to not confuse other code. - */ - vs_index = 0; - } - - /* This can be pre-computed, except for flatshade: - */ - inputs[i].usage_mask = lpfs->info.input_usage_mask[i]; - - switch (lpfs->info.input_interpolate[i]) { - case TGSI_INTERPOLATE_CONSTANT: - inputs[i].interp = LP_INTERP_CONSTANT; - break; - case TGSI_INTERPOLATE_LINEAR: - inputs[i].interp = LP_INTERP_LINEAR; - break; - case TGSI_INTERPOLATE_PERSPECTIVE: - inputs[i].interp = LP_INTERP_PERSPECTIVE; - break; - default: - assert(0); - break; - } - - switch (lpfs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_FACE: - inputs[i].interp = LP_INTERP_FACING; - break; - case TGSI_SEMANTIC_POSITION: - /* Position was already emitted above - */ - inputs[i].interp = LP_INTERP_POSITION; - inputs[i].src_index = 0; - continue; - case TGSI_SEMANTIC_COLOR: - /* Colors are linearly inputs[i].interpolated in the fragment shader - * even when flatshading is active. This just tells the - * setup module to use coefficients with ddx==0 and - * ddy==0. - */ - if (llvmpipe->rasterizer->flatshade) - inputs[i].interp = LP_INTERP_CONSTANT; - break; - - default: - break; - } /* * Emit the requested fs attribute for all but position. */ - - inputs[i].src_index = vinfo->num_attribs; draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); } @@ -145,15 +92,9 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } - llvmpipe->num_inputs = lpfs->info.num_inputs; - draw_compute_vertex_size(vinfo); - lp_setup_set_vertex_info(llvmpipe->setup, vinfo); - lp_setup_set_fs_inputs(llvmpipe->setup, - inputs, - lpfs->info.num_inputs); } @@ -190,6 +131,10 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe ) LP_NEW_QUERY)) llvmpipe_update_fs( llvmpipe ); + if (llvmpipe->dirty & (LP_NEW_FS | + LP_NEW_RASTERIZER)) + llvmpipe_update_setup( llvmpipe ); + if (llvmpipe->dirty & LP_NEW_BLEND_COLOR) lp_setup_set_blend_color(llvmpipe->setup, &llvmpipe->blend_color); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index e54dd9f0a3c..ad058e384ad 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -255,8 +255,7 @@ generate_quad_mask(LLVMBuilderRef builder, * \param partial_mask if 1, do mask_input testing */ static void -generate_fs(struct llvmpipe_context *lp, - struct lp_fragment_shader *shader, +generate_fs(struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key, LLVMBuilderRef builder, struct lp_type type, @@ -468,13 +467,13 @@ generate_blend(const struct pipe_blend_state *blend, * 2x2 pixels. */ static void -generate_fragment(struct llvmpipe_context *lp, +generate_fragment(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, struct lp_fragment_shader_variant *variant, unsigned partial_mask) { - struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); const struct lp_fragment_shader_variant_key *key = &variant->key; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; char func_name[256]; struct lp_type fs_type; struct lp_type blend_type; @@ -507,6 +506,18 @@ generate_fragment(struct llvmpipe_context *lp, unsigned chan; unsigned cbuf; + /* Adjust color input interpolation according to flatshade state: + */ + memcpy(inputs, shader->inputs, shader->info.num_inputs * sizeof inputs[0]); + for (i = 0; i < shader->info.num_inputs; i++) { + if (inputs[i].interp == LP_INTERP_COLOR) { + if (key->flatshade) + inputs[i].interp = LP_INTERP_CONSTANT; + else + inputs[i].interp = LP_INTERP_LINEAR; + } + } + /* TODO: actually pick these based on the fs and color buffer * characteristics. */ @@ -558,7 +569,6 @@ generate_fragment(struct llvmpipe_context *lp, variant->function[partial_mask] = function; - /* XXX: need to propagate noalias down into color param now we are * passing a pointer-to-pointer? */ @@ -606,8 +616,8 @@ generate_fragment(struct llvmpipe_context *lp, * already included in the shader key. */ lp_build_interp_soa_init(&interp, - lp->num_inputs, - lp->inputs, + shader->info.num_inputs, + inputs, builder, fs_type, a0_ptr, dadx_ptr, dady_ptr, x, y); @@ -626,7 +636,7 @@ generate_fragment(struct llvmpipe_context *lp, depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); - generate_fs(lp, shader, key, + generate_fs(shader, key, builder, fs_type, context_ptr, @@ -823,7 +833,7 @@ lp_debug_fs_variant(const struct lp_fragment_shader_variant *variant) } static struct lp_fragment_shader_variant * -generate_variant(struct llvmpipe_context *lp, +generate_variant(struct llvmpipe_screen *screen, struct lp_fragment_shader *shader, const struct lp_fragment_shader_variant_key *key) { @@ -869,11 +879,11 @@ generate_variant(struct llvmpipe_context *lp, lp_debug_fs_variant(variant); } - generate_fragment(lp, shader, variant, RAST_EDGE_TEST); + generate_fragment(screen, shader, variant, RAST_EDGE_TEST); if (variant->opaque) { /* Specialized shader, which doesn't need to read the color buffer. */ - generate_fragment(lp, shader, variant, RAST_WHOLE); + generate_fragment(screen, shader, variant, RAST_WHOLE); } else { variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST]; } @@ -888,6 +898,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, { struct lp_fragment_shader *shader; int nr_samplers; + int i; shader = CALLOC_STRUCT(lp_fragment_shader); if (!shader) @@ -907,6 +918,46 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); + for (i = 0; i < shader->info.num_inputs; i++) { + shader->inputs[i].usage_mask = shader->info.input_usage_mask[i]; + + switch (shader->info.input_interpolate[i]) { + case TGSI_INTERPOLATE_CONSTANT: + shader->inputs[i].interp = LP_INTERP_CONSTANT; + break; + case TGSI_INTERPOLATE_LINEAR: + shader->inputs[i].interp = LP_INTERP_LINEAR; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + shader->inputs[i].interp = LP_INTERP_PERSPECTIVE; + break; + default: + assert(0); + break; + } + + switch (shader->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_COLOR: + /* Colors may be either linearly or constant interpolated in + * the fragment shader, but that information isn't available + * here. Mark color inputs and fix them up later. + */ + shader->inputs[i].interp = LP_INTERP_COLOR; + break; + case TGSI_SEMANTIC_FACE: + shader->inputs[i].interp = LP_INTERP_FACING; + break; + case TGSI_SEMANTIC_POSITION: + /* Position was already emitted above + */ + shader->inputs[i].interp = LP_INTERP_POSITION; + shader->inputs[i].src_index = 0; + continue; + } + + shader->inputs[i].src_index = i+1; + } + if (LP_DEBUG & DEBUG_TGSI) { unsigned attrib; debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); @@ -1161,6 +1212,7 @@ make_variant_key(struct llvmpipe_context *lp, void llvmpipe_update_fs(struct llvmpipe_context *lp) { + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); struct lp_fragment_shader *shader = lp->fs; struct lp_fragment_shader_variant_key key; struct lp_fragment_shader_variant *variant = NULL; @@ -1201,7 +1253,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) } t0 = os_time_get(); - variant = generate_variant(lp, shader, &key); + variant = generate_variant(screen, shader, &key); t1 = os_time_get(); dt = t1 - t0; @@ -1221,6 +1273,10 @@ llvmpipe_update_fs(struct llvmpipe_context *lp) + + + + void llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe) { diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 2914e7d7efd..f73c7801c00 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "lp_bld_interp.h" /* for struct lp_shader_input */ struct tgsi_token; @@ -105,6 +106,9 @@ struct lp_fragment_shader unsigned no; unsigned variants_created; unsigned variants_cached; + + /** Fragment shader input interpolation info */ + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c new file mode 100644 index 00000000000..aa9147a1a15 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -0,0 +1,768 @@ +/************************************************************************** + * + * Copyright 2010 VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" +#include "os/os_time.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" +#include /* for LLVMVerifyFunction */ + +#include "lp_perf.h" +#include "lp_debug.h" +#include "lp_flush.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_setup_context.h" +#include "lp_rast.h" +#include "lp_state.h" +#include "lp_state_fs.h" +#include "lp_state_setup.h" + + + +/* currently organized to interpolate full float[4] attributes even + * when some elements are unused. Later, can pack vertex data more + * closely. + */ + + +struct lp_setup_args +{ + /* Function arguments: + */ + LLVMValueRef v0; + LLVMValueRef v1; + LLVMValueRef v2; + LLVMValueRef facing; /* boolean */ + LLVMValueRef a0; + LLVMValueRef dadx; + LLVMValueRef dady; + + /* Derived: + */ + LLVMValueRef x0_center; + LLVMValueRef y0_center; + LLVMValueRef dy20_ooa; + LLVMValueRef dy01_ooa; + LLVMValueRef dx20_ooa; + LLVMValueRef dx01_ooa; +}; + +static LLVMTypeRef type4f(void) +{ + return LLVMVectorType(LLVMFloatType(), 4); +} + + +/* Equivalent of _mm_setr_ps(a,b,c,d) + */ +static LLVMValueRef vec4f(LLVMBuilderRef bld, + LLVMValueRef a, LLVMValueRef b, LLVMValueRef c, LLVMValueRef d, + const char *name) +{ + LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0); + LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0); + LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + + LLVMValueRef res = LLVMGetUndef(type4f()); + + res = LLVMBuildInsertElement(bld, res, a, i0, ""); + res = LLVMBuildInsertElement(bld, res, b, i1, ""); + res = LLVMBuildInsertElement(bld, res, c, i2, ""); + res = LLVMBuildInsertElement(bld, res, d, i3, name); + + return res; +} + +/* Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld, + LLVMValueRef a, + const char *name) +{ + LLVMValueRef res = LLVMGetUndef(type4f()); + int i; + + for(i = 0; i < 4; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : ""); + } + + return res; +} + +static void +store_coef(LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef dadx, + LLVMValueRef dady) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), slot, 0); + + LLVMBuildStore(builder, + a0, + LLVMBuildGEP(builder, args->a0, &idx, 1, "")); + + LLVMBuildStore(builder, + dadx, + LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); + + LLVMBuildStore(builder, + dady, + LLVMBuildGEP(builder, args->dady, &idx, 1, "")); +} + + + +static void +emit_constant_coef4( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef vert, + unsigned attr) +{ + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), attr, 0); + LLVMValueRef attr_ptr = LLVMBuildGEP(builder, vert, &idx, 1, "attr_ptr"); + LLVMValueRef vert_attr = LLVMBuildLoad(builder, attr_ptr, "vert_attr"); + + store_coef(builder, args, slot, vert_attr, zerovec, zerovec); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void +emit_facing_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + unsigned slot ) +{ + LLVMValueRef a0_0 = args->facing; + LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef a0 = vec4f(builder, a0_0, zero, zero, zero, "facing"); + LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); + + store_coef(builder, args, slot, a0, zerovec, zerovec); +} + + +static LLVMValueRef +vert_attrib(LLVMBuilderRef b, + LLVMValueRef vert, + int attr, + int elem, + const char *name) +{ + LLVMValueRef idx[2]; + idx[0] = LLVMConstInt(LLVMInt32Type(), attr, 0); + idx[1] = LLVMConstInt(LLVMInt32Type(), elem, 0); + return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); +} + + + +static void +emit_coef4( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + LLVMValueRef a0, + LLVMValueRef a1, + LLVMValueRef a2) +{ + LLVMValueRef dy20_ooa = args->dy20_ooa; + LLVMValueRef dy01_ooa = args->dy01_ooa; + LLVMValueRef dx20_ooa = args->dx20_ooa; + LLVMValueRef dx01_ooa = args->dx01_ooa; + LLVMValueRef x0_center = args->x0_center; + LLVMValueRef y0_center = args->y0_center; + + /* XXX: using fsub, fmul on vector types -- does this work?? + */ + LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); + LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); + + /* Calculate dadx (vec4f) + */ + LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); + LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); + LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); + + /* Calculate dady (vec4f) + */ + LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); + LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); + LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); + + /* Calculate a0 - the attribute value at the origin + */ + LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); + LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); + LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); + LLVMValueRef attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); + + store_coef(b, args, slot, attr_0, dadx, dady); +} + + +static void +emit_linear_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef a0 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef a1 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef a2 = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + emit_coef4(b, args, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void +emit_perspective_coef( LLVMBuilderRef b, + struct lp_setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + LLVMValueRef idx = LLVMConstInt(LLVMInt32Type(), vert_attr, 0); + + LLVMValueRef v0a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); + LLVMValueRef v1a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); + LLVMValueRef v2a = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); + + LLVMValueRef v0_oow = vec4f_from_scalar(b, vert_attrib(b, args->v0, 0, 3, ""), "v0_oow"); + LLVMValueRef v1_oow = vec4f_from_scalar(b, vert_attrib(b, args->v1, 0, 3, ""), "v1_oow"); + LLVMValueRef v2_oow = vec4f_from_scalar(b, vert_attrib(b, args->v2, 0, 3, ""), "v2_oow"); + + LLVMValueRef v0_oow_v0a = LLVMBuildFMul(b, v0a, v0_oow, "v0_oow_v0a"); + LLVMValueRef v1_oow_v1a = LLVMBuildFMul(b, v1a, v1_oow, "v1_oow_v1a"); + LLVMValueRef v2_oow_v2a = LLVMBuildFMul(b, v2a, v2_oow, "v2_oow_v2a"); + + emit_coef4(b, args, slot, v0_oow_v0a, v1_oow_v1a, v2_oow_v2a); +} + + +static void +emit_position_coef( LLVMBuilderRef builder, + struct lp_setup_args *args, + int slot, int attrib ) +{ + emit_linear_coef(builder, args, slot, attrib); +} + + + + +/** + * Compute the inputs-> dadx, dady, a0 values. + */ +static void +emit_tri_coef( LLVMBuilderRef builder, + const struct lp_setup_variant_key *key, + struct lp_setup_args *args ) +{ + unsigned slot; + + /* The internal position input is in slot zero: + */ + emit_position_coef(builder, args, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + emit_constant_coef4(builder, args, slot+1, args->v0, vert_attr); + } + else { + emit_constant_coef4(builder, args, slot+1, args->v2, vert_attr); + } + break; + + case LP_INTERP_LINEAR: + emit_linear_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + emit_perspective_coef(builder, args, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + emit_facing_coef(builder, args, slot+1); + break; + + default: + assert(0); + } + } +} + + +/* XXX: This is generic code, share with fs/vs codegen: + */ +static lp_jit_setup_triangle +finalize_function(struct llvmpipe_screen *screen, + LLVMBuilderRef builder, + LLVMValueRef function) +{ + void *f; + + /* Verify the LLVM IR. If invalid, dump and abort */ +#ifdef DEBUG + if (LLVMVerifyFunction(function, LLVMPrintMessageAction)) { + if (1) + lp_debug_dump_value(function); + abort(); + } +#endif + + /* Apply optimizations to LLVM IR */ + LLVMRunFunctionPassManager(screen->pass, function); + + if (gallivm_debug & GALLIVM_DEBUG_IR) + { + /* Print the LLVM IR to stderr */ + lp_debug_dump_value(function); + debug_printf("\n"); + } + + /* + * Translate the LLVM IR into machine code. + */ + f = LLVMGetPointerToGlobal(screen->engine, function); + + if (gallivm_debug & GALLIVM_DEBUG_ASM) + { + lp_disassemble(f); + } + + lp_func_delete_body(function); + + return f; +} + +/* XXX: Generic code: + */ +static void +lp_emit_emms(LLVMBuilderRef builder) +{ +#ifdef PIPE_ARCH_X86 + /* Avoid corrupting the FPU stack on 32bit OSes. */ + lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); +#endif +} + + +/* XXX: generic code: + */ +static void +set_noalias(LLVMBuilderRef builder, + LLVMValueRef function, + const LLVMTypeRef *arg_types, + int nr_args) +{ + int i; + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(function, i), + LLVMNoAliasAttribute); +} + +static void +init_args(LLVMBuilderRef b, + struct lp_setup_args *args, + const struct lp_setup_variant *variant) +{ + LLVMValueRef v0_x = vert_attrib(b, args->v0, 0, 0, "v0_x"); + LLVMValueRef v0_y = vert_attrib(b, args->v0, 0, 1, "v0_y"); + + LLVMValueRef v1_x = vert_attrib(b, args->v1, 0, 0, "v1_x"); + LLVMValueRef v1_y = vert_attrib(b, args->v1, 0, 1, "v1_y"); + + LLVMValueRef v2_x = vert_attrib(b, args->v2, 0, 0, "v2_x"); + LLVMValueRef v2_y = vert_attrib(b, args->v2, 0, 1, "v2_y"); + + LLVMValueRef pixel_center = LLVMConstReal(LLVMFloatType(), + variant->key.pixel_center_half ? 0.5 : 0); + + LLVMValueRef x0_center = LLVMBuildFSub(b, v0_x, pixel_center, "x0_center" ); + LLVMValueRef y0_center = LLVMBuildFSub(b, v0_y, pixel_center, "y0_center" ); + + LLVMValueRef dx01 = LLVMBuildFSub(b, v0_x, v1_x, "dx01"); + LLVMValueRef dy01 = LLVMBuildFSub(b, v0_y, v1_y, "dy01"); + LLVMValueRef dx20 = LLVMBuildFSub(b, v2_x, v0_x, "dx20"); + LLVMValueRef dy20 = LLVMBuildFSub(b, v2_y, v0_y, "dy20"); + + LLVMValueRef one = LLVMConstReal(LLVMFloatType(), 1.0); + LLVMValueRef e = LLVMBuildFMul(b, dx01, dy20, "e"); + LLVMValueRef f = LLVMBuildFMul(b, dx20, dy01, "f"); + LLVMValueRef ooa = LLVMBuildFDiv(b, one, LLVMBuildFSub(b, e, f, ""), "ooa"); + + LLVMValueRef dy20_ooa = LLVMBuildFMul(b, dy20, ooa, "dy20_ooa"); + LLVMValueRef dy01_ooa = LLVMBuildFMul(b, dy01, ooa, "dy01_ooa"); + LLVMValueRef dx20_ooa = LLVMBuildFMul(b, dx20, ooa, "dx20_ooa"); + LLVMValueRef dx01_ooa = LLVMBuildFMul(b, dx01, ooa, "dx01_ooa"); + + args->dy20_ooa = vec4f_from_scalar(b, dy20_ooa, "dy20_ooa_4f"); + args->dy01_ooa = vec4f_from_scalar(b, dy01_ooa, "dy01_ooa_4f"); + + args->dx20_ooa = vec4f_from_scalar(b, dx20_ooa, "dx20_ooa_4f"); + args->dx01_ooa = vec4f_from_scalar(b, dx01_ooa, "dx01_ooa_4f"); + + args->x0_center = vec4f_from_scalar(b, x0_center, "x0_center_4f"); + args->y0_center = vec4f_from_scalar(b, y0_center, "y0_center_4f"); +} + +/** + * Generate the runtime callable function for the coefficient calculation. + * + */ +static struct lp_setup_variant * +generate_setup_variant(struct llvmpipe_screen *screen, + struct lp_setup_variant_key *key) +{ + struct lp_setup_variant *variant = NULL; + struct lp_setup_args args; + char func_name[256]; + LLVMTypeRef vec4f_type; + LLVMTypeRef func_type; + LLVMTypeRef arg_types[8]; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + int64_t t0, t1; + + if (0) + goto fail; + + variant = CALLOC_STRUCT(lp_setup_variant); + if (variant == NULL) + goto fail; + + if (LP_DEBUG & DEBUG_COUNTERS) { + t0 = os_time_get(); + } + + memcpy(&variant->key, key, key->size); + variant->list_item_global.base = variant; + + util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u", + 0, + variant->no); + + /* Currently always deal with full 4-wide vertex attributes from + * the vertices. + */ + + vec4f_type = LLVMVectorType(LLVMFloatType(), 4); + + arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ + arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ + arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ + arg_types[3] = LLVMInt32Type(); /* facing */ + arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ + arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ + arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ + arg_types[7] = LLVMPointerType(LLVMVoidType(), 0); /* key, unused */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function = LLVMAddFunction(screen->module, func_name, func_type); + if (!variant->function) + goto fail; + + LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); + + args.v0 = LLVMGetParam(variant->function, 0); + args.v1 = LLVMGetParam(variant->function, 1); + args.v2 = LLVMGetParam(variant->function, 2); + args.facing = LLVMGetParam(variant->function, 3); + args.a0 = LLVMGetParam(variant->function, 4); + args.dadx = LLVMGetParam(variant->function, 5); + args.dady = LLVMGetParam(variant->function, 6); + + lp_build_name(args.v0, "in_v0"); + lp_build_name(args.v1, "in_v1"); + lp_build_name(args.v2, "in_v2"); + lp_build_name(args.facing, "in_facing"); + lp_build_name(args.a0, "out_a0"); + lp_build_name(args.dadx, "out_dadx"); + lp_build_name(args.dady, "out_dady"); + + /* + * Function body + */ + block = LLVMAppendBasicBlock(variant->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + set_noalias(builder, variant->function, arg_types, Elements(arg_types)); + init_args(builder, &args, variant); + emit_tri_coef(builder, &variant->key, &args); + + lp_emit_emms(builder); + LLVMBuildRetVoid(builder); + LLVMDisposeBuilder(builder); + + variant->jit_function = finalize_function(screen, builder, + variant->function); + if (!variant->jit_function) + goto fail; + + /* + * Update timing information: + */ + if (LP_DEBUG & DEBUG_COUNTERS) { + t1 = os_time_get(); + LP_COUNT_ADD(llvm_compile_time, t1 - t0); + LP_COUNT_ADD(nr_llvm_compiles, 1); + } + + return variant; + +fail: + if (variant) { + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + FREE(variant); + } + + return NULL; +} + + + +static void +lp_make_setup_variant_key(struct llvmpipe_context *lp, + struct lp_setup_variant_key *key) +{ + struct lp_fragment_shader *fs = lp->fs; + unsigned i; + + assert(sizeof key->inputs[0] == sizeof(ushort)); + + key->num_inputs = fs->info.num_inputs; + key->flatshade_first = lp->rasterizer->flatshade_first; + key->pixel_center_half = lp->rasterizer->gl_rasterization_rules; + key->size = Offset(struct lp_setup_variant_key, + inputs[key->num_inputs]); + key->pad = 0; + + memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); + for (i = 0; i < key->num_inputs; i++) { + if (key->inputs[i].interp == LP_INTERP_COLOR) { + if (lp->rasterizer->flatshade) + key->inputs[i].interp = LP_INTERP_CONSTANT; + else + key->inputs[i].interp = LP_INTERP_LINEAR; + } + } + +} + + +static void +remove_setup_variant(struct llvmpipe_context *lp, + struct lp_setup_variant *variant) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + if (gallivm_debug & GALLIVM_DEBUG_IR) { + debug_printf("llvmpipe: del setup_variant #%u total %u\n", + variant->no, lp->nr_setup_variants); + } + + if (variant->function) { + if (variant->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, + variant->function); + LLVMDeleteFunction(variant->function); + } + + remove_from_list(&variant->list_item_global); + lp->nr_setup_variants--; + FREE(variant); +} + + + +/* When the number of setup variants exceeds a threshold, cull a + * fraction (currently a quarter) of them. + */ +static void +cull_setup_variants(struct llvmpipe_context *lp) +{ + struct pipe_context *pipe = &lp->pipe; + int i; + + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + * Flushing alone might not be sufficient we need to wait on it too. + */ + llvmpipe_finish(pipe, __FUNCTION__); + + for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { + struct lp_setup_variant_list_item *item = last_elem(&lp->setup_variants_list); + remove_setup_variant(lp, item->base); + } +} + + +/** + * Update fragment/vertex shader linkage state. This is called just + * prior to drawing something when some fragment-related state has + * changed. + */ +void +llvmpipe_update_setup(struct llvmpipe_context *lp) +{ + struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen); + + struct lp_setup_variant_key *key = &lp->setup_variant.key; + struct lp_setup_variant *variant = NULL; + struct lp_setup_variant_list_item *li; + + lp_make_setup_variant_key(lp, key); + + foreach(li, &lp->setup_variants_list) { + if(li->base->key.size == key->size && + memcmp(&li->base->key, key, key->size) == 0) { + variant = li->base; + break; + } + } + + if (variant) { + move_to_head(&lp->setup_variants_list, &variant->list_item_global); + } + else { + if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { + cull_setup_variants(lp); + } + + variant = generate_setup_variant(screen, key); + if (variant) { + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; + } + else { + /* Keep the old path around for debugging, and also perhaps + * in case malloc fails during compilation. + */ + variant = &lp->setup_variant; + variant->jit_function = lp_setup_tri_fallback; + } + } + + lp_setup_set_setup_variant(lp->setup, + variant); +} + +void +lp_delete_setup_variants(struct llvmpipe_context *lp) +{ + struct lp_setup_variant_list_item *li; + li = first_elem(&lp->setup_variants_list); + while(!at_end(&lp->setup_variants_list, li)) { + struct lp_setup_variant_list_item *next = next_elem(li); + remove_setup_variant(lp, li->base); + li = next; + } +} + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]) +{ + int i, slot; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a0 = sa0 [0][i]; + float dadx = sdadx[0][i]; + float dady = sdady[0][i]; + + debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", + "xyzw"[i], + a0, dadx, dady); + } + + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned usage_mask = key->inputs[slot].usage_mask; + for (i = 0; i < NUM_CHANNELS; i++) { + if (usage_mask & (1 << i)) { + float a0 = sa0 [1 + slot][i]; + float dadx = sdadx[1 + slot][i]; + float dady = sdady[1 + slot][i]; + + debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", + slot, + "xyzw"[i], + a0, dadx, dady); + } + } + } +} diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h new file mode 100644 index 00000000000..2b080fbc321 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -0,0 +1,81 @@ +#ifndef LP_STATE_SETUP_H +#define LP_STATE_SETUP_H + +#include "lp_bld_interp.h" + + +struct llvmpipe_context; +struct lp_setup_variant; + +struct lp_setup_variant_list_item +{ + struct lp_setup_variant *base; + struct lp_setup_variant_list_item *next, *prev; +}; + + +struct lp_setup_variant_key { + unsigned num_inputs:8; + unsigned flatshade_first:1; + unsigned pixel_center_half:1; + unsigned pad:7; + unsigned size:16; + struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS]; +}; + + +typedef void (*lp_jit_setup_triangle)( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ); + + + + +/* At this stage, for a given variant key, we create a + * draw_vertex_info struct telling the draw module how to format the + * vertices, and an llvm-generated function which calculates the + * attribute interpolants (a0, dadx, dady) from three of those + * vertices. + */ +struct lp_setup_variant { + struct lp_setup_variant_key key; + + struct lp_setup_variant_list_item list_item_global; + + /* XXX: this is a pointer to the LLVM IR. Once jit_function is + * generated, we never need to use the IR again - need to find a + * way to release this data without destroying the generated + * assembly. + */ + LLVMValueRef function; + + /* The actual generated setup function: + */ + lp_jit_setup_triangle jit_function; + + unsigned no; +}; + +void lp_setup_tri_fallback( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ); + +void lp_delete_setup_variants(struct llvmpipe_context *lp); + +void +lp_dump_setup_coef( const struct lp_setup_variant_key *key, + const float (*sa0)[4], + const float (*sdadx)[4], + const float (*sdady)[4]); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c b/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c new file mode 100644 index 00000000000..1922efcc88d --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c @@ -0,0 +1,265 @@ +/************************************************************************** + * + * Copyright 2010, VMware. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Fallback (non-llvm) path for triangle setup. Will remove once llvm + * is up and running. + * + * TODO: line/point setup. + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "lp_state_setup.h" + + + +#if defined(PIPE_ARCH_SSE) +#include + +struct setup_args { + float (*a0)[4]; /* aligned */ + float (*dadx)[4]; /* aligned */ + float (*dady)[4]; /* aligned */ + + float x0_center; + float y0_center; + + /* turn these into an aligned float[4] */ + float dy01_ooa; + float dy20_ooa; + float dx01_ooa; + float dx20_ooa; + + const float (*v0)[4]; /* aligned */ + const float (*v1)[4]; /* aligned */ + const float (*v2)[4]; /* aligned */ + + boolean frontfacing; /* remove eventually */ +}; + + +static void constant_coef4( struct setup_args *args, + unsigned slot, + const float *attr) +{ + *(__m128 *)args->a0[slot] = *(__m128 *)attr; + *(__m128 *)args->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)args->dady[slot] = _mm_set1_ps(0.0); +} + + + +/** + * Setup the fragment input attribute with the front-facing value. + * \param frontface is the triangle front facing? + */ +static void setup_facing_coef( struct setup_args *args, + unsigned slot ) +{ + /* XXX: just pass frontface directly to the shader, don't bother + * treating it as an input. + */ + __m128 a0 = _mm_setr_ps(args->frontfacing ? 1.0 : -1.0, + 0, 0, 0); + + *(__m128 *)args->a0[slot] = a0; + *(__m128 *)args->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)args->dady[slot] = _mm_set1_ps(0.0); +} + + + +static void calc_coef4( struct setup_args *args, + unsigned slot, + __m128 a0, + __m128 a1, + __m128 a2) +{ + __m128 da01 = _mm_sub_ps(a0, a1); + __m128 da20 = _mm_sub_ps(a2, a0); + + __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(args->dy20_ooa)); + __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(args->dy01_ooa)); + __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); + + __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(args->dx20_ooa)); + __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(args->dx01_ooa)); + __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); + + __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(args->x0_center)); + __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(args->y0_center)); + __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); + __m128 attr_0 = _mm_sub_ps(a0, attr_v0); + + *(__m128 *)args->a0[slot] = attr_0; + *(__m128 *)args->dadx[slot] = dadx; + *(__m128 *)args->dady[slot] = dady; +} + + +static void linear_coef( struct setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + __m128 a0 = *(const __m128 *)args->v0[vert_attr]; + __m128 a1 = *(const __m128 *)args->v1[vert_attr]; + __m128 a2 = *(const __m128 *)args->v2[vert_attr]; + + calc_coef4(args, slot, a0, a1, a2); +} + + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void perspective_coef( struct setup_args *args, + unsigned slot, + unsigned vert_attr) +{ + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + __m128 a0 = *(const __m128 *)args->v0[vert_attr]; + __m128 a1 = *(const __m128 *)args->v1[vert_attr]; + __m128 a2 = *(const __m128 *)args->v2[vert_attr]; + + __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(args->v0[0][3])); + __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(args->v1[0][3])); + __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(args->v2[0][3])); + + calc_coef4(args, slot, a0_oow, a1_oow, a2_oow); +} + + + + + +/** + * Compute the args-> dadx, dady, a0 values. + * + * Note that this was effectively a little interpreted program, where + * the opcodes were LP_INTERP_*. This is the program which is now + * being code-generated in lp_state_setup.c. + */ +void lp_setup_tri_fallback( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ) +{ + struct setup_args args; + float pixel_offset = key->pixel_center_half ? 0.5 : 0.0; + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); + unsigned slot; + + args.v0 = v0; + args.v1 = v1; + args.v2 = v2; + args.frontfacing = front_facing; + args.a0 = a0; + args.dadx = dadx; + args.dady = dady; + + args.x0_center = v0[0][0] - pixel_offset; + args.y0_center = v0[0][1] - pixel_offset; + args.dx01_ooa = dx01 * oneoverarea; + args.dx20_ooa = dx20 * oneoverarea; + args.dy01_ooa = dy01 * oneoverarea; + args.dy20_ooa = dy20 * oneoverarea; + + /* The internal position input is in slot zero: + */ + linear_coef(&args, 0, 0); + + /* setup interpolation for all the remaining attributes: + */ + for (slot = 0; slot < key->num_inputs; slot++) { + unsigned vert_attr = key->inputs[slot].src_index; + + switch (key->inputs[slot].interp) { + case LP_INTERP_CONSTANT: + if (key->flatshade_first) { + constant_coef4(&args, slot+1, args.v0[vert_attr]); + } + else { + constant_coef4(&args, slot+1, args.v2[vert_attr]); + } + break; + + case LP_INTERP_LINEAR: + linear_coef(&args, slot+1, vert_attr); + break; + + case LP_INTERP_PERSPECTIVE: + perspective_coef(&args, slot+1, vert_attr); + break; + + case LP_INTERP_POSITION: + /* + * The generated pixel interpolators will pick up the coeffs from + * slot 0. + */ + break; + + case LP_INTERP_FACING: + setup_facing_coef(&args, slot+1); + break; + + default: + assert(0); + } + } +} + +#else + +void lp_setup_tri_fallback( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front_facing, + float (*a0)[4], + float (*dadx)[4], + float (*dady)[4], + const struct lp_setup_variant_key *key ) +{ + /* this path for debugging only, don't need a non-sse version. */ +} + +#endif -- cgit v1.2.3 From 75d22e71a812bbe78414d3f9519f4c7a7157c748 Mon Sep 17 00:00:00 2001 From: Hui Qi Tay Date: Sun, 26 Sep 2010 16:01:59 +0800 Subject: llvmpipe: minor changes in llvm coefficient calcs --- src/gallium/drivers/llvmpipe/lp_setup_debug.c | 1 + src/gallium/drivers/llvmpipe/lp_state_setup.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 src/gallium/drivers/llvmpipe/lp_setup_debug.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_debug.c b/src/gallium/drivers/llvmpipe/lp_setup_debug.c new file mode 100644 index 00000000000..a71a4719a86 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_setup_debug.c @@ -0,0 +1 @@ +/* Some debugging stuff */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index aa9147a1a15..3261c53f516 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -173,8 +173,9 @@ emit_facing_coef( LLVMBuilderRef builder, unsigned slot ) { LLVMValueRef a0_0 = args->facing; + LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, LLVMFloatType(), ""); LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); - LLVMValueRef a0 = vec4f(builder, a0_0, zero, zero, zero, "facing"); + LLVMValueRef a0 = vec4f(builder, a0_0f, zero, zero, zero, "facing"); LLVMValueRef zerovec = vec4f_from_scalar(builder, zero, "zero"); store_coef(builder, args, slot, a0, zerovec, zerovec); @@ -520,7 +521,7 @@ generate_setup_variant(struct llvmpipe_screen *screen, arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ - arg_types[7] = LLVMPointerType(LLVMVoidType(), 0); /* key, unused */ + arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key, unused */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); -- cgit v1.2.3 From 6316d540564d116460bfd1382e3eee98480e28ff Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 7 Oct 2010 16:26:17 -0400 Subject: llvmpipe: fix rasterization of vertical lines on pixel boundaries --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 4d7d6235b07..693ac281752 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -475,7 +475,7 @@ try_setup_line( struct lp_setup_context *setup, else { /* do intersection test */ float xintersect = fracf(v2[0][0]) + y2diff * dxdy; - draw_end = (xintersect < 1.0 && xintersect > 0.0); + draw_end = (xintersect < 1.0 && xintersect >= 0.0); } /* Are we already drawing start/end? @@ -513,7 +513,7 @@ try_setup_line( struct lp_setup_context *setup, x_offset_end = y_offset_end * dxdy; } } - + /* x/y positions in fixed point */ x[0] = subpixel_snap(v1[0][0] + x_offset - setup->pixel_offset) - fixed_width/2; x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2; -- cgit v1.2.3 From d0bfb3c5144a9434efd4d53ced149d42016b5bdc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 6 Oct 2010 20:42:30 +0100 Subject: llvmpipe: Prevent z > 1.0 The current interpolation schemes causes precision loss. Changing the operation order helps, but does not completely avoid the problem. The only short term solution is to clamp z to 1.0. This is unfortunate, but probably unavoidable until interpolation is improved. --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index 2a374f8c390..ee92ce3cdc1 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -206,7 +206,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld, dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); /* - * a = a0 + x * dadx + y * dady + * a = a0 + (x * dadx + y * dady) */ if (attrib == 0 && chan == 0) { @@ -219,11 +219,11 @@ coeffs_init(struct lp_build_interp_soa_context *bld, a = a0; if (interp != LP_INTERP_CONSTANT && interp != LP_INTERP_FACING) { - LLVMValueRef tmp; - tmp = LLVMBuildFMul(builder, bld->x, dadx, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); - tmp = LLVMBuildFMul(builder, bld->y, dady, ""); - a = LLVMBuildFAdd(builder, a, tmp, ""); + LLVMValueRef ax, ay, axy; + ax = LLVMBuildFMul(builder, bld->x, dadx, ""); + ay = LLVMBuildFMul(builder, bld->y, dady, ""); + axy = LLVMBuildFAdd(builder, ax, ay, ""); + a = LLVMBuildFAdd(builder, a, axy, ""); } } @@ -350,6 +350,14 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) } #endif + if (attrib == 0 && chan == 2) { + /* FIXME: Depth values can exceed 1.0, due to the fact that + * setup interpolation coefficients refer to (0,0) which causes + * precision loss. So we must clamp to 1.0 here to avoid artifacts + */ + a = lp_build_min(coeff_bld, a, coeff_bld->one); + } + attrib_name(a, attrib, chan, ""); } bld->attribs[attrib][chan] = a; -- cgit v1.2.3 From 8009886b0092df2783472deaac1bcaad4a802c19 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Oct 2010 22:25:48 +0100 Subject: llvmpipe: defer attribute interpolation until after mask and ztest Don't calculate 1/w for quads which aren't visible... --- src/gallium/drivers/llvmpipe/lp_bld_interp.c | 25 ++++++++++++++++++------- src/gallium/drivers/llvmpipe/lp_bld_interp.h | 6 +++++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 17 +++++++++++------ 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c index ee92ce3cdc1..c9da8900d0c 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c @@ -272,7 +272,10 @@ coeffs_init(struct lp_build_interp_soa_context *bld, * This is called when we move from one quad to the next. */ static void -attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) +attribs_update(struct lp_build_interp_soa_context *bld, + int quad_index, + int start, + int end) { struct lp_build_context *coeff_bld = &bld->coeff_bld; LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index); @@ -282,7 +285,7 @@ attribs_update(struct lp_build_interp_soa_context *bld, int quad_index) assert(quad_index < 4); - for(attrib = 0; attrib < bld->num_attribs; ++attrib) { + for(attrib = start; attrib < end; ++attrib) { const unsigned mask = bld->mask[attrib]; const unsigned interp = bld->interp[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -442,8 +445,6 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, pos_init(bld, x0, y0); coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); - - attribs_update(bld, 0); } @@ -451,10 +452,20 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, * Advance the position and inputs to the given quad within the block. */ void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, - int quad_index) +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index) +{ + assert(quad_index < 4); + + attribs_update(bld, quad_index, 1, bld->num_attribs); +} + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, + int quad_index) { assert(quad_index < 4); - attribs_update(bld, quad_index); + attribs_update(bld, quad_index, 0, 1); } + diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h index 3054030f739..6588f7f2755 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h @@ -89,7 +89,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, LLVMValueRef y); void -lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld, +lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, + int quad_index); + +void +lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, int quad_index); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 3ce8be5a0a9..0530c613230 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -262,7 +262,7 @@ generate_fs(struct llvmpipe_context *lp, struct lp_type type, LLVMValueRef context_ptr, unsigned i, - const struct lp_build_interp_soa_context *interp, + struct lp_build_interp_soa_context *interp, struct lp_build_sampler_soa *sampler, LLVMValueRef *pmask, LLVMValueRef (*color)[4], @@ -276,7 +276,7 @@ generate_fs(struct llvmpipe_context *lp, LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - LLVMValueRef z = interp->pos[2]; + LLVMValueRef z; LLVMValueRef stencil_refs[2]; struct lp_build_flow_context *flow; struct lp_build_mask_context mask; @@ -307,7 +307,6 @@ generate_fs(struct llvmpipe_context *lp, lp_build_flow_scope_declare(flow, &color[cbuf][chan]); } } - lp_build_flow_scope_declare(flow, &z); /* do triangle edge testing */ if (partial_mask) { @@ -321,6 +320,13 @@ generate_fs(struct llvmpipe_context *lp, /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); + lp_build_interp_soa_update_pos(interp, i); + + /* Try to avoid the 1/w for quads where mask is zero. TODO: avoid + * this for depth-fail quads also. + */ + z = interp->pos[2]; + early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && @@ -332,6 +338,8 @@ generate_fs(struct llvmpipe_context *lp, type, &mask, stencil_refs, z, depth_ptr, facing, counter); + lp_build_interp_soa_update_inputs(interp, i); + lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler, &shader->info); @@ -621,9 +629,6 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - if(i != 0) - lp_build_interp_soa_update(&interp, i); - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); generate_fs(lp, shader, key, -- cgit v1.2.3 From 40d7be52619fbff2479dcdf56929e3e0c5b12e72 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 7 Oct 2010 18:59:54 +0100 Subject: llvmpipe: use alloca for fs color outputs Don't try to emit our own phi's, let llvm mem2reg do it for us. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 0530c613230..f75ae284cb3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -303,8 +303,7 @@ generate_fs(struct llvmpipe_context *lp, /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { - color[cbuf][chan] = LLVMGetUndef(vec_type); - lp_build_flow_scope_declare(flow, &color[cbuf][chan]); + color[cbuf][chan] = lp_build_alloca(builder, vec_type, "color"); } } @@ -369,7 +368,7 @@ generate_fs(struct llvmpipe_context *lp, &mask, alpha, alpha_ref_value); } - color[cbuf][chan] = out; + LLVMBuildStore(builder, out, color[cbuf][chan]); break; } @@ -665,9 +664,18 @@ generate_fragment(struct llvmpipe_context *lp, * Convert the fs's output color and mask to fit to the blending type. */ for(chan = 0; chan < NUM_CHANNELS; ++chan) { + LLVMValueRef fs_color_vals[LP_MAX_VECTOR_LENGTH]; + + for (i = 0; i < num_fs; i++) { + fs_color_vals[i] = + LLVMBuildLoad(builder, fs_out_color[cbuf][chan][i], "fs_color_vals"); + } + lp_build_conv(builder, fs_type, blend_type, - fs_out_color[cbuf][chan], num_fs, + fs_color_vals, + num_fs, &blend_in_color[chan], 1); + lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]); } -- cgit v1.2.3 From 954965366fee3fa2eec8a11b6663d4cf218e1d5d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 7 Oct 2010 19:01:12 +0100 Subject: llvmpipe: dump fragment shader ir and asm when LP_DEBUG=fs Better than GALLIVM_DEBUG if you're only interested in fragment shaders. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f75ae284cb3..07b4f74dbc9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -730,7 +730,7 @@ generate_fragment(struct llvmpipe_context *lp, /* Apply optimizations to LLVM IR */ LLVMRunFunctionPassManager(screen->pass, function); - if (gallivm_debug & GALLIVM_DEBUG_IR) { + if ((gallivm_debug & GALLIVM_DEBUG_IR) || (LP_DEBUG & DEBUG_FS)) { /* Print the LLVM IR to stderr */ lp_debug_dump_value(function); debug_printf("\n"); @@ -744,7 +744,7 @@ generate_fragment(struct llvmpipe_context *lp, variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { + if ((gallivm_debug & GALLIVM_DEBUG_ASM) || (LP_DEBUG & DEBUG_FS)) { lp_disassemble(f); } lp_func_delete_body(function); -- cgit v1.2.3 From d2cf757f44f4ee5554243f3279483a25886d9927 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 6 Oct 2010 18:21:56 +0100 Subject: gallivm: specialized x8z24 depthtest path Avoid unnecessary masking of non-existant stencil component. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 95 ++++++++++++++++++++++++++++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 30 +-------- 2 files changed, 94 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 7eabe0508de..09b82fbe9ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -71,6 +71,7 @@ #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_conv.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_intr.h" @@ -446,7 +447,7 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param format_desc description of the depth/stencil surface * \param mask the alive/dead pixel mask for the quad (vector) * \param stencil_refs the front/back stencil ref values (scalar) - * \param z_src the incoming depth/stencil values (a 2x2 quad) + * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer * \param facing contains float value indicating front/back facing polygon */ @@ -454,7 +455,7 @@ void lp_build_depth_stencil_test(LLVMBuilderRef builder, const struct pipe_depth_state *depth, const struct pipe_stencil_state stencil[2], - struct lp_type type, + struct lp_type z_src_type, const struct util_format_description *format_desc, struct lp_build_mask_context *mask, LLVMValueRef stencil_refs[2], @@ -463,6 +464,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef face, LLVMValueRef counter) { + struct lp_type type; struct lp_build_context bld; struct lp_build_context sbld; struct lp_type s_type; @@ -473,6 +475,95 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef orig_mask = mask->value; LLVMValueRef front_facing = NULL; + /* Prototype a simpler path: + */ + if (z_src_type.floating && + format_desc->format == PIPE_FORMAT_X8Z24_UNORM && + depth->enabled) + { + LLVMValueRef zscaled; + LLVMValueRef const_ffffff_float; + LLVMValueRef const_8_int; + LLVMTypeRef int32_vec_type; + + /* We know the values in z_dst are all >= 0, so allow + * lp_build_compare to use signed compare intrinsics: + */ + type.floating = 0; + type.fixed = 0; + type.sign = 1; + type.norm = 1; + type.width = 32; + type.length = z_src_type.length; + + int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); + + const_8_int = lp_build_const_int_vec(type, 8); + const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); + + zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); + z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); + + /* Load current z/stencil value from z/stencil buffer */ + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); + + /* compare src Z to dst Z, returning 'pass' mask */ + z_pass = lp_build_compare(builder, + type, + depth->func, z_src, z_dst); + + lp_build_mask_update(mask, z_pass); + + /* No need to worry about old stencil contents, just blend the + * old and new values and shift into the correct position for + * storage. + */ + if (depth->writemask) { + type.sign = 0; + lp_build_context_init(&bld, builder, type); + + z_dst = lp_build_select(&bld, mask->value, z_src, z_dst); + z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); + LLVMBuildStore(builder, z_dst, zs_dst_ptr); + } + + if (counter) + lp_build_occlusion_count(builder, type, mask->value, counter); + + return; + } + + /* + * Depths are expected to be between 0 and 1, even if they are stored in + * floats. Setting these bits here will ensure that the lp_build_conv() call + * below won't try to unnecessarily clamp the incoming values. + */ + if(z_src_type.floating) { + z_src_type.sign = FALSE; + z_src_type.norm = TRUE; + } + else { + assert(!z_src_type.sign); + assert(z_src_type.norm); + } + + /* Pick the depth type. */ + type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + + /* FIXME: Cope with a depth test type with a different bit width. */ + assert(type.width == z_src_type.width); + assert(type.length == z_src_type.length); + + /* Convert fragment Z from float to integer */ + lp_build_conv(builder, z_src_type, type, &z_src, 1, &z_src, 1); + + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(lp_build_vec_type(type), 0), ""); + + + /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 07b4f74dbc9..b7a51cd6679 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -119,7 +119,6 @@ generate_depth_stencil(LLVMBuilderRef builder, LLVMValueRef counter) { const struct util_format_description *format_desc; - struct lp_type dst_type; if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) return; @@ -127,37 +126,10 @@ generate_depth_stencil(LLVMBuilderRef builder, format_desc = util_format_description(key->zsbuf_format); assert(format_desc); - /* - * Depths are expected to be between 0 and 1, even if they are stored in - * floats. Setting these bits here will ensure that the lp_build_conv() call - * below won't try to unnecessarily clamp the incoming values. - */ - if(src_type.floating) { - src_type.sign = FALSE; - src_type.norm = TRUE; - } - else { - assert(!src_type.sign); - assert(src_type.norm); - } - - /* Pick the depth type. */ - dst_type = lp_depth_type(format_desc, src_type.width*src_type.length); - - /* FIXME: Cope with a depth test type with a different bit width. */ - assert(dst_type.width == src_type.width); - assert(dst_type.length == src_type.length); - - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1); - - dst_ptr = LLVMBuildBitCast(builder, - dst_ptr, - LLVMPointerType(lp_build_vec_type(dst_type), 0), ""); lp_build_depth_stencil_test(builder, &key->depth, key->stencil, - dst_type, + src_type, format_desc, mask, stencil_refs, -- cgit v1.2.3 From aa4cb5e2d8d48c7dcc9653c61a9e25494e3e7b2a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 7 Oct 2010 15:01:07 +0100 Subject: llvmpipe: try to be sensible about whether to branch after mask updates Don't branch more than once in quick succession. Don't branch at the end of the shader. --- src/gallium/auxiliary/gallivm/lp_bld_flow.c | 6 +-- src/gallium/auxiliary/gallivm/lp_bld_flow.h | 3 ++ src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 11 +++- src/gallium/drivers/llvmpipe/lp_bld_alpha.c | 6 ++- src/gallium/drivers/llvmpipe/lp_bld_alpha.h | 3 +- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 14 ++++- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 3 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 69 ++++++++++++++++--------- 8 files changed, 80 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index cd5fbc24638..1ec33c742e2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -450,7 +450,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) /** * Check if the mask predicate is zero. If so, jump to the end of the block. */ -static void +void lp_build_mask_check(struct lp_build_mask_context *mask) { LLVMBuilderRef builder = mask->flow->builder; @@ -490,8 +490,6 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, lp_build_flow_scope_begin(flow); lp_build_flow_scope_declare(flow, &mask->value); lp_build_flow_skip_begin(flow); - - lp_build_mask_check(mask); } @@ -505,8 +503,6 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); - - lp_build_mask_check(mask); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index fffb493a93b..095c781ec54 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -94,6 +94,9 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value); +void +lp_build_mask_check(struct lp_build_mask_context *mask); + LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 441aebae298..03020a62f85 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -959,8 +959,13 @@ emit_kil( } } - if(mask) + if(mask) { lp_build_mask_update(bld->mask, mask); + + /* XXX: figure out if we are at the end of the shader and skip this: + */ + lp_build_mask_check(bld->mask); + } } @@ -987,6 +992,10 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld, } lp_build_mask_update(bld->mask, mask); + + /* XXX: figure out if we are at the end of the shader and skip this: + */ + lp_build_mask_check(bld->mask); } static void diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c index e28efe778f9..e50643790c8 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref) + LLVMValueRef ref, + boolean do_branch) { struct lp_build_context bld; LLVMValueRef test; @@ -60,4 +61,7 @@ lp_build_alpha_test(LLVMBuilderRef builder, lp_build_name(test, "alpha_mask"); lp_build_mask_update(mask, test); + + if (do_branch) + lp_build_mask_check(mask); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h index 44603b418c0..27ca8aad4d4 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h @@ -48,7 +48,8 @@ lp_build_alpha_test(LLVMBuilderRef builder, struct lp_type type, struct lp_build_mask_context *mask, LLVMValueRef alpha, - LLVMValueRef ref); + LLVMValueRef ref, + boolean do_branch); #endif /* !LP_BLD_ALPHA_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 09b82fbe9ba..6b8ffb6ca26 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -462,7 +462,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter) + LLVMValueRef counter, + boolean do_branch) { struct lp_type type; struct lp_build_context bld; @@ -515,6 +516,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, lp_build_mask_update(mask, z_pass); + if (do_branch) + lp_build_mask_check(mask); + /* No need to worry about old stencil contents, just blend the * old and new values and shift into the correct position for * storage. @@ -701,6 +705,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * buffer values. Don't need to update Z buffer values. */ lp_build_mask_update(mask, z_pass); + + if (do_branch) { + lp_build_mask_check(mask); + do_branch = FALSE; + } } if (depth->writemask) { @@ -779,6 +788,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); + if (do_branch) + lp_build_mask_check(mask); + if (counter) lp_build_occlusion_count(builder, type, mask->value, counter); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index e257a5bd7d0..2a63bb9378b 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,7 +61,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter); + LLVMValueRef counter, + boolean do_branch); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index b7a51cd6679..df5dd83c875 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -116,7 +116,8 @@ generate_depth_stencil(LLVMBuilderRef builder, LLVMValueRef src, LLVMValueRef dst_ptr, LLVMValueRef facing, - LLVMValueRef counter) + LLVMValueRef counter, + boolean do_branch) { const struct util_format_description *format_desc; @@ -136,7 +137,8 @@ generate_depth_stencil(LLVMBuilderRef builder, src, dst_ptr, facing, - counter); + counter, + do_branch); } @@ -253,6 +255,9 @@ generate_fs(struct llvmpipe_context *lp, struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean early_depth_stencil_test; + boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.num_inputs < 3 && + shader->info.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; @@ -288,15 +293,6 @@ generate_fs(struct llvmpipe_context *lp, *pmask = lp_build_const_int_vec(type, ~0); } - /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); - - lp_build_interp_soa_update_pos(interp, i); - - /* Try to avoid the 1/w for quads where mask is zero. TODO: avoid - * this for depth-fail quads also. - */ - z = interp->pos[2]; early_depth_stencil_test = (key->depth.enabled || key->stencil[0].enabled) && @@ -304,10 +300,22 @@ generate_fs(struct llvmpipe_context *lp, !shader->info.uses_kill && !shader->info.writes_z; + /* 'mask' will control execution based on quad's pixel alive/killed state */ + lp_build_mask_begin(&mask, flow, type, *pmask); + + if (!early_depth_stencil_test && !simple_shader) + lp_build_mask_check(&mask); + + lp_build_interp_soa_update_pos(interp, i); + z = interp->pos[2]; + if (early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + stencil_refs, + z, depth_ptr, + facing, counter, + !simple_shader); lp_build_interp_soa_update_inputs(interp, i); @@ -337,7 +345,7 @@ generate_fs(struct llvmpipe_context *lp, alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value); + &mask, alpha, alpha_ref_value, FALSE); } LLVMBuildStore(builder, out, color[cbuf][chan]); @@ -356,7 +364,8 @@ generate_fs(struct llvmpipe_context *lp, if (!early_depth_stencil_test) generate_depth_stencil(builder, key, type, &mask, - stencil_refs, z, depth_ptr, facing, counter); + stencil_refs, z, depth_ptr, + facing, counter, FALSE); lp_build_mask_end(&mask); @@ -386,7 +395,8 @@ generate_blend(const struct pipe_blend_state *blend, LLVMValueRef context_ptr, LLVMValueRef mask, LLVMValueRef *src, - LLVMValueRef dst_ptr) + LLVMValueRef dst_ptr, + boolean do_branch) { struct lp_build_context bld; struct lp_build_flow_context *flow; @@ -401,9 +411,9 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); flow = lp_build_flow_create(builder); - - /* we'll use this mask context to skip blending if all pixels are dead */ lp_build_mask_begin(&mask_ctx, flow, type, mask); + if (do_branch) + lp_build_mask_check(&mask_ctx); vec_type = lp_build_vec_type(type); @@ -670,14 +680,23 @@ generate_fragment(struct llvmpipe_context *lp, /* * Blending. */ - generate_blend(&key->blend, - rt, - builder, - blend_type, - context_ptr, - blend_mask, - blend_in_color, - color_ptr); + { + /* Could the 4x4 have been killed? + */ + boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && + !key->alpha.enabled && + !shader->info.uses_kill); + + generate_blend(&key->blend, + rt, + builder, + blend_type, + context_ptr, + blend_mask, + blend_in_color, + color_ptr, + do_branch); + } } #ifdef PIPE_ARCH_X86 -- cgit v1.2.3 From 5b7eb868fde98388d80601d8dea39e679828f42f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 9 Oct 2010 11:28:00 +0100 Subject: llvmpipe: clean up shader pre/postamble, try to catch more early-z Specifically, can do early-depth-test even when alpahtest or kill-pixel are active, providing we defer the actual z write until the final mask is avaialable. Improves demos/fire.c especially in the case where you get close to the trees. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 40 +++-- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 15 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 241 +++++++++++++++++----------- 3 files changed, 193 insertions(+), 103 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 6b8ffb6ca26..8d9be2ebbbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -410,7 +410,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ -static void +void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, @@ -462,7 +462,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_src, LLVMValueRef zs_dst_ptr, LLVMValueRef face, - LLVMValueRef counter, + LLVMValueRef *zs_value, boolean do_branch) { struct lp_type type; @@ -524,17 +524,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * storage. */ if (depth->writemask) { - type.sign = 0; + type.sign = 1; lp_build_context_init(&bld, builder, type); z_dst = lp_build_select(&bld, mask->value, z_src, z_dst); z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); - LLVMBuildStore(builder, z_dst, zs_dst_ptr); + *zs_value = z_dst; } - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); - return; } @@ -779,7 +776,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, else zs_dst = stencil_vals; - LLVMBuildStore(builder, zs_dst, zs_dst_ptr); + *zs_value = zs_dst; } if (s_pass_mask) @@ -791,6 +788,29 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (do_branch) lp_build_mask_check(mask); - if (counter) - lp_build_occlusion_count(builder, type, mask->value, counter); +} + + + +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + struct lp_type type; + struct lp_build_context bld; + LLVMValueRef z_dst; + + /* XXX: pointlessly redo type logic: + */ + type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&bld, builder, type); + + z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); + z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst); + + LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 2a63bb9378b..0f89668123a 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -61,8 +61,21 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef zs_src, LLVMValueRef zs_dst_ptr, LLVMValueRef facing, - LLVMValueRef counter, + LLVMValueRef *zs_value, boolean do_branch); +void +lp_build_deferred_depth_write(LLVMBuilderRef builder, + struct lp_type z_src_type, + const struct util_format_description *format_desc, + struct lp_build_mask_context *mask, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + +void +lp_build_occlusion_count(LLVMBuilderRef builder, + struct lp_type type, + LLVMValueRef maskvalue, + LLVMValueRef counter); #endif /* !LP_BLD_DEPTH_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index df5dd83c875..f45f36f6332 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -104,43 +104,6 @@ static unsigned fs_no = 0; -/** - * Generate the depth /stencil test code. - */ -static void -generate_depth_stencil(LLVMBuilderRef builder, - const struct lp_fragment_shader_variant_key *key, - struct lp_type src_type, - struct lp_build_mask_context *mask, - LLVMValueRef stencil_refs[2], - LLVMValueRef src, - LLVMValueRef dst_ptr, - LLVMValueRef facing, - LLVMValueRef counter, - boolean do_branch) -{ - const struct util_format_description *format_desc; - - if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled) - return; - - format_desc = util_format_description(key->zsbuf_format); - assert(format_desc); - - lp_build_depth_stencil_test(builder, - &key->depth, - key->stencil, - src_type, - format_desc, - mask, - stencil_refs, - src, - dst_ptr, - facing, - counter, - do_branch); -} - /** * Expand the relevent bits of mask_input to a 4-dword mask for the @@ -222,6 +185,26 @@ generate_quad_mask(LLVMBuilderRef builder, } +#define EARLY_DEPTH_TEST 0x1 +#define LATE_DEPTH_TEST 0x2 +#define EARLY_DEPTH_WRITE 0x4 +#define LATE_DEPTH_WRITE 0x8 + +static int +find_output_by_semantic( const struct tgsi_shader_info *info, + unsigned semantic, + unsigned index ) +{ + int i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return i; + + return -1; +} + /** * Generate the fragment shader, depth/stencil test, and alpha tests. @@ -246,21 +229,53 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef mask_input, LLVMValueRef counter) { + const struct util_format_description *zs_format_desc = NULL; const struct tgsi_token *tokens = shader->base.tokens; LLVMTypeRef vec_type; LLVMValueRef consts_ptr; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; LLVMValueRef z; + LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; struct lp_build_flow_context *flow; struct lp_build_mask_context mask; - boolean early_depth_stencil_test; boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && shader->info.num_inputs < 3 && shader->info.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; + unsigned depth_mode; + + if (key->depth.enabled || + key->stencil[0].enabled || + key->stencil[1].enabled) { + + zs_format_desc = util_format_description(key->zsbuf_format); + assert(zs_format_desc); + + if (!shader->info.writes_z) { + if (key->alpha.enabled || shader->info.uses_kill) + /* With alpha test and kill, can do the depth test early + * and hopefully eliminate some quads. But need to do a + * special deferred depth write once the final mask value + * is known. + */ + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + else + depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; + } + else { + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; + } + + if (!(key->depth.enabled && key->depth.writemask) && + !(key->stencil[0].enabled && key->stencil[0].writemask)) + depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE); + } + else { + depth_mode = 0; + } assert(i < 4); @@ -293,79 +308,121 @@ generate_fs(struct llvmpipe_context *lp, *pmask = lp_build_const_int_vec(type, ~0); } - - early_depth_stencil_test = - (key->depth.enabled || key->stencil[0].enabled) && - !key->alpha.enabled && - !shader->info.uses_kill && - !shader->info.writes_z; - /* 'mask' will control execution based on quad's pixel alive/killed state */ lp_build_mask_begin(&mask, flow, type, *pmask); - if (!early_depth_stencil_test && !simple_shader) + if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); lp_build_interp_soa_update_pos(interp, i); z = interp->pos[2]; - if (early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, - z, depth_ptr, - facing, counter, - !simple_shader); + if (depth_mode & EARLY_DEPTH_TEST) { + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + + if (depth_mode & EARLY_DEPTH_WRITE) + LLVMBuildStore(builder, zs_value, depth_ptr); + } lp_build_interp_soa_update_inputs(interp, i); - + + /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, outputs, sampler, &shader->info); - /* loop over fragment shader outputs/results */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { + + /* Alpha test */ + if (key->alpha.enabled) { + int color0 = find_output_by_semantic(&shader->info, + TGSI_SEMANTIC_COLOR, + 0); + + if (color0 != -1) { + LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); + LLVMValueRef alpha_ref_value; + + alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); + alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); + + lp_build_alpha_test(builder, key->alpha.func, type, + &mask, alpha, alpha_ref_value, + (depth_mode & LATE_DEPTH_TEST) != 0); + } + } + + /* Late Z test */ + if (depth_mode & LATE_DEPTH_TEST) { + int pos0 = find_output_by_semantic(&shader->info, + TGSI_SEMANTIC_POSITION, + 0); + + if (pos0 != -1) { + z = LLVMBuildLoad(builder, outputs[pos0][2], "z"); + lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]); + } + + lp_build_depth_stencil_test(builder, + &key->depth, + key->stencil, + type, + zs_format_desc, + &mask, + stencil_refs, + z, + depth_ptr, facing, + &zs_value, + !simple_shader); + /* Late Z write */ + if (depth_mode & LATE_DEPTH_WRITE) + LLVMBuildStore(builder, zs_value, depth_ptr); + } + else if ((depth_mode & EARLY_DEPTH_TEST) && + (depth_mode & LATE_DEPTH_WRITE)) + { + /* Need to apply a reduced mask to the depth write. Reload the + * depth value, update from zs_value with the new mask value and + * write that out. + */ + lp_build_deferred_depth_write(builder, + type, + zs_format_desc, + &mask, + depth_ptr, + zs_value); + } + + + /* Color write */ + for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) + { + if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) + { + unsigned cbuf = shader->info.output_semantic_index[attrib]; + for(chan = 0; chan < NUM_CHANNELS; ++chan) + { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]); - - switch (shader->info.output_semantic_name[attrib]) { - case TGSI_SEMANTIC_COLOR: - { - unsigned cbuf = shader->info.output_semantic_index[attrib]; - - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - - /* Alpha test */ - /* XXX: should only test the final assignment to alpha */ - if (cbuf == 0 && chan == 3 && key->alpha.enabled) { - LLVMValueRef alpha = out; - LLVMValueRef alpha_ref_value; - alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr); - alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value); - lp_build_alpha_test(builder, key->alpha.func, type, - &mask, alpha, alpha_ref_value, FALSE); - } - - LLVMBuildStore(builder, out, color[cbuf][chan]); - break; - } - - case TGSI_SEMANTIC_POSITION: - if(chan == 2) - z = out; - break; - } + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); } } } - if (!early_depth_stencil_test) - generate_depth_stencil(builder, key, - type, &mask, - stencil_refs, z, depth_ptr, - facing, counter, FALSE); + if (counter) + lp_build_occlusion_count(builder, type, mask.value, counter); lp_build_mask_end(&mask); -- cgit v1.2.3 From 2de720dc8ff89676aa7bb5eb74aeb6d44e028fa2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 1 Oct 2010 15:13:51 +0100 Subject: llvmpipe: simplified SSE2 swz/unswz routines We've been using these in the linear path for a while now. Based on Chris's SSSE3 code, but using only sse2 opcodes. Speed seems to be identical, but code is simpler & removes dependency on SSE3. Should be easier to extend to other rgba8 formats. --- src/gallium/drivers/llvmpipe/SConscript | 8 +- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 245 ++++++++++++---------------- 2 files changed, 107 insertions(+), 146 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 650435f0f19..774ad91a074 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -27,13 +27,7 @@ env.Depends('lp_tile_soa.c', [ ]) -# Only enable SSSE3 for lp_tile_soa_sse3.c -ssse3_env = env.Clone() -if env['gcc'] \ - and distutils.version.LooseVersion(env['CCVERSION']) >= distutils.version.LooseVersion('4.3') \ - and env['machine'] in ('x86', 'x86_64') : - ssse3_env.Append(CCFLAGS = ['-mssse3']) -lp_tile_soa_os = ssse3_env.SharedObject('lp_tile_soa.c') +lp_tile_soa_os = env.SharedObject('lp_tile_soa.c') llvmpipe = env.ConvenienceLibrary( diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 2ba39052aba..c76549cdadf 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -295,87 +295,98 @@ def generate_ssse3(): #include "util/u_sse.h" +static INLINE void swz4( __m128i x, + __m128i y, + __m128i z, + __m128i w, + __m128i *a, + __m128i *b, + __m128i *c, + __m128i *d) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + __m128i e, f, g, h; + + m = _mm_unpacklo_epi8(x,y); + n = _mm_unpackhi_epi8(x,y); + o = _mm_unpacklo_epi8(z,w); + p = _mm_unpackhi_epi8(z,w); + + i = _mm_unpacklo_epi16(m,n); + j = _mm_unpackhi_epi16(m,n); + k = _mm_unpacklo_epi16(o,p); + l = _mm_unpackhi_epi16(o,p); + + e = _mm_unpacklo_epi8(i,j); + f = _mm_unpackhi_epi8(i,j); + g = _mm_unpacklo_epi8(k,l); + h = _mm_unpackhi_epi8(k,l); + + *a = _mm_unpacklo_epi64(e,g); + *b = _mm_unpackhi_epi64(e,g); + *c = _mm_unpacklo_epi64(f,h); + *d = _mm_unpackhi_epi64(f,h); +} + +static INLINE void unswz4( __m128i a, + __m128i b, + __m128i c, + __m128i d, + __m128i *x, + __m128i *y, + __m128i *z, + __m128i *w) +{ + __m128i i, j, k, l; + __m128i m, n, o, p; + + i = _mm_unpacklo_epi8(a,b); + j = _mm_unpackhi_epi8(a,b); + k = _mm_unpacklo_epi8(c,d); + l = _mm_unpackhi_epi8(c,d); + + m = _mm_unpacklo_epi16(i,k); + n = _mm_unpackhi_epi16(i,k); + o = _mm_unpacklo_epi16(j,l); + p = _mm_unpackhi_epi16(j,l); + + *x = _mm_unpacklo_epi64(m,n); + *y = _mm_unpackhi_epi64(m,n); + *z = _mm_unpacklo_epi64(o,p); + *w = _mm_unpackhi_epi64(o,p); +} + static void lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0) { - + __m128i *dst128 = (__m128i *) dst; unsigned x, y; - __m128i *pdst = (__m128i*) dst; - const uint8_t *ysrc0 = src + y0*src_stride + x0*sizeof(uint32_t); - unsigned int tile_stridex = src_stride*(TILE_VECTOR_HEIGHT - 1) - sizeof(uint32_t)*TILE_VECTOR_WIDTH; - unsigned int tile_stridey = src_stride*TILE_VECTOR_HEIGHT; - - const __m128i shuffle00 = _mm_setr_epi8(0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x02,0x06,0xff,0xff,0x0a,0x0e); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x01,0x05,0xff,0xff,0x09,0x0d); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x04,0xff,0xff,0x08,0x0c); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x03,0x07,0xff,0xff,0x0b,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i line0 = *(__m128i*)ysrc0; - const uint8_t *ysrc = ysrc0 + src_stride; - ysrc0 += tile_stridey; - - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - __m128i r, g, b, a, line1; - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_shuffle_epi8(line0, shuffle00); - g = _mm_shuffle_epi8(line0, shuffle01); - b = _mm_shuffle_epi8(line0, shuffle02); - a = _mm_shuffle_epi8(line0, shuffle03); - - line0 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc += src_stride; - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle10)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle11)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle12)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle13)); - - line1 = *(__m128i*)ysrc; - PIPE_READ_WRITE_BARRIER(); - ysrc -= tile_stridex; - r = _mm_or_si128(r, _mm_shuffle_epi8(line0, shuffle20)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line0, shuffle21)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line0, shuffle22)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line0, shuffle23)); - - if (x + 1 < TILE_SIZE) { - line0 = *(__m128i*)ysrc; - ysrc += src_stride; - } - - PIPE_READ_WRITE_BARRIER(); - r = _mm_or_si128(r, _mm_shuffle_epi8(line1, shuffle30)); - g = _mm_or_si128(g, _mm_shuffle_epi8(line1, shuffle31)); - b = _mm_or_si128(b, _mm_shuffle_epi8(line1, shuffle32)); - a = _mm_or_si128(a, _mm_shuffle_epi8(line1, shuffle33)); - - *pdst++ = r; - *pdst++ = g; - *pdst++ = b; - *pdst++ = a; + + src += y0 * src_stride; + src += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *src_row = src; + + for (x = 0; x < TILE_SIZE; x += 4) { + swz4(*(__m128i *) (src_row + 0 * src_stride), + *(__m128i *) (src_row + 1 * src_stride), + *(__m128i *) (src_row + 2 * src_stride), + *(__m128i *) (src_row + 3 * src_stride), + dst128 + 2, /* b */ + dst128 + 1, /* g */ + dst128 + 0, /* r */ + dst128 + 3); /* a */ + + dst128 += 4; + src_row += sizeof(__m128i); } - } + src += 4 * src_stride; + } } static void @@ -384,73 +395,29 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, unsigned x0, unsigned y0) { unsigned int x, y; - const __m128i *psrc = (__m128i*) src; - const __m128i *end = (__m128i*) (src + (y0 + TILE_SIZE - 1)*dst_stride + (x0 + TILE_SIZE - 1)*sizeof(uint32_t)); - uint8_t *pdst = dst + y0 * dst_stride + x0 * sizeof(uint32_t); - __m128i c0 = *psrc++; - __m128i c1; - - const __m128i shuffle00 = _mm_setr_epi8(0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff); - const __m128i shuffle01 = _mm_setr_epi8(0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff); - const __m128i shuffle02 = _mm_setr_epi8(0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff); - const __m128i shuffle03 = _mm_setr_epi8(0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff); - - const __m128i shuffle10 = _mm_setr_epi8(0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff); - const __m128i shuffle11 = _mm_setr_epi8(0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff); - const __m128i shuffle12 = _mm_setr_epi8(0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff); - const __m128i shuffle13 = _mm_setr_epi8(0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff); - - const __m128i shuffle20 = _mm_setr_epi8(0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05,0xff,0xff,0xff); - const __m128i shuffle21 = _mm_setr_epi8(0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07,0xff,0xff,0xff); - const __m128i shuffle22 = _mm_setr_epi8(0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d,0xff,0xff,0xff); - const __m128i shuffle23 = _mm_setr_epi8(0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f,0xff,0xff,0xff); - - const __m128i shuffle30 = _mm_setr_epi8(0xff,0xff,0xff,0x00,0xff,0xff,0xff,0x01,0xff,0xff,0xff,0x04,0xff,0xff,0xff,0x05); - const __m128i shuffle31 = _mm_setr_epi8(0xff,0xff,0xff,0x02,0xff,0xff,0xff,0x03,0xff,0xff,0xff,0x06,0xff,0xff,0xff,0x07); - const __m128i shuffle32 = _mm_setr_epi8(0xff,0xff,0xff,0x08,0xff,0xff,0xff,0x09,0xff,0xff,0xff,0x0c,0xff,0xff,0xff,0x0d); - const __m128i shuffle33 = _mm_setr_epi8(0xff,0xff,0xff,0x0a,0xff,0xff,0xff,0x0b,0xff,0xff,0xff,0x0e,0xff,0xff,0xff,0x0f); - - for (y = 0; y < TILE_SIZE; y += TILE_VECTOR_HEIGHT) { - __m128i *tile = (__m128i*) pdst; - pdst += dst_stride * TILE_VECTOR_HEIGHT; - for (x = 0; x < TILE_SIZE; x += TILE_VECTOR_WIDTH) { - uint8_t *linep = (uint8_t*) (tile++); - __m128i line0, line1, line2, line3; - - c1 = *psrc++; /* r */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_shuffle_epi8(c0, shuffle00); - line1 = _mm_shuffle_epi8(c0, shuffle01); - line2 = _mm_shuffle_epi8(c0, shuffle02); - line3 = _mm_shuffle_epi8(c0, shuffle03); - - c0 = *psrc++; /* g */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle10)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle11)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle12)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle13)); - - c1 = *psrc++; /* b */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c0, shuffle20)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c0, shuffle21)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c0, shuffle22)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c0, shuffle23)); - - if (psrc != end) - c0 = *psrc++; /* a */ - PIPE_READ_WRITE_BARRIER(); - line0 = _mm_or_si128(line0, _mm_shuffle_epi8(c1, shuffle30)); - line1 = _mm_or_si128(line1, _mm_shuffle_epi8(c1, shuffle31)); - line2 = _mm_or_si128(line2, _mm_shuffle_epi8(c1, shuffle32)); - line3 = _mm_or_si128(line3, _mm_shuffle_epi8(c1, shuffle33)); - - *(__m128i*) (linep) = line0; - *(__m128i*) (((char*)linep) + dst_stride) = line1; - *(__m128i*) (((char*)linep) + 2 * dst_stride) = line2; - *(__m128i*) (((char*)linep) + 3 * dst_stride) = line3; + const __m128i *src128 = (const __m128i *) src; + + dst += y0 * dst_stride; + dst += x0 * sizeof(uint32_t); + + for (y = 0; y < TILE_SIZE; y += 4) { + const uint8_t *dst_row = dst; + + for (x = 0; x < TILE_SIZE; x += 4) { + unswz4( src128[2], /* b */ + src128[1], /* g */ + src128[0], /* r */ + src128[3], /* a */ + (__m128i *) (dst_row + 0 * dst_stride), + (__m128i *) (dst_row + 1 * dst_stride), + (__m128i *) (dst_row + 2 * dst_stride), + (__m128i *) (dst_row + 3 * dst_stride)); + + src128 += 4; + dst_row += sizeof(__m128i);; } + + dst += 4 * dst_stride; } } -- cgit v1.2.3 From edba53024f85a27fcbca7cbe139ceda172406653 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 6 Oct 2010 21:01:38 +0100 Subject: llvmpipe: Fix MSVC build. Enable the new SSE2 code on non SSE3 systems. --- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 86 +++++++++++++++-------------- 1 file changed, 44 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index c76549cdadf..e49f9c62fe7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -289,29 +289,30 @@ def generate_format_write(format, src_channel, src_native_type, src_suffix): print -def generate_ssse3(): +def generate_sse2(): print ''' #if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" -static INLINE void swz4( __m128i x, - __m128i y, - __m128i z, - __m128i w, - __m128i *a, - __m128i *b, - __m128i *c, - __m128i *d) +static ALWAYS_INLINE void +swz4( const __m128i * restrict x, + const __m128i * restrict y, + const __m128i * restrict z, + const __m128i * restrict w, + __m128i * restrict a, + __m128i * restrict b, + __m128i * restrict c, + __m128i * restrict d) { __m128i i, j, k, l; __m128i m, n, o, p; __m128i e, f, g, h; - m = _mm_unpacklo_epi8(x,y); - n = _mm_unpackhi_epi8(x,y); - o = _mm_unpacklo_epi8(z,w); - p = _mm_unpackhi_epi8(z,w); + m = _mm_unpacklo_epi8(*x,*y); + n = _mm_unpackhi_epi8(*x,*y); + o = _mm_unpacklo_epi8(*z,*w); + p = _mm_unpackhi_epi8(*z,*w); i = _mm_unpacklo_epi16(m,n); j = _mm_unpackhi_epi16(m,n); @@ -329,22 +330,23 @@ static INLINE void swz4( __m128i x, *d = _mm_unpackhi_epi64(f,h); } -static INLINE void unswz4( __m128i a, - __m128i b, - __m128i c, - __m128i d, - __m128i *x, - __m128i *y, - __m128i *z, - __m128i *w) +static ALWAYS_INLINE void +unswz4( const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict x, + __m128i * restrict y, + __m128i * restrict z, + __m128i * restrict w) { __m128i i, j, k, l; __m128i m, n, o, p; - i = _mm_unpacklo_epi8(a,b); - j = _mm_unpackhi_epi8(a,b); - k = _mm_unpacklo_epi8(c,d); - l = _mm_unpackhi_epi8(c,d); + i = _mm_unpacklo_epi8(*a,*b); + j = _mm_unpackhi_epi8(*a,*b); + k = _mm_unpacklo_epi8(*c,*d); + l = _mm_unpackhi_epi8(*c,*d); m = _mm_unpacklo_epi16(i,k); n = _mm_unpackhi_epi16(i,k); @@ -358,9 +360,9 @@ static INLINE void unswz4( __m128i a, } static void -lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, - const uint8_t *src, unsigned src_stride, - unsigned x0, unsigned y0) +lp_tile_b8g8r8a8_unorm_swizzle_4ub_sse2(uint8_t * restrict dst, + const uint8_t * restrict src, unsigned src_stride, + unsigned x0, unsigned y0) { __m128i *dst128 = (__m128i *) dst; unsigned x, y; @@ -372,10 +374,10 @@ lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, const uint8_t *src_row = src; for (x = 0; x < TILE_SIZE; x += 4) { - swz4(*(__m128i *) (src_row + 0 * src_stride), - *(__m128i *) (src_row + 1 * src_stride), - *(__m128i *) (src_row + 2 * src_stride), - *(__m128i *) (src_row + 3 * src_stride), + swz4((const __m128i *) (src_row + 0 * src_stride), + (const __m128i *) (src_row + 1 * src_stride), + (const __m128i *) (src_row + 2 * src_stride), + (const __m128i *) (src_row + 3 * src_stride), dst128 + 2, /* b */ dst128 + 1, /* g */ dst128 + 0, /* r */ @@ -390,8 +392,8 @@ lp_tile_b8g8r8a8_unorm_swizzle_4ub_ssse3(uint8_t *dst, } static void -lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, - uint8_t *dst, unsigned dst_stride, +lp_tile_b8g8r8a8_unorm_unswizzle_4ub_sse2(const uint8_t * restrict src, + uint8_t * restrict dst, unsigned dst_stride, unsigned x0, unsigned y0) { unsigned int x, y; @@ -404,10 +406,10 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, const uint8_t *dst_row = dst; for (x = 0; x < TILE_SIZE; x += 4) { - unswz4( src128[2], /* b */ - src128[1], /* g */ - src128[0], /* r */ - src128[3], /* a */ + unswz4( &src128[2], /* b */ + &src128[1], /* g */ + &src128[0], /* r */ + &src128[3], /* a */ (__m128i *) (dst_row + 0 * dst_stride), (__m128i *) (dst_row + 1 * dst_stride), (__m128i *) (dst_row + 2 * dst_stride), @@ -421,7 +423,7 @@ lp_tile_b8g8r8a8_unorm_unswizzle_4ub_ssse3(const uint8_t *src, } } -#endif /* PIPE_ARCH_SSSE3 */ +#endif /* PIPE_ARCH_SSE */ ''' @@ -446,7 +448,7 @@ def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix): func_name = 'lp_tile_%s_swizzle_%s' % (format.short_name(), dst_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -484,7 +486,7 @@ def generate_unswizzle(formats, src_channel, src_native_type, src_suffix): func_name = 'lp_tile_%s_unswizzle_%s' % (format.short_name(), src_suffix) if format.name == 'PIPE_FORMAT_B8G8R8A8_UNORM': print '#ifdef PIPE_ARCH_SSE' - print ' func = util_cpu_caps.has_ssse3 ? %s_ssse3 : %s;' % (func_name, func_name) + print ' func = util_cpu_caps.has_sse2 ? %s_sse2 : %s;' % (func_name, func_name) print '#else' print ' func = %s;' % (func_name,) print '#endif' @@ -544,7 +546,7 @@ def main(): print '};' print - generate_ssse3() + generate_sse2() channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' -- cgit v1.2.3 From cc40abad519cc0f765c6d8f6fad4154bed8dd9c2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 9 Oct 2010 12:55:31 +0100 Subject: gallivm: Don't generate Phis for execution mask. --- src/gallium/auxiliary/gallivm/lp_bld_flow.c | 28 +++++++++++++++++++++------- src/gallium/auxiliary/gallivm/lp_bld_flow.h | 5 ++++- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 8 ++++---- src/gallium/drivers/llvmpipe/lp_state_fs.c | 8 +++----- 4 files changed, 32 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index 1ec33c742e2..a5d65e9b396 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -454,12 +454,15 @@ void lp_build_mask_check(struct lp_build_mask_context *mask) { LLVMBuilderRef builder = mask->flow->builder; + LLVMValueRef value; LLVMValueRef cond; + value = lp_build_mask_value(mask); + /* cond = (mask == 0) */ cond = LLVMBuildICmp(builder, LLVMIntEQ, - LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""), + LLVMBuildBitCast(builder, value, mask->reg_type, ""), LLVMConstNull(mask->reg_type), ""); @@ -485,14 +488,23 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, mask->flow = flow; mask->reg_type = LLVMIntType(type.width * type.length); - mask->value = value; + mask->var = lp_build_alloca(flow->builder, + lp_build_int_vec_type(type), + "execution_mask"); + + LLVMBuildStore(flow->builder, value, mask->var); - lp_build_flow_scope_begin(flow); - lp_build_flow_scope_declare(flow, &mask->value); lp_build_flow_skip_begin(flow); } +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask) +{ + return LLVMBuildLoad(mask->flow->builder, mask->var, ""); +} + + /** * Update boolean mask with given value (bitwise AND). * Typically used to update the quad's pixel alive/killed mask @@ -502,7 +514,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, ""); + value = LLVMBuildAnd(mask->flow->builder, + lp_build_mask_value(mask), + value, ""); + LLVMBuildStore(mask->flow->builder, value, mask->var); } @@ -513,8 +528,7 @@ LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { lp_build_flow_skip_end(mask->flow); - lp_build_flow_scope_end(mask->flow); - return mask->value; + return lp_build_mask_value(mask); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 095c781ec54..0fc6317b339 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -77,7 +77,7 @@ struct lp_build_mask_context LLVMTypeRef reg_type; - LLVMValueRef value; + LLVMValueRef var; }; @@ -87,6 +87,9 @@ lp_build_mask_begin(struct lp_build_mask_context *mask, struct lp_type type, LLVMValueRef value); +LLVMValueRef +lp_build_mask_value(struct lp_build_mask_context *mask); + /** * Bitwise AND the mask with the given value, if a previous mask was set. */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 8d9be2ebbbf..e768493103e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -473,7 +473,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; LLVMValueRef z_pass = NULL, s_pass_mask = NULL; - LLVMValueRef orig_mask = mask->value; + LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; /* Prototype a simpler path: @@ -527,7 +527,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, type.sign = 1; lp_build_context_init(&bld, builder, type); - z_dst = lp_build_select(&bld, mask->value, z_src, z_dst); + z_dst = lp_build_select(&bld, lp_build_mask_value(mask), z_src, z_dst); z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); *zs_value = z_dst; } @@ -710,7 +710,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (depth->writemask) { - LLVMValueRef zselectmask = mask->value; + LLVMValueRef zselectmask = lp_build_mask_value(mask); /* mask off bits that failed Z test */ zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); @@ -810,7 +810,7 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder, lp_build_context_init(&bld, builder, type); z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); - z_dst = lp_build_select(&bld, mask->value, zs_value, z_dst); + z_dst = lp_build_select(&bld, lp_build_mask_value(mask), zs_value, z_dst); LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index f45f36f6332..cf07cb49764 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -422,16 +422,14 @@ generate_fs(struct llvmpipe_context *lp, } if (counter) - lp_build_occlusion_count(builder, type, mask.value, counter); + lp_build_occlusion_count(builder, type, + lp_build_mask_value(&mask), counter); - lp_build_mask_end(&mask); + *pmask = lp_build_mask_end(&mask); lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); - - *pmask = mask.value; - } -- cgit v1.2.3 From d45c379027054e563c4f4379fb69fc9f68612f75 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 9 Oct 2010 20:14:03 +0100 Subject: gallivm: Remove support for Phi generation. Simply rely on mem2reg pass. It's easier and more reliable. --- src/gallium/auxiliary/gallivm/lp_bld_flow.c | 211 ---------------------------- src/gallium/auxiliary/gallivm/lp_bld_flow.h | 10 -- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 - 3 files changed, 225 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index a5d65e9b396..22c2db8c449 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -45,22 +45,11 @@ * Enumeration of all possible flow constructs. */ enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SCOPE, LP_BUILD_FLOW_SKIP, LP_BUILD_FLOW_IF }; -/** - * Variable declaration scope. - */ -struct lp_build_flow_scope -{ - /** Number of variables declared in this scope */ - unsigned num_variables; -}; - - /** * Early exit. Useful to skip to the end of a function or block when * the execution mask becomes zero or when there is an error condition. @@ -69,11 +58,6 @@ struct lp_build_flow_skip { /** Block to skip to */ LLVMBasicBlockRef block; - - /** Number of variables declared at the beginning */ - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ }; @@ -82,10 +66,6 @@ struct lp_build_flow_skip */ struct lp_build_flow_if { - unsigned num_variables; - - LLVMValueRef *phi; /**< array [num_variables] */ - LLVMValueRef condition; LLVMBasicBlockRef entry_block, true_block, false_block, merge_block; }; @@ -96,7 +76,6 @@ struct lp_build_flow_if */ union lp_build_flow_construct_data { - struct lp_build_flow_scope scope; struct lp_build_flow_skip skip; struct lp_build_flow_if ifthen; }; @@ -127,12 +106,6 @@ struct lp_build_flow_context */ struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; unsigned num_constructs; - - /** - * Variable stack - */ - LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES]; - unsigned num_variables; }; @@ -155,7 +128,6 @@ void lp_build_flow_destroy(struct lp_build_flow_context *flow) { assert(flow->num_constructs == 0); - assert(flow->num_variables == 0); FREE(flow); } @@ -217,93 +189,6 @@ lp_build_flow_pop(struct lp_build_flow_context *flow, } -/** - * Begin a variable scope. - * - * - */ -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - scope->num_variables = 0; -} - - -/** - * Declare a variable. - * - * A variable is a named entity which can have different LLVMValueRef's at - * different points of the program. This is relevant for control flow because - * when there are multiple branches to a same location we need to replace - * the variable's value with a Phi function as explained in - * http://en.wikipedia.org/wiki/Static_single_assignment_form . - * - * We keep track of variables by keeping around a pointer to where they're - * current. - * - * There are a few cautions to observe: - * - * - Variable's value must not be NULL. If there is no initial value then - * LLVMGetUndef() should be used. - * - * - Variable's value must be kept up-to-date. If the variable is going to be - * modified by a function then a pointer should be passed so that its value - * is accurate. Failure to do this will cause some of the variables' - * transient values to be lost, leading to wrong results. - * - * - A program should be written from top to bottom, by always appending - * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or - * modifying existing statements will most likely lead to wrong results. - * - */ -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(*variable); - if(!*variable) - return; - - assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES); - if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES) - return; - - flow->variables[flow->num_variables++] = variable; - ++scope->num_variables; -} - - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow) -{ - struct lp_build_flow_scope *scope; - - scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope; - if(!scope) - return; - - assert(flow->num_variables >= scope->num_variables); - if(flow->num_variables < scope->num_variables) { - flow->num_variables = 0; - return; - } - - flow->num_variables -= scope->num_variables; -} - - /** * Note: this function has no dependencies on the flow code and could * be used elsewhere. @@ -350,7 +235,6 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) { struct lp_build_flow_skip *skip; LLVMBuilderRef builder; - unsigned i; skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; if(!skip) @@ -359,26 +243,9 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) /* create new basic block */ skip->block = lp_build_flow_insert_block(flow); - skip->num_variables = flow->num_variables; - if(!skip->num_variables) { - skip->phi = NULL; - return; - } - - /* Allocate a Phi node for each variable in this skip scope */ - skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi); - if(!skip->phi) { - skip->num_variables = 0; - return; - } - builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, skip->block); - /* create a Phi node for each variable */ - for(i = 0; i < skip->num_variables; ++i) - skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - LLVMDisposeBuilder(builder); } @@ -392,25 +259,14 @@ lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, LLVMValueRef cond) { struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; LLVMBasicBlockRef new_block; - unsigned i; skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; if(!skip) return; - current_block = LLVMGetInsertBlock(flow->builder); - new_block = lp_build_flow_insert_block(flow); - /* for each variable, update the Phi node with a (variable, block) pair */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - } - /* if cond is true, goto skip->block, else goto new_block */ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); @@ -422,28 +278,14 @@ void lp_build_flow_skip_end(struct lp_build_flow_context *flow) { struct lp_build_flow_skip *skip; - LLVMBasicBlockRef current_block; - unsigned i; skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; if(!skip) return; - current_block = LLVMGetInsertBlock(flow->builder); - - /* add (variable, block) tuples to the phi nodes */ - for(i = 0; i < skip->num_variables; ++i) { - assert(*flow->variables[i]); - assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i])); - LLVMAddIncoming(skip->phi[i], flow->variables[i], ¤t_block, 1); - *flow->variables[i] = skip->phi[i]; - } - /* goto block */ LLVMBuildBr(flow->builder, skip->block); LLVMPositionBuilderAtEnd(flow->builder, skip->block); - - FREE(skip->phi); } @@ -659,7 +501,6 @@ lp_build_if(struct lp_build_if_state *ctx, { LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); struct lp_build_flow_if *ifthen; - unsigned i; memset(ctx, 0, sizeof(*ctx)); ctx->builder = builder; @@ -669,31 +510,13 @@ lp_build_if(struct lp_build_if_state *ctx, ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - ifthen->num_variables = flow->num_variables; ifthen->condition = condition; ifthen->entry_block = block; - /* create a Phi node for each variable in this flow scope */ - ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi)); - if (!ifthen->phi) { - ifthen->num_variables = 0; - return; - } - /* create endif/merge basic block for the phi functions */ ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block"); LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); - /* create a phi node for each variable */ - for (i = 0; i < flow->num_variables; i++) { - ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), ""); - - /* add add the initial value of the var from the entry block */ - if (!LLVMIsUndef(*flow->variables[i])) - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], - &ifthen->entry_block, 1); - } - /* create/insert true_block before merge_block */ ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block"); @@ -710,18 +533,10 @@ lp_build_else(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; - unsigned i; ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); - /* for each variable, update the Phi node with a (variable, block) pair */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - } - /* create/insert false_block before the merge block */ ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block"); @@ -738,8 +553,6 @@ lp_build_endif(struct lp_build_if_state *ctx) { struct lp_build_flow_context *flow = ctx->flow; struct lp_build_flow_if *ifthen; - LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder); - unsigned i; ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen; assert(ifthen); @@ -747,30 +560,6 @@ lp_build_endif(struct lp_build_if_state *ctx) /* Insert branch to the merge block from current block */ LLVMBuildBr(ctx->builder, ifthen->merge_block); - if (ifthen->false_block) { - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - /* for each variable, update the Phi node with a (variable, block) pair */ - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1); - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - else { - /* no else clause */ - LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block); - for (i = 0; i < flow->num_variables; i++) { - assert(*flow->variables[i]); - LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1); - - /* replace the variable ref with the phi function */ - *flow->variables[i] = ifthen->phi[i]; - } - } - - FREE(ifthen->phi); - /*** *** Now patch in the various branch instructions. ***/ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 0fc6317b339..403e46e52e8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -50,16 +50,6 @@ lp_build_flow_create(LLVMBuilderRef builder); void lp_build_flow_destroy(struct lp_build_flow_context *flow); -void -lp_build_flow_scope_begin(struct lp_build_flow_context *flow); - -void -lp_build_flow_scope_declare(struct lp_build_flow_context *flow, - LLVMValueRef *variable); - -void -lp_build_flow_scope_end(struct lp_build_flow_context *flow); - void lp_build_flow_skip_begin(struct lp_build_flow_context *flow); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index cf07cb49764..3b0706e3ec1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -290,8 +290,6 @@ generate_fs(struct llvmpipe_context *lp, memset(outputs, 0, sizeof outputs); - lp_build_flow_scope_begin(flow); - /* Declare the color and z variables */ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) { for(chan = 0; chan < NUM_CHANNELS; ++chan) { @@ -427,8 +425,6 @@ generate_fs(struct llvmpipe_context *lp, *pmask = lp_build_mask_end(&mask); - lp_build_flow_scope_end(flow); - lp_build_flow_destroy(flow); } -- cgit v1.2.3 From 307df6a858dcab1bc10f3f52d9968acb3ea6d74f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 9 Oct 2010 21:39:14 +0100 Subject: gallivm: Cleanup the rest of the flow module. --- src/gallium/auxiliary/gallivm/lp_bld_flow.c | 210 +++------------------------- src/gallium/auxiliary/gallivm/lp_bld_flow.h | 28 ++-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 12 +- 3 files changed, 39 insertions(+), 211 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c index ac63bd544f6..99a49df317c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c @@ -38,146 +38,6 @@ #include "lp_bld_flow.h" -#define LP_BUILD_FLOW_MAX_VARIABLES 64 -#define LP_BUILD_FLOW_MAX_DEPTH 32 - -/** - * Enumeration of all possible flow constructs. - */ -enum lp_build_flow_construct_kind { - LP_BUILD_FLOW_SKIP, - LP_BUILD_FLOW_IF -}; - - -/** - * Early exit. Useful to skip to the end of a function or block when - * the execution mask becomes zero or when there is an error condition. - */ -struct lp_build_flow_skip -{ - /** Block to skip to */ - LLVMBasicBlockRef block; -}; - - -/** - * Union of all possible flow constructs' data - */ -union lp_build_flow_construct_data -{ - struct lp_build_flow_skip skip; -}; - - -/** - * Element of the flow construct stack. - */ -struct lp_build_flow_construct -{ - enum lp_build_flow_construct_kind kind; - union lp_build_flow_construct_data data; -}; - - -/** - * All necessary data to generate LLVM control flow constructs. - * - * Besides keeping track of the control flow construct themselves we also - * need to keep track of variables in order to generate SSA Phi values. - */ -struct lp_build_flow_context -{ - LLVMBuilderRef builder; - - /** - * Control flow stack. - */ - struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH]; - unsigned num_constructs; -}; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder) -{ - struct lp_build_flow_context *flow; - - flow = CALLOC_STRUCT(lp_build_flow_context); - if(!flow) - return NULL; - - flow->builder = builder; - - return flow; -} - - -void -lp_build_flow_destroy(struct lp_build_flow_context *flow) -{ - assert(flow->num_constructs == 0); - FREE(flow); -} - - -/** - * Begin/push a new flow control construct, such as a loop, skip block - * or variable scope. - */ -static union lp_build_flow_construct_data * -lp_build_flow_push(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH); - if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH) - return NULL; - - flow->constructs[flow->num_constructs].kind = kind; - return &flow->constructs[flow->num_constructs++].data; -} - - -/** - * Return the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_peek(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[flow->num_constructs - 1].data; -} - - -/** - * End/pop the current/top flow control construct on the stack. - * \param kind the expected type of the top-most construct - */ -static union lp_build_flow_construct_data * -lp_build_flow_pop(struct lp_build_flow_context *flow, - enum lp_build_flow_construct_kind kind) -{ - assert(flow->num_constructs); - if(!flow->num_constructs) - return NULL; - - assert(flow->constructs[flow->num_constructs - 1].kind == kind); - if(flow->constructs[flow->num_constructs - 1].kind != kind) - return NULL; - - return &flow->constructs[--flow->num_constructs].data; -} - - /** * Note: this function has no dependencies on the flow code and could * be used elsewhere. @@ -208,34 +68,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name) } -static LLVMBasicBlockRef -lp_build_flow_insert_block(struct lp_build_flow_context *flow) -{ - return lp_build_insert_new_block(flow->builder, ""); -} - - /** * Begin a "skip" block. Inside this block we can test a condition and * skip to the end of the block if the condition is false. */ void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow) +lp_build_flow_skip_begin(struct lp_build_skip_context *skip, + LLVMBuilderRef builder) { - struct lp_build_flow_skip *skip; - LLVMBuilderRef builder; - - skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; + skip->builder = builder; /* create new basic block */ - skip->block = lp_build_flow_insert_block(flow); - - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, skip->block); - - LLVMDisposeBuilder(builder); + skip->block = lp_build_insert_new_block(skip->builder, "skip"); } @@ -244,37 +88,26 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow) * skip block if the condition is true. */ void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip, LLVMValueRef cond) { - struct lp_build_flow_skip *skip; LLVMBasicBlockRef new_block; - skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - - new_block = lp_build_flow_insert_block(flow); + new_block = lp_build_insert_new_block(skip->builder, ""); /* if cond is true, goto skip->block, else goto new_block */ - LLVMBuildCondBr(flow->builder, cond, skip->block, new_block); + LLVMBuildCondBr(skip->builder, cond, skip->block, new_block); - LLVMPositionBuilderAtEnd(flow->builder, new_block); + LLVMPositionBuilderAtEnd(skip->builder, new_block); } void -lp_build_flow_skip_end(struct lp_build_flow_context *flow) +lp_build_flow_skip_end(struct lp_build_skip_context *skip) { - struct lp_build_flow_skip *skip; - - skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip; - if(!skip) - return; - /* goto block */ - LLVMBuildBr(flow->builder, skip->block); - LLVMPositionBuilderAtEnd(flow->builder, skip->block); + LLVMBuildBr(skip->builder, skip->block); + LLVMPositionBuilderAtEnd(skip->builder, skip->block); } @@ -284,7 +117,7 @@ lp_build_flow_skip_end(struct lp_build_flow_context *flow) void lp_build_mask_check(struct lp_build_mask_context *mask) { - LLVMBuilderRef builder = mask->flow->builder; + LLVMBuilderRef builder = mask->skip.builder; LLVMValueRef value; LLVMValueRef cond; @@ -298,7 +131,7 @@ lp_build_mask_check(struct lp_build_mask_context *mask) ""); /* if cond, goto end of block */ - lp_build_flow_skip_cond_break(mask->flow, cond); + lp_build_flow_skip_cond_break(&mask->skip, cond); } @@ -311,28 +144,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask) */ void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value) { memset(mask, 0, sizeof *mask); - mask->flow = flow; mask->reg_type = LLVMIntType(type.width * type.length); - mask->var = lp_build_alloca(flow->builder, + mask->var = lp_build_alloca(builder, lp_build_int_vec_type(type), "execution_mask"); - LLVMBuildStore(flow->builder, value, mask->var); + LLVMBuildStore(builder, value, mask->var); - lp_build_flow_skip_begin(flow); + lp_build_flow_skip_begin(&mask->skip, builder); } LLVMValueRef lp_build_mask_value(struct lp_build_mask_context *mask) { - return LLVMBuildLoad(mask->flow->builder, mask->var, ""); + return LLVMBuildLoad(mask->skip.builder, mask->var, ""); } @@ -345,10 +177,10 @@ void lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef value) { - value = LLVMBuildAnd(mask->flow->builder, + value = LLVMBuildAnd(mask->skip.builder, lp_build_mask_value(mask), value, ""); - LLVMBuildStore(mask->flow->builder, value, mask->var); + LLVMBuildStore(mask->skip.builder, value, mask->var); } @@ -358,7 +190,7 @@ lp_build_mask_update(struct lp_build_mask_context *mask, LLVMValueRef lp_build_mask_end(struct lp_build_mask_context *mask) { - lp_build_flow_skip_end(mask->flow); + lp_build_flow_skip_end(&mask->skip); return lp_build_mask_value(mask); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index a4fc8d19556..e21d9de280f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -41,29 +41,33 @@ struct lp_type; -struct lp_build_flow_context; - - -struct lp_build_flow_context * -lp_build_flow_create(LLVMBuilderRef builder); +/** + * Early exit. Useful to skip to the end of a function or block when + * the execution mask becomes zero or when there is an error condition. + */ +struct lp_build_skip_context +{ + LLVMBuilderRef builder; -void -lp_build_flow_destroy(struct lp_build_flow_context *flow); + /** Block to skip to */ + LLVMBasicBlockRef block; +}; void -lp_build_flow_skip_begin(struct lp_build_flow_context *flow); +lp_build_flow_skip_begin(struct lp_build_skip_context *ctx, + LLVMBuilderRef builder); void -lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow, +lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx, LLVMValueRef cond); void -lp_build_flow_skip_end(struct lp_build_flow_context *flow); +lp_build_flow_skip_end(struct lp_build_skip_context *ctx); struct lp_build_mask_context { - struct lp_build_flow_context *flow; + struct lp_build_skip_context skip; LLVMTypeRef reg_type; @@ -73,7 +77,7 @@ struct lp_build_mask_context void lp_build_mask_begin(struct lp_build_mask_context *mask, - struct lp_build_flow_context *flow, + LLVMBuilderRef builder, struct lp_type type, LLVMValueRef value); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 3b0706e3ec1..6bfd02061d5 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -237,7 +237,6 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef z; LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask; boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && shader->info.num_inputs < 3 && @@ -286,8 +285,6 @@ generate_fs(struct llvmpipe_context *lp, consts_ptr = lp_jit_context_constants(builder, context_ptr); - flow = lp_build_flow_create(builder); - memset(outputs, 0, sizeof outputs); /* Declare the color and z variables */ @@ -307,7 +304,7 @@ generate_fs(struct llvmpipe_context *lp, } /* 'mask' will control execution based on quad's pixel alive/killed state */ - lp_build_mask_begin(&mask, flow, type, *pmask); + lp_build_mask_begin(&mask, builder, type, *pmask); if (!(depth_mode & EARLY_DEPTH_TEST) && !simple_shader) lp_build_mask_check(&mask); @@ -424,8 +421,6 @@ generate_fs(struct llvmpipe_context *lp, lp_build_mask_value(&mask), counter); *pmask = lp_build_mask_end(&mask); - - lp_build_flow_destroy(flow); } @@ -450,7 +445,6 @@ generate_blend(const struct pipe_blend_state *blend, boolean do_branch) { struct lp_build_context bld; - struct lp_build_flow_context *flow; struct lp_build_mask_context mask_ctx; LLVMTypeRef vec_type; LLVMValueRef const_ptr; @@ -461,8 +455,7 @@ generate_blend(const struct pipe_blend_state *blend, lp_build_context_init(&bld, builder, type); - flow = lp_build_flow_create(builder); - lp_build_mask_begin(&mask_ctx, flow, type, mask); + lp_build_mask_begin(&mask_ctx, builder, type, mask); if (do_branch) lp_build_mask_check(&mask_ctx); @@ -497,7 +490,6 @@ generate_blend(const struct pipe_blend_state *blend, } lp_build_mask_end(&mask_ctx); - lp_build_flow_destroy(flow); } -- cgit v1.2.3 From bd89da79a1acc8b896a7f5afc71435befe2ff7e4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 8 Oct 2010 09:51:09 +1000 Subject: r600g: fix input/output Z export mixup for evergreen. --- src/gallium/drivers/r600/evergreen_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0fd1d399518..81db2e25e8b 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1546,7 +1546,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); } for (i = 0; i < rshader->noutput; i++) { - if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL, S_02880C_Z_EXPORT_ENABLE(1), -- cgit v1.2.3 From 2c47f302af48fe2a464230efb63dfe543740d1fb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 8 Oct 2010 10:17:51 +1000 Subject: r600g: evergreen has no request size bit in texture word4 --- src/gallium/drivers/r600/evergreen_state.c | 1 - src/gallium/drivers/r600/evergreend.h | 3 --- 2 files changed, 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 81db2e25e8b..bdd54811fb2 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -464,7 +464,6 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, word4 | S_030010_NUM_FORMAT_ALL(V_030010_SQ_NUM_FORMAT_NORM) | S_030010_SRF_MODE_ALL(V_030010_SFR_MODE_NO_ZERO) | - S_030010_REQUEST_SIZE(1) | S_030010_BASE_LEVEL(state->first_level), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, S_030014_LAST_LEVEL(state->last_level) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 9971dded782..eb36a35165e 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -984,9 +984,6 @@ #define S_030010_ENDIAN_SWAP(x) (((x) & 0x3) << 12) #define G_030010_ENDIAN_SWAP(x) (((x) >> 12) & 0x3) #define C_030010_ENDIAN_SWAP 0xFFFFCFFF -#define S_030010_REQUEST_SIZE(x) (((x) & 0x3) << 14) -#define G_030010_REQUEST_SIZE(x) (((x) >> 14) & 0x3) -#define C_030010_REQUEST_SIZE 0xFFFF3FFF #define S_030010_DST_SEL_X(x) (((x) & 0x7) << 16) #define G_030010_DST_SEL_X(x) (((x) >> 16) & 0x7) #define C_030010_DST_SEL_X 0xFFF8FFFF -- cgit v1.2.3 From ea1d818b58d6ff9e4cd0c40eb865beabde8f268c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 Oct 2010 11:58:27 +1000 Subject: r600g: enable vertex samplers. We need to move the texture sampler resources out of the range of the vertex attribs. We could probably improve this using an allocator but this is the simple answer for now. makes mesa-demos/src/glsl/vert-tex work. --- src/gallium/drivers/r600/evergreen_state.c | 11 ++++++++--- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/r600/r600_shader.c | 6 ++++-- src/gallium/drivers/r600/r600_state.c | 11 ++++++++--- 4 files changed, 21 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index bdd54811fb2..323509f6402 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -480,8 +480,14 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte static void evergreen_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { - /* TODO */ - assert(1); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } } static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, @@ -523,7 +529,6 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - /* TODO implement */ for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 0589652f705..832a2b2b606 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -257,7 +257,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 14; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: /* FIXME allow this once infrastructure is there */ - return 0; + return 16; case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 16; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 366d5d9c351..d22325e8b78 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1866,8 +1866,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; - tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.sampler_id = tex.resource_id; + tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; + tex.resource_id = tex.sampler_id; + if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) + tex.resource_id += PIPE_MAX_ATTRIBS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7ceedf6363b..29d9d154a24 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -683,8 +683,14 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, struct pipe_sampler_view **views) { - /* TODO */ - assert(1); + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + + for (int i = 0; i < count; i++) { + if (resource[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + } + } } static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, @@ -726,7 +732,6 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - /* TODO implement */ for (int i = 0; i < count; i++) { r600_context_pipe_state_set_vs_sampler(&rctx->ctx, rstates[i], i); } -- cgit v1.2.3 From ef2702fb2003944998ab1578119fb44fe16d1c82 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 Oct 2010 12:18:05 +1000 Subject: r600g: add TXL opcode support. fixes glsl1-2D Texture lookup with explicit lod (Vertex shader) --- src/gallium/drivers/r600/r600_shader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d22325e8b78..341800306d6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2926,7 +2926,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ @@ -3084,7 +3084,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ -- cgit v1.2.3 From 3322416de44f27974edaba2aee8b2d2d21de6a8f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 11 Oct 2010 16:20:56 +1000 Subject: r600g: don't run with scissors. This could probably be done much nicer, I've spent a day chasing a coherency problem in the kernel, that turned out to be incorrect scissor setup. --- src/gallium/drivers/r600/evergreen_state.c | 45 ++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_state.c | 47 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 323509f6402..99085647a75 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -899,6 +899,51 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028238_CB_TARGET_MASK, 0x00000000, target_mask, NULL); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 29d9d154a24..a2a76cdeb75 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1074,6 +1074,18 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, tl = S_028240_TL_X(0) | S_028240_TL_Y(0) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + r600_pipe_state_add_reg(rstate, + R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028034_PA_SC_SCREEN_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028208_PA_SC_WINDOW_SCISSOR_BR, br, + 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, 0xFFFFFFFF, NULL); @@ -1086,6 +1098,41 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028210_PA_SC_CLIPRECT_0_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028214_PA_SC_CLIPRECT_0_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028218_PA_SC_CLIPRECT_1_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02821C_PA_SC_CLIPRECT_1_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028220_PA_SC_CLIPRECT_2_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028224_PA_SC_CLIPRECT_2_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028228_PA_SC_CLIPRECT_3_TL, tl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02822C_PA_SC_CLIPRECT_3_BR, br, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, + 0xFFFFFFFF, NULL); + if (rctx->family >= CHIP_RV770) { + r600_pipe_state_add_reg(rstate, + R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, + 0xFFFFFFFF, NULL); + } r600_pipe_state_add_reg(rstate, R_0287A0_CB_SHADER_CONTROL, shader_control, 0xFFFFFFFF, NULL); -- cgit v1.2.3 From b18fecbd0ea33c9db7e3fd676ed7b5877ebb1bd5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 10 Oct 2010 23:36:14 +0100 Subject: llvmpipe: Remove outdated comment about stencil testing. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e768493103e..264fce8d6a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2009 VMware, Inc. + * Copyright 2009-2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -53,15 +53,8 @@ * ... ... ... ... ... ... ... ... ... * * - * Stencil test: - * Two-sided stencil test is supported but probably not as efficient as - * it could be. Currently, we use if/then/else constructs to do the - * operations for front vs. back-facing polygons. We could probably do - * both the front and back arithmetic then use a Select() instruction to - * choose the result depending on polyon orientation. We'd have to - * measure performance both ways and see which is better. - * * @author Jose Fonseca + * @author Brian Paul */ #include "pipe/p_state.h" -- cgit v1.2.3 From 986cb9d5cf60bc11c7facc19017b5432b17240f7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 2 Sep 2010 16:30:23 +0100 Subject: llvmpipe: Use lp_tgsi_info. --- src/gallium/drivers/llvmpipe/lp_setup_point.c | 4 +-- src/gallium/drivers/llvmpipe/lp_state_derived.c | 16 +++++------ src/gallium/drivers/llvmpipe/lp_state_fs.c | 38 ++++++++++++------------- src/gallium/drivers/llvmpipe/lp_state_fs.h | 3 +- 4 files changed, 31 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 31d85f43c2f..64b24a88d54 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -239,8 +239,8 @@ setup_point_coefficients( struct lp_setup_context *setup, /* check if the sprite coord flag is set for this attribute. * If so, set it up so it up so x and y vary from 0 to 1. */ - if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { - unsigned semantic_index = shader->info.input_semantic_index[slot]; + if (shader->info.base.input_semantic_name[slot] == TGSI_SEMANTIC_GENERIC) { + unsigned semantic_index = shader->info.base.input_semantic_index[slot]; /* Note that sprite_coord enable is a bitfield of * PIPE_MAX_SHADER_OUTPUTS bits. */ diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c index bb059d04599..7f68818ab47 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_derived.c +++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c @@ -66,14 +66,14 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index); - for (i = 0; i < lpfs->info.num_inputs; i++) { + for (i = 0; i < lpfs->info.base.num_inputs; i++) { /* * Search for each input in current vs output: */ vs_index = draw_find_shader_output(llvmpipe->draw, - lpfs->info.input_semantic_name[i], - lpfs->info.input_semantic_index[i]); + lpfs->info.base.input_semantic_name[i], + lpfs->info.base.input_semantic_index[i]); if (vs_index < 0) { /* * This can happen with sprite coordinates - the vertex @@ -86,9 +86,9 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) /* This can be pre-computed, except for flatshade: */ - inputs[i].usage_mask = lpfs->info.input_usage_mask[i]; + inputs[i].usage_mask = lpfs->info.base.input_usage_mask[i]; - switch (lpfs->info.input_interpolate[i]) { + switch (lpfs->info.base.input_interpolate[i]) { case TGSI_INTERPOLATE_CONSTANT: inputs[i].interp = LP_INTERP_CONSTANT; break; @@ -103,7 +103,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) break; } - switch (lpfs->info.input_semantic_name[i]) { + switch (lpfs->info.base.input_semantic_name[i]) { case TGSI_SEMANTIC_FACE: inputs[i].interp = LP_INTERP_FACING; break; @@ -145,7 +145,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index); } - llvmpipe->num_inputs = lpfs->info.num_inputs; + llvmpipe->num_inputs = lpfs->info.base.num_inputs; draw_compute_vertex_size(vinfo); @@ -153,7 +153,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe) lp_setup_set_fs_inputs(llvmpipe->setup, inputs, - lpfs->info.num_inputs); + lpfs->info.base.num_inputs); } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 6bfd02061d5..6872f2d3c6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -238,9 +238,9 @@ generate_fs(struct llvmpipe_context *lp, LLVMValueRef zs_value = NULL; LLVMValueRef stencil_refs[2]; struct lp_build_mask_context mask; - boolean simple_shader = (shader->info.file_count[TGSI_FILE_SAMPLER] == 0 && - shader->info.num_inputs < 3 && - shader->info.num_instructions < 8); + boolean simple_shader = (shader->info.base.file_count[TGSI_FILE_SAMPLER] == 0 && + shader->info.base.num_inputs < 3 && + shader->info.base.num_instructions < 8); unsigned attrib; unsigned chan; unsigned cbuf; @@ -253,8 +253,8 @@ generate_fs(struct llvmpipe_context *lp, zs_format_desc = util_format_description(key->zsbuf_format); assert(zs_format_desc); - if (!shader->info.writes_z) { - if (key->alpha.enabled || shader->info.uses_kill) + if (!shader->info.base.writes_z) { + if (key->alpha.enabled || shader->info.base.uses_kill) /* With alpha test and kill, can do the depth test early * and hopefully eliminate some quads. But need to do a * special deferred depth write once the final mask value @@ -334,12 +334,12 @@ generate_fs(struct llvmpipe_context *lp, /* Build the actual shader */ lp_build_tgsi_soa(builder, tokens, type, &mask, consts_ptr, interp->pos, interp->inputs, - outputs, sampler, &shader->info); + outputs, sampler, &shader->info.base); /* Alpha test */ if (key->alpha.enabled) { - int color0 = find_output_by_semantic(&shader->info, + int color0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_COLOR, 0); @@ -358,7 +358,7 @@ generate_fs(struct llvmpipe_context *lp, /* Late Z test */ if (depth_mode & LATE_DEPTH_TEST) { - int pos0 = find_output_by_semantic(&shader->info, + int pos0 = find_output_by_semantic(&shader->info.base, TGSI_SEMANTIC_POSITION, 0); @@ -399,11 +399,11 @@ generate_fs(struct llvmpipe_context *lp, /* Color write */ - for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) + for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) { - if (shader->info.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) { - unsigned cbuf = shader->info.output_semantic_index[attrib]; + unsigned cbuf = shader->info.base.output_semantic_index[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { /* XXX: just initialize outputs to point at colors[] and @@ -728,7 +728,7 @@ generate_fragment(struct llvmpipe_context *lp, */ boolean do_branch = ((key->depth.enabled || key->stencil[0].enabled) && !key->alpha.enabled && - !shader->info.uses_kill); + !shader->info.base.uses_kill); generate_blend(&key->blend, rt, @@ -917,7 +917,7 @@ generate_variant(struct llvmpipe_context *lp, !key->stencil[0].enabled && !key->alpha.enabled && !key->depth.enabled && - !shader->info.uses_kill + !shader->info.base.uses_kill ? TRUE : FALSE; @@ -954,7 +954,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, make_empty_list(&shader->variants); /* get/save the summary info for this shader */ - tgsi_scan_shader(templ->tokens, &shader->info); + lp_build_tgsi_info(templ->tokens, &shader->info); /* we need to keep a local copy of the tokens */ shader->base.tokens = tgsi_dup_tokens(templ->tokens); @@ -966,7 +966,7 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, return NULL; } - nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; shader->variant_key_size = Offset(struct lp_fragment_shader_variant_key, sampler[nr_samplers]); @@ -976,8 +976,8 @@ llvmpipe_create_fs_state(struct pipe_context *pipe, debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader); tgsi_dump(templ->tokens, 0); debug_printf("usage masks:\n"); - for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) { - unsigned usage_mask = shader->info.input_usage_mask[attrib]; + for (attrib = 0; attrib < shader->info.base.num_inputs; ++attrib) { + unsigned usage_mask = shader->info.base.input_usage_mask[attrib]; debug_printf(" IN[%u].%s%s%s%s\n", attrib, usage_mask & TGSI_WRITEMASK_X ? "x" : "", @@ -1206,10 +1206,10 @@ make_variant_key(struct llvmpipe_context *lp, /* This value will be the same for all the variants of a given shader: */ - key->nr_samplers = shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + key->nr_samplers = shader->info.base.file_max[TGSI_FILE_SAMPLER] + 1; for(i = 0; i < key->nr_samplers; ++i) { - if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + if(shader->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h index 4999b8dca1a..ddad117acac 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.h +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h @@ -34,6 +34,7 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */ #include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */ +#include "gallivm/lp_bld_tgsi.h" /* for lp_tgsi_info */ struct tgsi_token; @@ -96,7 +97,7 @@ struct lp_fragment_shader { struct pipe_shader_state base; - struct tgsi_shader_info info; + struct lp_tgsi_info info; struct lp_fs_variant_list_item variants; -- cgit v1.2.3 From 965f69cb0c51ca5a5c332e207cc86367fa31e6b1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 09:41:02 +1000 Subject: r600g: fix typo in vertex sampling on r600 fixes https://bugs.freedesktop.org/show_bug.cgi?id=30771 Reported-by: Kevin DeKorte --- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a2a76cdeb75..70461d24ea2 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -688,7 +688,7 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, unsigned count, for (int i = 0; i < count; i++) { if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); + r600_context_pipe_state_set_vs_resource(&rctx->ctx, &resource[i]->state, i + PIPE_MAX_ATTRIBS); } } } -- cgit v1.2.3 From c25fcf5aa5beccd7731706b8f85682170a2eca56 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 28 Sep 2010 22:51:28 +0200 Subject: nouveau: Get larger push buffers. Useful to amortize the command submission/reloc overhead (e.g. etracer goes from 72 to 109 FPS on nv4b). --- src/gallium/drivers/nouveau/nouveau_screen.c | 2 +- src/mesa/drivers/dri/nouveau/nouveau_context.c | 2 +- src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index ebb21a6e5a3..a9426df686f 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -236,7 +236,7 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) int ret; ret = nouveau_channel_alloc(dev, 0xbeef0201, 0xbeef0202, - &screen->channel); + 512*1024, &screen->channel); if (ret) return ret; screen->device = dev; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index 6369e8d7b14..1c7f600d2b0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -128,7 +128,7 @@ nouveau_context_init(GLcontext *ctx, struct nouveau_screen *screen, /* Allocate a hardware channel. */ ret = nouveau_channel_alloc(context_dev(ctx), 0xbeef0201, 0xbeef0202, - &nctx->hw.chan); + 512*1024, &nctx->hw.chan); if (ret) { nouveau_error("Error initializing the FIFO.\n"); return GL_FALSE; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c index e4415cbedb5..9373b936ab7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c @@ -32,7 +32,7 @@ /* Arbitrary pushbuf length we can assume we can get with a single * WAIT_RING. */ -#define PUSHBUF_DWORDS 2048 +#define PUSHBUF_DWORDS 65536 /* Functions to set up struct nouveau_array_state from something like * a GL array or index buffer. */ -- cgit v1.2.3 From 4cb3b4ced80891ce8760cf5a0c06db9dbee36b76 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 11 Oct 2010 19:45:52 +0100 Subject: llvmpipe: Do not dispose the execution engine. The engine is a global owned by gallivm module. --- src/gallium/drivers/llvmpipe/lp_jit.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 04b12dedccf..e09ec504ab7 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -162,9 +162,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) void lp_jit_screen_cleanup(struct llvmpipe_screen *screen) { - if(screen->engine) - LLVMDisposeExecutionEngine(screen->engine); - if(screen->pass) LLVMDisposePassManager(screen->pass); } -- cgit v1.2.3 From 2cf98d5a6dccba3fd69b8469e67f66dfb5fc9651 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Oct 2010 16:30:14 +0100 Subject: llvmpipe: try to do more of rast_tri_3_16 with intrinsics There was actually a large quantity of scalar code in these functions previously. This tries to move more into intrinsics. Introduce an sse2 mm_mullo_epi32 replacement to avoid sse4 dependency in the new rasterization code. --- src/gallium/drivers/llvmpipe/lp_rast.h | 16 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 264 ++++++++++++++++++++++++++++- 2 files changed, 271 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index df0bea04b9a..e2bcc450168 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -89,19 +89,21 @@ struct lp_rast_shader_inputs { const struct lp_rast_state *state; }; - +/* Note: the order of these values is important as they are loaded by + * sse code in rasterization: + */ struct lp_rast_plane { - /* one-pixel sized trivial accept offsets for each plane */ - int ei; - - /* one-pixel sized trivial reject offsets for each plane */ - int eo; - /* edge function values at minx,miny ?? */ int c; int dcdx; int dcdy; + + /* one-pixel sized trivial reject offsets for each plane */ + int eo; + + /* one-pixel sized trivial accept offsets for each plane */ + int ei; }; /** diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index f870a187db5..7a6cbb8b63c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -32,6 +32,7 @@ #include #include "util/u_math.h" #include "lp_debug.h" +#include "lp_debug_intrin.h" #include "lp_perf.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" @@ -254,8 +255,8 @@ sign_bits4(const __m128i *cstep, int cdiff) #define TAG(x) x##_3 #define NR_PLANES 3 -#define TRI_4 lp_rast_triangle_3_4 -#define TRI_16 lp_rast_triangle_3_16 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_4 @@ -279,3 +280,262 @@ sign_bits4(const __m128i *cstep, int cdiff) #define NR_PLANES 8 #include "lp_rast_tri_tmp.h" + +static INLINE void +transpose4_epi32(__m128i a, + __m128i b, + __m128i c, + __m128i d, + __m128i *o, + __m128i *p, + __m128i *q, + __m128i *r) +{ + __m128i t0 = _mm_unpacklo_epi32(a, b); + __m128i t1 = _mm_unpacklo_epi32(c, d); + __m128i t2 = _mm_unpackhi_epi32(a, b); + __m128i t3 = _mm_unpackhi_epi32(c, d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + +#define NR_PLANES 3 + + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_si128(a, 4); /* shift by one dword */ + __m128i b4 = _mm_srli_si128(b, 4); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask = _mm_and_si128(b4a4, mask); + __m128i b4a4_mask_shift = _mm_slli_si128(b4a4_mask, 4); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + + + +void +lp_rast_triangle_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + unsigned i, j; + + struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; + unsigned nr = 0; + + __m128i p0 = _mm_loadu_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_loadu_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + __m128i rej4; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(p0, p1, p2, zero, + &c, &dcdx, &dcdy, &rej4); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + rej4 = _mm_slli_epi32(rej4, 2); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(zero, dcdx, dcdx2, dcdx3, + &span_0, &span_1, &span_2, &unused); + + for (i = 0; i < 4; i++) { + __m128i cx = c; + + for (j = 0; j < 4; j++) { + __m128i c4rej = _mm_add_epi32(cx, rej4); + __m128i rej_masks = _mm_srai_epi32(c4rej, 31); + + /* if (is_zero(rej_masks)) */ + if (_mm_movemask_epi8(rej_masks) == 0) { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(cx, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(cx, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(cx, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); + + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); + + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + out[nr].i = i; + out[nr].j = j; + out[nr].mask = mask; + if (mask != 0xffff) + nr++; + } + cx = _mm_add_epi32(cx, _mm_slli_epi32(dcdx, 2)); + } + + c = _mm_add_epi32(c, _mm_slli_epi32(dcdy, 2)); + } + + for (i = 0; i < nr; i++) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x + 4 * out[i].j, + y + 4 * out[i].i, + 0xffff & ~out[i].mask); +} + + + + + +void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + const struct lp_rast_triangle *tri = arg.triangle.tri; + const struct lp_rast_plane *plane = tri->plane; + int x = (arg.triangle.plane_mask & 0xff) + task->x; + int y = (arg.triangle.plane_mask >> 8) + task->y; + + __m128i p0 = _mm_loadu_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_loadu_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i zero = _mm_setzero_si128(); + + __m128i c; + __m128i dcdx; + __m128i dcdy; + + __m128i dcdx2; + __m128i dcdx3; + + __m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */ + __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ + __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ + __m128i unused; + + transpose4_epi32(p0, p1, p2, zero, + &c, &dcdx, &dcdy, &unused); + + /* Adjust dcdx; + */ + dcdx = _mm_sub_epi32(zero, dcdx); + + c = _mm_add_epi32(c, _mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, _mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + + dcdx2 = _mm_add_epi32(dcdx, dcdx); + dcdx3 = _mm_add_epi32(dcdx2, dcdx); + + transpose4_epi32(zero, dcdx, dcdx2, dcdx3, + &span_0, &span_1, &span_2, &unused); + + + { + __m128i c0_0 = _mm_add_epi32(SCALAR_EPI32(c, 0), span_0); + __m128i c1_0 = _mm_add_epi32(SCALAR_EPI32(c, 1), span_1); + __m128i c2_0 = _mm_add_epi32(SCALAR_EPI32(c, 2), span_2); + + __m128i c_0 = _mm_or_si128(_mm_or_si128(c0_0, c1_0), c2_0); + + __m128i c0_1 = _mm_add_epi32(c0_0, SCALAR_EPI32(dcdy, 0)); + __m128i c1_1 = _mm_add_epi32(c1_0, SCALAR_EPI32(dcdy, 1)); + __m128i c2_1 = _mm_add_epi32(c2_0, SCALAR_EPI32(dcdy, 2)); + + __m128i c_1 = _mm_or_si128(_mm_or_si128(c0_1, c1_1), c2_1); + __m128i c_01 = _mm_packs_epi32(c_0, c_1); + + __m128i c0_2 = _mm_add_epi32(c0_1, SCALAR_EPI32(dcdy, 0)); + __m128i c1_2 = _mm_add_epi32(c1_1, SCALAR_EPI32(dcdy, 1)); + __m128i c2_2 = _mm_add_epi32(c2_1, SCALAR_EPI32(dcdy, 2)); + + __m128i c_2 = _mm_or_si128(_mm_or_si128(c0_2, c1_2), c2_2); + + __m128i c0_3 = _mm_add_epi32(c0_2, SCALAR_EPI32(dcdy, 0)); + __m128i c1_3 = _mm_add_epi32(c1_2, SCALAR_EPI32(dcdy, 1)); + __m128i c2_3 = _mm_add_epi32(c2_2, SCALAR_EPI32(dcdy, 2)); + + __m128i c_3 = _mm_or_si128(_mm_or_si128(c0_3, c1_3), c2_3); + __m128i c_23 = _mm_packs_epi32(c_2, c_3); + __m128i c_0123 = _mm_packs_epi16(c_01, c_23); + + unsigned mask = _mm_movemask_epi8(c_0123); + + if (mask != 0xffff) + lp_rast_shade_quads_mask(task, + &tri->inputs, + x, + y, + 0xffff & ~mask); + } +} -- cgit v1.2.3 From 9d59e148f86c1de2c69639d389398d7435cc193e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 11 Oct 2010 18:20:02 +0100 Subject: llvmpipe: add debug helpers for epi32 etc --- src/gallium/drivers/llvmpipe/lp_debug_intrin.h | 115 +++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_debug_intrin.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_debug_intrin.h b/src/gallium/drivers/llvmpipe/lp_debug_intrin.h new file mode 100644 index 00000000000..5237e7893b6 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_debug_intrin.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +#ifndef _LP_DEBUG_INTRIN_H_ +#define _LP_DEBUG_INTRIN_H_ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_SSE) + +#include + +static INLINE void print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + +#endif +#endif -- cgit v1.2.3 From 9773722c2b09d5f0615a47cecf4347859474dc56 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 11:02:19 +0100 Subject: llvmpipe: try to keep plane c values small Avoid accumulating more and more fixed point bits. --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 3 +-- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 38 +++++++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 693ac281752..c940860850e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -640,8 +640,7 @@ try_setup_line( struct lp_setup_context *setup, } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane->c = (plane->c + (FIXED_ONE-1)) / FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 8fd034666c3..dfe1bd11ea0 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -343,26 +343,34 @@ do_triangle_ccw(struct lp_setup_context *setup, * Also, sometimes (in FBO cases) GL will render upside down * to its usual method, in which case it will probably want * to use the opposite, top-left convention. + * + * XXX: Chances are this will get stripped away. In fact this + * is only meaningful if: + * + * (plane->c & (FIXED_ONE-1)) == 0 + * */ - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; - } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; + if ((plane->c & (FIXED_ONE-1)) == 0) { + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane->c = (plane->c + (FIXED_ONE-1)) / FIXED_ONE; + /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From b4277bc5843aca7f9e0ecc7e956733f1becd6ad6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 11:51:28 +0100 Subject: llvmpipe: fix typo in last commit --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 7a6cbb8b63c..854fd5cc1e8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -491,8 +491,8 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, */ dcdx = _mm_sub_epi32(zero, dcdx); - c = _mm_add_epi32(c, _mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); - c = _mm_add_epi32(c, _mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); + c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); -- cgit v1.2.3 From 39331be44efc5b5ae749df3f6987626837c7b8ff Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 12 Oct 2010 12:27:55 +0100 Subject: llvmpipe: Fix MSVC build. MSVC doesn't accept more than 3 __m128i arguments. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 854fd5cc1e8..5e8918b1d87 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -282,19 +282,19 @@ sign_bits4(const __m128i *cstep, int cdiff) static INLINE void -transpose4_epi32(__m128i a, - __m128i b, - __m128i c, - __m128i d, - __m128i *o, - __m128i *p, - __m128i *q, - __m128i *r) +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) { - __m128i t0 = _mm_unpacklo_epi32(a, b); - __m128i t1 = _mm_unpacklo_epi32(c, d); - __m128i t2 = _mm_unpackhi_epi32(a, b); - __m128i t3 = _mm_unpackhi_epi32(c, d); + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); *o = _mm_unpacklo_epi64(t0, t1); *p = _mm_unpackhi_epi64(t0, t1); @@ -379,8 +379,8 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; - transpose4_epi32(p0, p1, p2, zero, - &c, &dcdx, &dcdy, &rej4); + transpose4_epi32(&p0, &p1, &p2, &zero, + &c, &dcdx, &dcdy, &rej4); /* Adjust dcdx; */ @@ -393,8 +393,8 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); - transpose4_epi32(zero, dcdx, dcdx2, dcdx3, - &span_0, &span_1, &span_2, &unused); + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, + &span_0, &span_1, &span_2, &unused); for (i = 0; i < 4; i++) { __m128i cx = c; @@ -484,7 +484,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; - transpose4_epi32(p0, p1, p2, zero, + transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &unused); /* Adjust dcdx; @@ -497,7 +497,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); - transpose4_epi32(zero, dcdx, dcdx2, dcdx3, + transpose4_epi32(&zero, &dcdx, &dcdx2, &dcdx3, &span_0, &span_1, &span_2, &unused); -- cgit v1.2.3 From 1a574afabc8840da82e68ac643ec3a7b05afb631 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 13:02:28 +0100 Subject: gallium: move sse intrinsics debug helpers to u_sse.h --- src/gallium/auxiliary/util/u_sse.h | 80 ++++++++++++++++- src/gallium/drivers/llvmpipe/lp_debug_intrin.h | 115 ------------------------- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 1 - 3 files changed, 79 insertions(+), 117 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_debug_intrin.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 03198c91da4..8fd0e52a3a4 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -72,6 +72,84 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +static INLINE void u_print_epi8(const char *name, __m128i r) +{ + union { __m128i m; ubyte ub[16]; } u; + u.m = r; + + debug_printf("%s: " + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x/" + "%02x\n", + name, + u.ub[0], u.ub[1], u.ub[2], u.ub[3], + u.ub[4], u.ub[5], u.ub[6], u.ub[7], + u.ub[8], u.ub[9], u.ub[10], u.ub[11], + u.ub[12], u.ub[13], u.ub[14], u.ub[15]); +} + +static INLINE void u_print_epi16(const char *name, __m128i r) +{ + union { __m128i m; ushort us[8]; } u; + u.m = r; + + debug_printf("%s: " + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x/" + "%04x\n", + name, + u.us[0], u.us[1], u.us[2], u.us[3], + u.us[4], u.us[5], u.us[6], u.us[7]); +} + +static INLINE void u_print_epi32(const char *name, __m128i r) +{ + union { __m128i m; uint ui[4]; } u; + u.m = r; + + debug_printf("%s: " + "%08x/" + "%08x/" + "%08x/" + "%08x\n", + name, + u.ui[0], u.ui[1], u.ui[2], u.ui[3]); +} + +static INLINE void u_print_ps(const char *name, __m128 r) +{ + union { __m128 m; float f[4]; } u; + u.m = r; + + debug_printf("%s: " + "%f/" + "%f/" + "%f/" + "%f\n", + name, + u.f[0], u.f[1], u.f[2], u.f[3]); +} + + + #if defined(PIPE_ARCH_SSSE3) #include @@ -98,6 +176,6 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ -#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ +#endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_debug_intrin.h b/src/gallium/drivers/llvmpipe/lp_debug_intrin.h deleted file mode 100644 index 5237e7893b6..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_debug_intrin.h +++ /dev/null @@ -1,115 +0,0 @@ -/************************************************************************** - * - * Copyright 2010 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - **************************************************************************/ - -#ifndef _LP_DEBUG_INTRIN_H_ -#define _LP_DEBUG_INTRIN_H_ - -#include "pipe/p_config.h" - -#if defined(PIPE_ARCH_SSE) - -#include - -static INLINE void print_epi8(const char *name, __m128i r) -{ - union { __m128i m; ubyte ub[16]; } u; - u.m = r; - - debug_printf("%s: " - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x/" - "%02x\n", - name, - u.ub[0], u.ub[1], u.ub[2], u.ub[3], - u.ub[4], u.ub[5], u.ub[6], u.ub[7], - u.ub[8], u.ub[9], u.ub[10], u.ub[11], - u.ub[12], u.ub[13], u.ub[14], u.ub[15]); -} - -static INLINE void print_epi16(const char *name, __m128i r) -{ - union { __m128i m; ushort us[8]; } u; - u.m = r; - - debug_printf("%s: " - "%04x/" - "%04x/" - "%04x/" - "%04x/" - "%04x/" - "%04x/" - "%04x/" - "%04x\n", - name, - u.us[0], u.us[1], u.us[2], u.us[3], - u.us[4], u.us[5], u.us[6], u.us[7]); -} - -static INLINE void print_epi32(const char *name, __m128i r) -{ - union { __m128i m; uint ui[4]; } u; - u.m = r; - - debug_printf("%s: " - "%08x/" - "%08x/" - "%08x/" - "%08x\n", - name, - u.ui[0], u.ui[1], u.ui[2], u.ui[3]); -} - -static INLINE void print_ps(const char *name, __m128 r) -{ - union { __m128 m; float f[4]; } u; - u.m = r; - - debug_printf("%s: " - "%f/" - "%f/" - "%f/" - "%f\n", - name, - u.f[0], u.f[1], u.f[2], u.f[3]); -} - - -#endif -#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5e8918b1d87..19b0bd686a8 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -32,7 +32,6 @@ #include #include "util/u_math.h" #include "lp_debug.h" -#include "lp_debug_intrin.h" #include "lp_perf.h" #include "lp_rast_priv.h" #include "lp_tile_soa.h" -- cgit v1.2.3 From d0eb854f58ffb7d01fb37c0939078d8d117e7386 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 5 Oct 2010 23:15:44 +0100 Subject: r600g: add missing file to sconscript --- src/gallium/drivers/r600/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/SConscript b/src/gallium/drivers/r600/SConscript index b6d9b7d8d22..bf0ad8571ba 100644 --- a/src/gallium/drivers/r600/SConscript +++ b/src/gallium/drivers/r600/SConscript @@ -18,6 +18,7 @@ r600 = env.ConvenienceLibrary( source = [ 'r600_asm.c', 'r600_buffer.c', + 'r600_blit.c', 'r600_helper.c', 'r600_pipe.c', 'r600_query.c', -- cgit v1.2.3 From 0ca0382d1bfd1e9128fa4b588ce1411f7b8a85df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 13:20:39 +0100 Subject: Revert "llvmpipe: try to keep plane c values small" This reverts commit 9773722c2b09d5f0615a47cecf4347859474dc56. Looks like there are some floor/rounding issues here that need to be better understood. --- src/gallium/drivers/llvmpipe/lp_setup_line.c | 3 ++- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 38 +++++++++++----------------- 2 files changed, 17 insertions(+), 24 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index c940860850e..693ac281752 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -640,7 +640,8 @@ try_setup_line( struct lp_setup_context *setup, } } - plane->c = (plane->c + (FIXED_ONE-1)) / FIXED_ONE; + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index dfe1bd11ea0..8fd034666c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -343,34 +343,26 @@ do_triangle_ccw(struct lp_setup_context *setup, * Also, sometimes (in FBO cases) GL will render upside down * to its usual method, in which case it will probably want * to use the opposite, top-left convention. - * - * XXX: Chances are this will get stripped away. In fact this - * is only meaningful if: - * - * (plane->c & (FIXED_ONE-1)) == 0 - * */ - if ((plane->c & (FIXED_ONE-1)) == 0) { - if (plane->dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane->c++; + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; } - else if (plane->dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane->dcdy > 0) plane->c++; - } - else { - /* correct for bottom-left fill convention: - */ - if (plane->dcdy < 0) plane->c++; - } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; } } - plane->c = (plane->c + (FIXED_ONE-1)) / FIXED_ONE; - + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to -- cgit v1.2.3 From e3ec0fdd546259005c9ed2bf7b05cead2ab95b43 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 12 Oct 2010 14:15:59 +0100 Subject: llmvpipe: improve mm_mullo_epi32 Apply Jose's suggestions for a small but measurable improvement in isosurf. --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 19b0bd686a8..c3eefb724cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -321,8 +321,8 @@ transpose4_epi32(const __m128i * restrict a, */ static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) { - __m128i a4 = _mm_srli_si128(a, 4); /* shift by one dword */ - __m128i b4 = _mm_srli_si128(b, 4); /* shift by one dword */ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ @@ -336,8 +336,7 @@ static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) #else __m128i mask = _mm_setr_epi32(~0,0,~0,0); __m128i ba_mask = _mm_and_si128(ba, mask); - __m128i b4a4_mask = _mm_and_si128(b4a4, mask); - __m128i b4a4_mask_shift = _mm_slli_si128(b4a4_mask, 4); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); #endif -- cgit v1.2.3 From 7533c374570b333b5e0d626d36d18c41d4611cb5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 18:26:41 +0100 Subject: llvmpipe: make sure intrinsics code is guarded with PIPE_ARCH_SSE --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 82 +++++++++++++++--------------- 1 file changed, 42 insertions(+), 40 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index c3eefb724cf..bae772b9c50 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -239,46 +239,6 @@ sign_bits4(const __m128i *cstep, int cdiff) return _mm_movemask_epi8(result); } -#endif - - - - -#define TAG(x) x##_1 -#define NR_PLANES 1 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_2 -#define NR_PLANES 2 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_3 -#define NR_PLANES 3 -/*#define TRI_4 lp_rast_triangle_3_4*/ -/*#define TRI_16 lp_rast_triangle_3_16*/ -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_4 -#define NR_PLANES 4 -#define TRI_16 lp_rast_triangle_4_16 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_5 -#define NR_PLANES 5 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_6 -#define NR_PLANES 6 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_7 -#define NR_PLANES 7 -#include "lp_rast_tri_tmp.h" - -#define TAG(x) x##_8 -#define NR_PLANES 8 -#include "lp_rast_tri_tmp.h" - static INLINE void transpose4_epi32(const __m128i * restrict a, @@ -537,3 +497,45 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, 0xffff & ~mask); } } + +#undef NR_PLANES +#endif + + + + +#define TAG(x) x##_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_3 +#define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_4 +#define NR_PLANES 4 +#define TRI_16 lp_rast_triangle_4_16 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + -- cgit v1.2.3 From 4ecb2c105da590abf79421a06234b636cd1afcd6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 6 Oct 2010 09:28:46 +1000 Subject: gallium/tgsi: add support for stencil writes. this adds the capability + a stencil semantic id, + tgsi scan support. Signed-off-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 3 ++- src/gallium/auxiliary/tgsi/tgsi_scan.c | 8 +++++--- src/gallium/auxiliary/tgsi/tgsi_scan.h | 1 + src/gallium/docs/source/tgsi.rst | 6 ++++++ src/gallium/drivers/softpipe/sp_fs_exec.c | 12 ++++++++++-- src/gallium/include/pipe/p_defines.h | 3 ++- src/gallium/include/pipe/p_shader_tokens.h | 3 ++- 7 files changed, 28 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 9b61797ace9..77bde86684e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -126,7 +126,8 @@ static const char *semantic_names[] = "FACE", "EDGEFLAG", "PRIM_ID", - "INSTANCEID" + "INSTANCEID", + "STENCIL" }; static const char *immediate_type_names[] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 90198a4f604..538274887b9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -157,9 +157,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* extra info for special outputs */ if (procType == TGSI_PROCESSOR_FRAGMENT && - fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } + fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) + info->writes_z = TRUE; + if (procType == TGSI_PROCESSOR_FRAGMENT && + fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL) + info->writes_stencil = TRUE; if (procType == TGSI_PROCESSOR_VERTEX && fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) { info->writes_edgeflag = TRUE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index f8aa90cf065..374c7ed551d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -60,6 +60,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean writes_stencil; /**< does fragment shader write stencil value? */ boolean writes_edgeflag; /**< vertex shader outputs edgeflag */ boolean uses_kill; /**< KIL or KILP instruction used? */ diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 9e02d43ab74..c767680c62a 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -1415,6 +1415,12 @@ Edge flags are used to control which lines or points are actually drawn when the polygon mode converts triangles/quads/polygons into points or lines. +TGSI_SEMANTIC_STENCIL +"""""""""""""""""""""" + +For fragment shaders, this semantic label indicates than an output +is a writable stencil reference value. Only the Y component is writable. +This allows the fragment shader to change the fragments stencilref value. Properties diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 67e2c8f8bc4..346e1b402ba 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -158,9 +158,17 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { + + for (j = 0; j < 4; j++) quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; - } + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + + for (j = 0; j < 4; j++) + quad->output.stencil[j] = (unsigned)machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8b4663742fa..b6894c09e82 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -464,7 +464,8 @@ enum pipe_cap { PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER, - PIPE_CAP_DEPTH_CLAMP + PIPE_CAP_DEPTH_CLAMP, + PIPE_CAP_SHADER_STENCIL_EXPORT, }; /* Shader caps not specific to any single stage */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 74488de17eb..ba433b2bd2a 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -143,7 +143,8 @@ struct tgsi_declaration_dimension #define TGSI_SEMANTIC_EDGEFLAG 8 #define TGSI_SEMANTIC_PRIMID 9 #define TGSI_SEMANTIC_INSTANCEID 10 -#define TGSI_SEMANTIC_COUNT 11 /**< number of semantic values */ +#define TGSI_SEMANTIC_STENCIL 11 +#define TGSI_SEMANTIC_COUNT 12 /**< number of semantic values */ struct tgsi_declaration_semantic { -- cgit v1.2.3 From d8f6ef456581644ab9444a1ed23542c2b0fff9e4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 6 Oct 2010 09:32:32 +1000 Subject: softpipe: add support for shader stencil export capability this allows softpipe to be used to test shader stencil ref exporting. Signed-off-by: Dave Airlie --- src/gallium/drivers/softpipe/sp_fs_sse.c | 12 +++-- src/gallium/drivers/softpipe/sp_quad.h | 1 + src/gallium/drivers/softpipe/sp_quad_depth_test.c | 61 ++++++++++++++++++++--- src/gallium/drivers/softpipe/sp_quad_pipe.c | 3 +- src/gallium/drivers/softpipe/sp_screen.c | 2 + 5 files changed, 67 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index daa158df7c4..5b18cd035e3 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -169,9 +169,15 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_POSITION: { uint j; - for (j = 0; j < 4; j++) { - quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; - } + for (j = 0; j < 4; j++) + quad->output.depth[j] = machine->Outputs[i].xyzw[2].f[j]; + } + break; + case TGSI_SEMANTIC_STENCIL: + { + uint j; + for (j = 0; j < 4; j++) + quad->output.stencil[j] = machine->Outputs[i].xyzw[1].f[j]; } break; } diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index a3236bd1169..e745aa80619 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -85,6 +85,7 @@ struct quad_header_output /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; float depth[QUAD_SIZE]; + uint8_t stencil[QUAD_SIZE]; }; diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index e9b92626176..c8f5f89568a 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -47,6 +47,8 @@ struct depth_data { unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */ unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */ ubyte stencilVals[QUAD_SIZE]; + boolean use_shader_stencil_refs; + ubyte shader_stencil_refs[QUAD_SIZE]; struct softpipe_cached_tile *tile; }; @@ -186,6 +188,33 @@ convert_quad_depth( struct depth_data *data, } +/** + * Compute the depth_data::shader_stencil_refs[] values from the float fragment stencil values. + */ +static void +convert_quad_stencil( struct depth_data *data, + const struct quad_header *quad ) +{ + unsigned j; + + data->use_shader_stencil_refs = TRUE; + /* Copy quads stencil values + */ + switch (data->format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + for (j = 0; j < QUAD_SIZE; j++) { + data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j])); + } + } + break; + default: + assert(0); + } +} /** * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer. @@ -272,8 +301,14 @@ do_stencil_test(struct depth_data *data, { unsigned passMask = 0x0; unsigned j; + ubyte refs[QUAD_SIZE]; - ref &= valMask; + for (j = 0; j < QUAD_SIZE; j++) { + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j] & valMask; + else + refs[j] = ref & valMask; + } switch (func) { case PIPE_FUNC_NEVER: @@ -281,42 +316,42 @@ do_stencil_test(struct depth_data *data, break; case PIPE_FUNC_LESS: for (j = 0; j < QUAD_SIZE; j++) { - if (ref < (data->stencilVals[j] & valMask)) { + if (refs[j] < (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_EQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref == (data->stencilVals[j] & valMask)) { + if (refs[j] == (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_LEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref <= (data->stencilVals[j] & valMask)) { + if (refs[j] <= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GREATER: for (j = 0; j < QUAD_SIZE; j++) { - if (ref > (data->stencilVals[j] & valMask)) { + if (refs[j] > (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_NOTEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref != (data->stencilVals[j] & valMask)) { + if (refs[j] != (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } break; case PIPE_FUNC_GEQUAL: for (j = 0; j < QUAD_SIZE; j++) { - if (ref >= (data->stencilVals[j] & valMask)) { + if (refs[j] >= (data->stencilVals[j] & valMask)) { passMask |= (1 << j); } } @@ -348,9 +383,14 @@ apply_stencil_op(struct depth_data *data, { unsigned j; ubyte newstencil[QUAD_SIZE]; + ubyte refs[QUAD_SIZE]; for (j = 0; j < QUAD_SIZE; j++) { newstencil[j] = data->stencilVals[j]; + if (data->use_shader_stencil_refs) + refs[j] = data->shader_stencil_refs[j]; + else + refs[j] = ref; } switch (op) { @@ -367,7 +407,7 @@ apply_stencil_op(struct depth_data *data, case PIPE_STENCIL_OP_REPLACE: for (j = 0; j < QUAD_SIZE; j++) { if (mask & (1 << j)) { - newstencil[j] = ref; + newstencil[j] = refs[j]; } } break; @@ -688,8 +728,10 @@ depth_test_quads_fallback(struct quad_stage *qs, unsigned i, pass = 0; const struct sp_fragment_shader *fs = qs->softpipe->fs; boolean interp_depth = !fs->info.writes_z; + boolean shader_stencil_ref = fs->info.writes_stencil; struct depth_data data; + data.use_shader_stencil_refs = FALSE; if (qs->softpipe->depth_stencil->alpha.enabled) { nr = alpha_test_quads(qs, quads, nr); @@ -716,6 +758,9 @@ depth_test_quads_fallback(struct quad_stage *qs, } if (qs->softpipe->depth_stencil->stencil[0].enabled) { + if (shader_stencil_ref) + convert_quad_stencil(&data, quads[i]); + depth_stencil_test_quad(qs, &data, quads[i]); write_depth_stencil_values(&data, quads[i]); } diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 43b8e88e334..2cfd02a22c6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_pipe.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c @@ -47,7 +47,8 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp->framebuffer.zsbuf && !sp->depth_stencil->alpha.enabled && !sp->fs->info.uses_kill && - !sp->fs->info.writes_z; + !sp->fs->info.writes_z && + !sp->fs->info.writes_stencil; sp->quad.first = sp->quad.blend; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 2053d02f628..37557d11940 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -114,6 +114,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; default: return 0; } -- cgit v1.2.3 From 40acb109de61ba445b9247f7d53eaf1c2b9b1245 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 7 Oct 2010 13:01:37 +1000 Subject: r600g: add support for S8, X24S8 and S8X24 sampler formats. --- src/gallium/drivers/r600/r600_texture.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index db88346afbe..496b28a3b9c 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -498,14 +498,22 @@ uint32_t r600_translate_texformat(enum pipe_format format, case PIPE_FORMAT_Z16_UNORM: result = V_0280A0_COLOR_16; goto out_word4; + case PIPE_FORMAT_X24S8_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: result = V_0280A0_COLOR_8_24; goto out_word4; + case PIPE_FORMAT_S8X24_USCALED: + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: result = V_0280A0_COLOR_24_8; goto out_word4; + case PIPE_FORMAT_S8_USCALED: + result = V_0280A0_COLOR_8; + word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); + goto out_word4; default: goto out_unknown; } -- cgit v1.2.3 From 39d1feb51e9dac794751e72f48faf26409a84b1c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 6 Oct 2010 10:14:33 +1000 Subject: r600g: add shader stencil export support. --- src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/r600/r600_shader.c | 16 ++++++++++++++-- src/gallium/drivers/r600/r600d.h | 3 +++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 832a2b2b606..52fe3c777b3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -242,6 +242,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_DEPTH_CLAMP: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 1; /* Unsupported features (boolean caps). */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 341800306d6..8930d4e1831 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -137,12 +137,17 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade R_02880C_DB_SHADER_CONTROL, S_02880C_Z_EXPORT_ENABLE(1), S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), + S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); } exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -628,7 +633,14 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; output[i].swizzle_x = 2; - output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].swizzle_y = 7; + output[i].swizzle_z = output[i].swizzle_w = 7; + output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[i].array_base = 61; + output[i].swizzle_x = 7; + output[i].swizzle_y = 1; + output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index a96d2ce26c1..f32f5286c95 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -667,6 +667,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_REF_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) +#define C_02880C_STENCIL_REF_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF -- cgit v1.2.3 From 048a90c1cb926fdeae47392582cb91b0a689905f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Oct 2010 18:38:22 -0600 Subject: draw/llvmpipe: replace DRAW_MAX_TEXTURE_LEVELS with PIPE_MAX_TEXTURE_LEVELS There's no apparent reason for the former to exist. And they didn't even have the same value. --- src/gallium/auxiliary/draw/draw_context.c | 6 +++--- src/gallium/auxiliary/draw/draw_context.h | 7 +++---- src/gallium/auxiliary/draw/draw_llvm.c | 12 ++++++------ src/gallium/auxiliary/draw/draw_llvm.h | 13 ++++++------- src/gallium/drivers/llvmpipe/lp_state_sampler.c | 6 +++--- 5 files changed, 21 insertions(+), 23 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 032fcbbc70a..40f654643bb 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -721,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { #ifdef HAVE_LLVM if(draw->llvm) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 1f27cbf488a..ff4f753604f 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -49,7 +49,6 @@ struct draw_geometry_shader; struct draw_fragment_shader; struct tgsi_sampler; -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_context *draw_create( struct pipe_context *pipe ); @@ -120,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); /* diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 7fb86d7cb27..d94340367c4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -72,12 +72,12 @@ init_globals(struct draw_llvm *llvm) elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); + LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); + PIPE_MAX_TEXTURE_LEVELS); elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); @@ -1066,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index d0a68ae412d..de89b657f3d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -41,7 +41,6 @@ #include #include -#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */ struct draw_llvm; struct llvm_vertex_shader; @@ -52,9 +51,9 @@ struct draw_jit_texture uint32_t height; uint32_t depth; uint32_t last_level; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; float min_lod; float max_lod; float lod_bias; @@ -290,8 +289,8 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]); + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c index 17a4a0ed02d..1dd866195d3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c +++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c @@ -246,9 +246,9 @@ llvmpipe_prepare_vertex_sampling(struct llvmpipe_context *lp, struct pipe_sampler_view **views) { unsigned i; - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS]; - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS]; - const void *data[DRAW_MAX_TEXTURE_LEVELS]; + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS]; + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS]; + const void *data[PIPE_MAX_TEXTURE_LEVELS]; assert(num <= PIPE_MAX_VERTEX_SAMPLERS); if (!num) -- cgit v1.2.3 From 833b4fc11e0fcac36490b036135298232310568a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 14:43:44 +1000 Subject: r600g: fix depth0 setting --- src/gallium/drivers/r600/r600_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 496b28a3b9c..1eaf40fdf0a 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -244,7 +244,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = texture->width0; resource.height0 = texture->height0; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; @@ -297,7 +297,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.format = texture->format; resource.width0 = box->width; resource.height0 = box->height; - resource.depth0 = 0; + resource.depth0 = 1; resource.last_level = 0; resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; -- cgit v1.2.3 From a8d1d7253ed281fd5c3a8a86658998eb5b9af847 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 14:23:36 +1000 Subject: r600g: fix scissor/cliprect confusion gallium calls them scissors, but r600 hw like r300 is better off using cliprects to implement them as we can turn them on/off a lot easier. --- src/gallium/drivers/r600/evergreen_state.c | 52 +++------------------------ src/gallium/drivers/r600/r600_state.c | 58 ++++-------------------------- 2 files changed, 10 insertions(+), 100 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 99085647a75..a5298e3fdf9 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -241,6 +241,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; if (rs == NULL) { return NULL; @@ -250,6 +251,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; + /* offset */ rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; @@ -299,6 +302,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, 0x00000005, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); return rstate; } @@ -629,18 +633,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SCISSOR; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, 0xFFFFFFFF, NULL); @@ -665,15 +657,6 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -911,36 +894,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028208_PA_SC_WINDOW_SCISSOR_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 70461d24ea2..25310eeda4e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -446,6 +446,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; + unsigned clip_rule; if (rs == NULL) { return NULL; @@ -455,6 +456,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, rs->flatshade = state->flatshade; rs->sprite_coord_enable = state->sprite_coord_enable; + clip_rule = state->scissor ? 0xAAAA : 0xFFFF; /* offset */ rs->offset_units = state->offset_units; rs->offset_scale = state->offset_scale * 12.0f; @@ -505,6 +507,8 @@ static void *r600_create_rs_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); + return rstate; } @@ -832,18 +836,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SCISSOR; tl = S_028240_TL_X(state->minx) | S_028240_TL_Y(state->miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(state->maxx) | S_028244_BR_Y(state->maxy); - r600_pipe_state_add_reg(rstate, - R_028030_PA_SC_SCREEN_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028034_PA_SC_SCREEN_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028204_PA_SC_WINDOW_SCISSOR_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028208_PA_SC_WINDOW_SCISSOR_BR, br, - 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028210_PA_SC_CLIPRECT_0_TL, tl, 0xFFFFFFFF, NULL); @@ -868,17 +860,6 @@ static void r600_set_scissor_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_02822C_PA_SC_CLIPRECT_3_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); - if (rctx->family >= CHIP_RV770) { - r600_pipe_state_add_reg(rstate, - R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, - 0xFFFFFFFF, NULL); - } free(rctx->states[R600_PIPE_STATE_SCISSOR]); rctx->states[R600_PIPE_STATE_SCISSOR] = rstate; @@ -1098,36 +1079,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_028254_PA_SC_VPORT_SCISSOR_0_BR, br, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028210_PA_SC_CLIPRECT_0_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028214_PA_SC_CLIPRECT_0_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028218_PA_SC_CLIPRECT_1_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02821C_PA_SC_CLIPRECT_1_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028220_PA_SC_CLIPRECT_2_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028224_PA_SC_CLIPRECT_2_BR, br, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_028228_PA_SC_CLIPRECT_3_TL, tl, - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02822C_PA_SC_CLIPRECT_3_BR, br, - 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_02820C_PA_SC_CLIPRECT_RULE, 0x0000FFFF, - 0xFFFFFFFF, NULL); if (rctx->family >= CHIP_RV770) { r600_pipe_state_add_reg(rstate, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA, @@ -1532,14 +1486,14 @@ void r600_init_config(struct r600_pipe_context *rctx) r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x00420204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00514002, 0xFFFFFFFF, NULL); } else { r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009508_TA_CNTL_AUX, 0x07000003, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009830_DB_DEBUG, 0x82000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_009838_DB_WATERMARKS, 0x01020204, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286C8_SPI_THREAD_GROUPING, 0x00000001, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004010, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL, 0x00004012, 0xFFFFFFFF, NULL); } r600_pipe_state_add_reg(rstate, R_0288A8_SQ_ESGS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0288AC_SQ_GSVS_RING_ITEMSIZE, 0x00000000, 0xFFFFFFFF, NULL); -- cgit v1.2.3 From c8d4108fbee679735a1cc3f405d848d01bfb23f6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 13:24:01 +1000 Subject: r600g: store samplers/views across blit when we need to modify them also fixup framebuffer state copies to avoid bad state. --- src/gallium/drivers/r600/evergreen_state.c | 18 ++++++----- src/gallium/drivers/r600/r600_blit.c | 51 +++++++++++++++++++++--------- src/gallium/drivers/r600/r600_pipe.h | 10 ++++++ src/gallium/drivers/r600/r600_state.c | 18 ++++++----- 4 files changed, 66 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index a5298e3fdf9..4e95d4c4905 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "r600.h" #include "evergreend.h" @@ -500,6 +501,9 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + for (int i = 0; i < count; i++) { if (resource[i]) { evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); @@ -523,6 +527,9 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; + for (int i = 0; i < count; i++) { evergreen_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } @@ -842,14 +849,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + rctx->pframebuffer = &rctx->framebuffer; /* build states */ diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 4bf44a171af..aecc87f7da1 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -24,10 +24,19 @@ #include #include "r600_pipe.h" -static void r600_blitter_save_states(struct pipe_context *ctx) +enum r600_blitter_op /* bitmask */ +{ + R600_CLEAR = 1, + R600_CLEAR_SURFACE = 2, + R600_COPY = 4 +}; + +static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_suspend(&rctx->ctx); + util_blitter_save_blend(rctx->blitter, rctx->states[R600_PIPE_STATE_BLEND]); util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->states[R600_PIPE_STATE_DSA]); if (rctx->states[R600_PIPE_STATE_STENCIL_REF]) { @@ -47,7 +56,25 @@ static void r600_blitter_save_states(struct pipe_context *ctx) rctx->vertex_elements = NULL; - /* TODO queries */ + if (op & (R600_CLEAR_SURFACE | R600_COPY)) + util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer); + + if (op & R600_COPY) { + util_blitter_save_fragment_sampler_states( + rctx->blitter, rctx->ps_samplers.n_samplers, + (void**)rctx->ps_samplers.samplers); + + util_blitter_save_fragment_sampler_views( + rctx->blitter, rctx->ps_samplers.n_views, + (struct pipe_sampler_view**)rctx->ps_samplers.views); + } + +} + +static void r600_blitter_end(struct pipe_context *ctx) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + r600_context_queries_resume(&rctx->ctx); } int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) @@ -73,9 +100,8 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te (struct pipe_resource*)texture->flushed_depth_texture, 0, level, 0, PIPE_BIND_RENDER_TARGET); - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_save_framebuffer(rctx->blitter, &fb); - if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) depth = 0.0f; @@ -99,12 +125,11 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - r600_blitter_save_states(ctx); + r600_blitter_begin(ctx, R600_CLEAR); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } static void r600_clear_render_target(struct pipe_context *ctx, @@ -114,13 +139,11 @@ static void r600_clear_render_target(struct pipe_context *ctx, unsigned width, unsigned height) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } static void r600_clear_depth_stencil(struct pipe_context *ctx, @@ -132,13 +155,11 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, unsigned width, unsigned height) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state *fb = &rctx->framebuffer; - r600_context_queries_suspend(&rctx->ctx); - util_blitter_save_framebuffer(rctx->blitter, fb); + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); - r600_context_queries_resume(&rctx->ctx); + r600_blitter_end(ctx); } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index c46029a5617..34a59646d51 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -92,6 +92,14 @@ struct r600_pipe_shader { struct r600_vertex_element vertex_elements; }; +/* needed for blitter save */ +struct r600_textures_info { + struct r600_pipe_sampler_view **views; + unsigned n_views; + void **samplers; + unsigned n_samplers; +}; + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -130,6 +138,8 @@ struct r600_pipe_context { struct u_upload_mgr *upload_vb; struct u_upload_mgr *upload_ib; unsigned any_user_vbs; + struct r600_textures_info ps_samplers; + }; struct r600_drawl { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 25310eeda4e..b2e7c282e28 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "r600.h" #include "r600d.h" @@ -703,6 +704,9 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + rctx->ps_samplers.views = resource; + rctx->ps_samplers.n_views = count; + for (int i = 0; i < count; i++) { if (resource[i]) { r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); @@ -726,6 +730,9 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; + rctx->ps_samplers.samplers = states; + rctx->ps_samplers.n_samplers = count; + for (int i = 0; i < count; i++) { r600_context_pipe_state_set_ps_sampler(&rctx->ctx, rstates[i], i); } @@ -1025,14 +1032,9 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, /* unreference old buffer and reference new one */ rstate->id = R600_PIPE_STATE_FRAMEBUFFER; - for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], NULL); - } - for (int i = 0; i < state->nr_cbufs; i++) { - pipe_surface_reference(&rctx->framebuffer.cbufs[i], state->cbufs[i]); - } - pipe_surface_reference(&rctx->framebuffer.zsbuf, state->zsbuf); - rctx->framebuffer = *state; + + util_copy_framebuffer_state(&rctx->framebuffer, state); + rctx->pframebuffer = &rctx->framebuffer; /* build states */ -- cgit v1.2.3 From d59498b78041b8a7a046ac2c892e7a1896f59ca2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 15:22:04 +1000 Subject: r600g: reduce size of context structure. this thing will be in the cache a lot, so having massive big struct arrays inside it won't be helping anyone. --- src/gallium/drivers/r600/r600_pipe.c | 28 ++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_pipe.h | 11 +++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 52fe3c777b3..69bfb2a1a4e 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -85,6 +85,10 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); + FREE(rctx->ps_resource); + FREE(rctx->vs_resource); + FREE(rctx->vs_const); + FREE(rctx->ps_const); FREE(rctx); } @@ -171,6 +175,30 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->vs_const = CALLOC(R600_CONSTANT_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_const) { + FREE(rctx); + return NULL; + } + + rctx->ps_const = CALLOC(R600_CONSTANT_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_const) { + FREE(rctx); + return NULL; + } + + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->vs_resource) { + FREE(rctx); + return NULL; + } + + rctx->ps_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); + if (!rctx->ps_resource) { + FREE(rctx); + return NULL; + } + class = r600_get_family_class(rctx->radeon); if (class == R600 || class == R700) rctx->custom_dsa_flush = r600_create_db_flush_dsa(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 34a59646d51..6ce1f969522 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -100,6 +100,9 @@ struct r600_textures_info { unsigned n_samplers; }; +#define R600_CONSTANT_ARRAY_SIZE 256 +#define R600_RESOURCE_ARRAY_SIZE 160 + struct r600_pipe_context { struct pipe_context context; struct blitter_context *blitter; @@ -122,10 +125,10 @@ struct r600_pipe_context { struct pipe_clip_state clip; unsigned vs_nconst; unsigned ps_nconst; - struct r600_pipe_state vs_const[256]; - struct r600_pipe_state ps_const[256]; - struct r600_pipe_state vs_resource[160]; - struct r600_pipe_state ps_resource[160]; + struct r600_pipe_state *vs_const; + struct r600_pipe_state *ps_const; + struct r600_pipe_state *vs_resource; + struct r600_pipe_state *ps_resource; struct r600_pipe_state config; struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; -- cgit v1.2.3 From 560427667006f01ad9146fa07185924d4f3a08d6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 15:56:12 +1000 Subject: r600g: the vs/ps const arrays weren't actually being used. completely removed them. --- src/gallium/drivers/r600/r600_pipe.c | 14 -------------- src/gallium/drivers/r600/r600_pipe.h | 2 -- 2 files changed, 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 69bfb2a1a4e..8eb97993236 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -87,8 +87,6 @@ static void r600_destroy_context(struct pipe_context *context) FREE(rctx->ps_resource); FREE(rctx->vs_resource); - FREE(rctx->vs_const); - FREE(rctx->ps_const); FREE(rctx); } @@ -175,18 +173,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } - rctx->vs_const = CALLOC(R600_CONSTANT_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_const) { - FREE(rctx); - return NULL; - } - - rctx->ps_const = CALLOC(R600_CONSTANT_ARRAY_SIZE, sizeof(struct r600_pipe_state)); - if (!rctx->vs_const) { - FREE(rctx); - return NULL; - } - rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6ce1f969522..87317867696 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -125,8 +125,6 @@ struct r600_pipe_context { struct pipe_clip_state clip; unsigned vs_nconst; unsigned ps_nconst; - struct r600_pipe_state *vs_const; - struct r600_pipe_state *ps_const; struct r600_pipe_state *vs_resource; struct r600_pipe_state *ps_resource; struct r600_pipe_state config; -- cgit v1.2.3 From 9979d60c0e2e4152bce19c2c4128ff2941b9191b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 11:54:16 +1000 Subject: r600g: add copy into tiled texture --- src/gallium/drivers/r600/r600_texture.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 1eaf40fdf0a..f770a35dab4 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -53,6 +53,25 @@ static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_t transfer->box.width, transfer->box.height); } + +/* Copy from a detiled texture to a tiled one. */ +static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subsrc; + + subsrc.face = 0; + subsrc.level = 0; + ctx->resource_copy_region(ctx, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + rtransfer->linear_texture, subsrc, + 0, 0, 0, + transfer->box.width, transfer->box.height); + + ctx->flush(ctx, 0, NULL); +} + static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) @@ -339,12 +358,12 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx, struct r600_resource_texture *rtex = (struct r600_resource_texture*)transfer->resource; if (rtransfer->linear_texture) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { + r600_copy_into_tiled_texture(ctx, rtransfer); + } pipe_resource_reference(&rtransfer->linear_texture, NULL); } if (rtex->flushed_depth_texture) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - // TODO - } pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL); } pipe_resource_reference(&transfer->resource, NULL); -- cgit v1.2.3 From 771dd89881791e38c076230497023ad7522602b3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 09:53:17 +1000 Subject: r600g: split out miptree setup like r300g just a cleanup step towards tiling --- src/gallium/drivers/r600/r600_texture.c | 55 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index f770a35dab4..048339e1204 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -91,22 +91,53 @@ static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, } } -static void r600_setup_miptree(struct r600_resource_texture *rtex, enum chip_class chipc) +static unsigned r600_texture_get_stride(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned long w, h, pitch, size, layer_size, i, offset; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned width, stride; + + width = u_minify(ptex->width0, level); + + stride = util_format_get_stride(ptex->format, align(width, 64)); + if (chipc == EVERGREEN) + stride = align(stride, 512); + else + stride = align(stride, 256); + return stride; +} + +static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + unsigned height; + + height = u_minify(ptex->height0, level); + height = util_next_power_of_two(height); + return util_format_get_nblocksy(ptex->format, height); +} + +static void r600_setup_miptree(struct pipe_screen *screen, + struct r600_resource_texture *rtex) +{ + struct pipe_resource *ptex = &rtex->resource.base.b; + struct radeon *radeon = (struct radeon *)screen->winsys; + enum chip_class chipc = r600_get_family_class(radeon); + unsigned long pitch, size, layer_size, i, offset; + unsigned nblocksy; rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { - w = u_minify(ptex->width0, i); - h = u_minify(ptex->height0, i); - h = util_next_power_of_two(h); - pitch = util_format_get_stride(ptex->format, align(w, 64)); - if (chipc == EVERGREEN) - pitch = align(pitch, 512); - else - pitch = align(pitch, 256); - layer_size = pitch * h; + pitch = r600_texture_get_stride(screen, rtex, i); + nblocksy = r600_texture_get_nblocksy(screen, rtex, i); + + layer_size = pitch * nblocksy; + if (ptex->target == PIPE_TEXTURE_CUBE) { if (chipc >= R700) size = layer_size * 8; @@ -139,7 +170,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; - r600_setup_miptree(rtex, r600_get_family_class(radeon)); + r600_setup_miptree(screen, rtex); /* FIXME alignment 4096 enought ? too much ? */ resource->domain = r600_domain_from_usage(resource->base.b.bind); -- cgit v1.2.3 From 6a0066a69f6873a53d45684205926e8f5b73ddb2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 12 Oct 2010 10:51:03 +1000 Subject: r600g: use common texture object create function --- src/gallium/drivers/r600/r600_texture.c | 74 ++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 33 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 048339e1204..e825aeee350 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -100,6 +100,9 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, enum chip_class chipc = r600_get_family_class(radeon); unsigned width, stride; + if (rtex->pitch_override) + return rtex->pitch_override; + width = u_minify(ptex->width0, level); stride = util_format_get_stride(ptex->format, align(width, 64)); @@ -154,33 +157,54 @@ static void r600_setup_miptree(struct pipe_screen *screen, rtex->size = offset; } -struct pipe_resource *r600_texture_create(struct pipe_screen *screen, - const struct pipe_resource *templ) +static struct r600_resource_texture * +r600_texture_create_object(struct pipe_screen *screen, + const struct pipe_resource *base, + unsigned array_mode, + unsigned pitch_in_bytes_override, + unsigned max_buffer_size, + struct r600_bo *bo) { struct r600_resource_texture *rtex; struct r600_resource *resource; struct radeon *radeon = (struct radeon *)screen->winsys; rtex = CALLOC_STRUCT(r600_resource_texture); - if (!rtex) { + if (rtex == NULL) return NULL; - } + resource = &rtex->resource; - resource->base.b = *templ; + resource->base.b = *base; resource->base.vtbl = &r600_texture_vtbl; pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; + resource->bo = bo; + resource->domain = r600_domain_from_usage(resource->base.b.bind); + rtex->pitch_override = pitch_in_bytes_override; + rtex->array_mode = array_mode; + r600_setup_miptree(screen, rtex); - /* FIXME alignment 4096 enought ? too much ? */ - resource->domain = r600_domain_from_usage(resource->base.b.bind); resource->size = rtex->size; - resource->bo = r600_bo(radeon, rtex->size, 4096, 0); - if (resource->bo == NULL) { - FREE(rtex); - return NULL; + + if (!resource->bo) { + resource->bo = r600_bo(radeon, rtex->size, 4096, 0); + if (!resource->bo) { + FREE(rtex); + return NULL; + } } - return &resource->base.b; + return rtex; +} + +struct pipe_resource *r600_texture_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + unsigned array_mode = 0; + + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, + 0, 0, NULL); + } static void r600_texture_destroy(struct pipe_screen *screen, @@ -231,13 +255,12 @@ static void r600_tex_surface_destroy(struct pipe_surface *surface) FREE(surface); } + struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, const struct pipe_resource *templ, struct winsys_handle *whandle) { struct radeon *rw = (struct radeon*)screen->winsys; - struct r600_resource_texture *rtex; - struct r600_resource *resource; struct r600_bo *bo = NULL; /* Support only 2D textures without mipmaps */ @@ -245,30 +268,15 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, templ->depth0 != 1 || templ->last_level != 0) return NULL; - rtex = CALLOC_STRUCT(r600_resource_texture); - if (rtex == NULL) - return NULL; - bo = r600_bo_handle(rw, whandle->handle); if (bo == NULL) { - FREE(rtex); return NULL; } - resource = &rtex->resource; - resource->base.b = *templ; - resource->base.vtbl = &r600_texture_vtbl; - pipe_reference_init(&resource->base.b.reference, 1); - resource->base.b.screen = screen; - resource->bo = bo; - rtex->depth = 0; - rtex->pitch_override = whandle->stride; - rtex->bpt = util_format_get_blocksize(templ->format); - rtex->pitch[0] = whandle->stride; - rtex->offset[0] = 0; - rtex->size = align(rtex->pitch[0] * templ->height0, 64); - - return &resource->base.b; + return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, + whandle->stride, + 0, + bo); } static unsigned int r600_texture_is_referenced(struct pipe_context *context, -- cgit v1.2.3 From fa797f12b3e1e82020eb7bc8fd0181baa7515efe Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 11:02:52 +1000 Subject: r600g: rename pitch in texture to pitch_in_bytes --- src/gallium/drivers/r600/evergreen_state.c | 12 ++++++------ src/gallium/drivers/r600/r600_resource.h | 2 +- src/gallium/drivers/r600/r600_state.c | 10 +++++----- src/gallium/drivers/r600/r600_texture.c | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4e95d4c4905..54d2233467a 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -451,7 +451,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_bytes[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, @@ -740,8 +740,8 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -802,8 +802,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rbuffer = &rtex->resource; level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); @@ -815,7 +815,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state if (stencil_format) { uint32_t stencil_offset; - stencil_offset = ((state->zsbuf->height * rtex->pitch[level]) + 255) & ~255; + stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 323960960d6..04b31ddf899 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -50,7 +50,7 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch_override; unsigned long bpt; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index b2e7c282e28..8c8e7986522 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -653,7 +653,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_bytes[0] / tmp->bpt, 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, @@ -943,8 +943,8 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -1003,8 +1003,8 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta rbuffer = &rtex->resource; level = state->zsbuf->level; - pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; + slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index e825aeee350..5bdfd499397 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -151,7 +151,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, size = layer_size * u_minify(ptex->depth0, i); rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; - rtex->pitch[i] = pitch; + rtex->pitch_in_bytes[i] = pitch; offset += size; } rtex->size = offset; @@ -340,7 +340,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch[sr.level]; + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); if (rtex->depth) { r = r600_texture_depth_flush(ctx, texture); -- cgit v1.2.3 From e3b089126c63c7178d725fbe245ca09d3f9edba1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 11:03:18 +1000 Subject: r600g: remove bpt and start using pitch_in_bytes/pixels. this mirror changes in r300g, bpt is kinda useless when it comes to some of the non-simple texture formats. --- src/gallium/drivers/r600/evergreen_state.c | 10 +++++----- src/gallium/drivers/r600/r600_resource.h | 2 +- src/gallium/drivers/r600/r600_state.c | 10 +++++----- src/gallium/drivers/r600/r600_texture.c | 10 +++++++++- 4 files changed, 20 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 54d2233467a..49a888abe35 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -451,7 +451,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch_in_bytes[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_pixels[0], 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, @@ -740,8 +740,8 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -802,8 +802,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rbuffer = &rtex->resource; level = state->zsbuf->level; - pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 04b31ddf899..2d7495e0fbb 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -51,9 +51,9 @@ struct r600_resource_texture { struct r600_resource resource; unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch_override; - unsigned long bpt; unsigned long size; unsigned tiled; unsigned array_mode; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 8c8e7986522..7b0aaef770f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -653,7 +653,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[0] = rbuffer->bo; bo[1] = rbuffer->bo; } - pitch = align(tmp->pitch_in_bytes[0] / tmp->bpt, 8); + pitch = align(tmp->pitch_in_pixels[0], 8); /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, @@ -943,8 +943,8 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[1] = rbuffer->bo; bo[2] = rbuffer->bo; - pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->cbufs[cb]->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -1003,8 +1003,8 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta rbuffer = &rtex->resource; level = state->zsbuf->level; - pitch = (rtex->pitch_in_bytes[level] / rtex->bpt) / 8 - 1; - slice = (rtex->pitch_in_bytes[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; + pitch = rtex->pitch_in_pixels[level] / 8 - 1; + slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 5bdfd499397..d1339f69e73 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -125,6 +125,14 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, return util_format_get_nblocksy(ptex->format, height); } +/* Get a width in pixels from a stride in bytes. */ +static unsigned pitch_to_width(enum pipe_format format, + unsigned pitch_in_bytes) +{ + return (pitch_in_bytes / util_format_get_blocksize(format)) * + util_format_get_blockwidth(format); +} + static void r600_setup_miptree(struct pipe_screen *screen, struct r600_resource_texture *rtex) { @@ -134,7 +142,6 @@ static void r600_setup_miptree(struct pipe_screen *screen, unsigned long pitch, size, layer_size, i, offset; unsigned nblocksy; - rtex->bpt = util_format_get_blocksize(ptex->format); for (i = 0, offset = 0; i <= ptex->last_level; i++) { pitch = r600_texture_get_stride(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); @@ -152,6 +159,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch_in_bytes[i] = pitch; + rtex->pitch_in_pixels[i] = pitch_to_width(ptex->format, pitch); offset += size; } rtex->size = offset; -- cgit v1.2.3 From e9acf9a3bb45caea7b0fba197aa9ab01f24bb63f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 10:14:55 +1000 Subject: r600g: fix transfer stride. fixes segfaults --- src/gallium/drivers/r600/r600_texture.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index d1339f69e73..94886acc38e 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -387,6 +387,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, FREE(trans); return NULL; } + + trans->transfer.stride = + ((struct r600_resource_texture *)trans->linear_texture)->pitch_in_bytes[0]; if (usage & PIPE_TRANSFER_READ) { /* We cannot map a tiled texture directly because the data is * in a different order, therefore we do detiling using a blit. */ -- cgit v1.2.3 From f8778eeb40daf355f8dbcfeb1a9b492c57ce6a35 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 11:08:44 +1000 Subject: r600g: drop all use of unsigned long this changes size on 32/64 bit so is definitely no what you want to use here. --- src/gallium/drivers/r600/r600_resource.h | 12 ++++++------ src/gallium/drivers/r600/r600_texture.c | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 2d7495e0fbb..ef484aba4a2 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -49,12 +49,12 @@ struct r600_resource { struct r600_resource_texture { struct r600_resource resource; - unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch_override; - unsigned long size; + unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch_override; + unsigned size; unsigned tiled; unsigned array_mode; unsigned tile_type; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 94886acc38e..22fe8bf0f39 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -72,11 +72,11 @@ static void r600_copy_into_tiled_texture(struct pipe_context *ctx, struct r600_t ctx->flush(ctx, 0, NULL); } -static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, +static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) { - unsigned long offset = rtex->offset[level]; + unsigned offset = rtex->offset[level]; switch (rtex->resource.base.b.target) { case PIPE_TEXTURE_3D: @@ -139,7 +139,7 @@ static void r600_setup_miptree(struct pipe_screen *screen, struct pipe_resource *ptex = &rtex->resource.base.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned long pitch, size, layer_size, i, offset; + unsigned pitch, size, layer_size, i, offset; unsigned nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { @@ -238,7 +238,7 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned long offset; + unsigned offset; if (surface == NULL) return NULL; @@ -427,7 +427,7 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct r600_bo *bo; enum pipe_format format = transfer->resource->format; struct radeon *radeon = (struct radeon *)ctx->screen->winsys; - unsigned long offset = 0; + unsigned offset = 0; char *map; if (rtransfer->linear_texture) { -- cgit v1.2.3 From 88c1b32c62427c24ea276f20ac5ef260385a98d4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 11:17:44 +1000 Subject: r600g: use blitter for hw copy region at the moment depth copies are failing (piglit depth-level-clamp) so use the fallback for now until get some time to investigate. --- src/gallium/drivers/r600/r600_blit.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index aecc87f7da1..cae05aab28b 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -22,6 +22,7 @@ */ #include #include +#include #include "r600_pipe.h" enum r600_blitter_op /* bitmask */ @@ -163,6 +164,26 @@ static void r600_clear_depth_stencil(struct pipe_context *ctx, } + +/* Copy a block of pixels from one surface to another using HW. */ +static void r600_hw_copy_region(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_subresource subdst, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + struct pipe_subresource subsrc, + unsigned srcx, unsigned srcy, unsigned srcz, + unsigned width, unsigned height) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + r600_blitter_begin(ctx, R600_COPY); + util_blitter_copy_region(rctx->blitter, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height, + TRUE); + r600_blitter_end(ctx); +} + static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, @@ -172,8 +193,16 @@ static void r600_resource_copy_region(struct pipe_context *ctx, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, - src, subsrc, srcx, srcy, srcz, width, height); + boolean is_depth; + /* there is something wrong with depth resource copies at the moment so avoid them for now */ + is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0; + if (is_depth) + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + else + r600_hw_copy_region(ctx, dst, subdst, dstx, dsty, dstz, + src, subsrc, srcx, srcy, srcz, width, height); + } void r600_init_blit_functions(struct r600_pipe_context *rctx) -- cgit v1.2.3 From 92e729bba5aab9958f5ba1339c27f6bfe743ef2e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 17:40:32 +1000 Subject: r600g: evergreen add stencil export bit --- src/gallium/drivers/r600/evergreen_state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 49a888abe35..379d66f48c6 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1558,6 +1558,11 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader R_02880C_DB_SHADER_CONTROL, S_02880C_Z_EXPORT_ENABLE(1), S_02880C_Z_EXPORT_ENABLE(1), NULL); + if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + r600_pipe_state_add_reg(rstate, + R_02880C_DB_SHADER_CONTROL, + S_02880C_STENCIL_EXPORT_ENABLE(1), + S_02880C_STENCIL_EXPORT_ENABLE(1), NULL); } exports_ps = 0; -- cgit v1.2.3 From 6da8129b3c25be1da6d6f6a0e56b8f70b1e2f054 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 17:45:10 +1000 Subject: r600g: add missing eg reg definition --- src/gallium/drivers/r600/evergreend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index eb36a35165e..94661fdd1f6 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -686,6 +686,9 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x2) << 0) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 0) & 0x2) +#define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) #define C_02880C_Z_ORDER 0xFFFFFCFF -- cgit v1.2.3 From 40cc5bfcd70e412289dbb32a1ebca91bf109e1bd Mon Sep 17 00:00:00 2001 From: Stephan Schmid Date: Mon, 11 Oct 2010 15:49:56 +0200 Subject: r600g: fix relative addressing when splitting constant accesses Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8930d4e1831..470f355cde2 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -808,6 +808,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].rel = r600_src[i].rel; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -818,6 +819,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s return r; } r600_src[i].sel = treg; + r600_src[i].rel =0; j--; } } -- cgit v1.2.3 From ff4b397517a374ac3d4bf437f85ae6a96171a714 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 18:50:37 +1000 Subject: r600g: fix stencil export for evergreen harder --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/evergreend.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 379d66f48c6..542df11db66 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1568,7 +1568,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 94661fdd1f6..5d07f532f09 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -686,8 +686,8 @@ #define S_02880C_Z_EXPORT_ENABLE(x) (((x) & 0x1) << 0) #define G_02880C_Z_EXPORT_ENABLE(x) (((x) >> 0) & 0x1) #define C_02880C_Z_EXPORT_ENABLE 0xFFFFFFFE -#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x2) << 0) -#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 0) & 0x2) +#define S_02880C_STENCIL_EXPORT_ENABLE(x) (((x) & 0x1) << 1) +#define G_02880C_STENCIL_EXPORT_ENABLE(x) (((x) >> 1) & 0x1) #define C_02880C_STENCIL_EXPORT_ENABLE 0xFFFFFFFD #define S_02880C_Z_ORDER(x) (((x) & 0x3) << 4) #define G_02880C_Z_ORDER(x) (((x) >> 4) & 0x3) -- cgit v1.2.3 From 95c18abb03b035c6fa029cd0852f07fb39951279 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Oct 2010 14:28:51 +0100 Subject: llvmpipe: Unbreak Z32_FLOAT. Z32_FLOAT uses <4 x float> as intermediate/destination type, instead of <4 x i32>. The necessary bitcasts got removed with commit 5b7eb868fde98388d80601d8dea39e679828f42f Also use depth/stencil type and build contexts consistently, and make the depth pointer argument a ordinary , to catch this sort of issues in the future (and also to pave way for Z16 and Z32_FLOAT_S8_X24 support). --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 130 ++++++++++++++++------------ src/gallium/drivers/llvmpipe/lp_bld_depth.h | 6 ++ src/gallium/drivers/llvmpipe/lp_state_fs.c | 21 +++-- 3 files changed, 93 insertions(+), 64 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 264fce8d6a6..3162f3e1c2e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -458,9 +458,9 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef *zs_value, boolean do_branch) { - struct lp_type type; - struct lp_build_context bld; - struct lp_build_context sbld; + struct lp_type z_type; + struct lp_build_context z_bld; + struct lp_build_context s_bld; struct lp_type s_type; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; @@ -483,28 +483,31 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* We know the values in z_dst are all >= 0, so allow * lp_build_compare to use signed compare intrinsics: */ - type.floating = 0; - type.fixed = 0; - type.sign = 1; - type.norm = 1; - type.width = 32; - type.length = z_src_type.length; + z_type.floating = 0; + z_type.fixed = 0; + z_type.sign = 1; + z_type.norm = 1; + z_type.width = 32; + z_type.length = z_src_type.length; int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); - const_8_int = lp_build_const_int_vec(type, 8); + const_8_int = lp_build_const_int_vec(z_type, 8); const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(int32_vec_type, 0), ""); z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_compare(builder, - type, + z_type, depth->func, z_src, z_dst); lp_build_mask_update(mask, z_pass); @@ -517,10 +520,10 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * storage. */ if (depth->writemask) { - type.sign = 1; - lp_build_context_init(&bld, builder, type); + z_type.sign = 1; + lp_build_context_init(&z_bld, builder, z_type); - z_dst = lp_build_select(&bld, lp_build_mask_value(mask), z_src, z_dst); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), z_src, z_dst); z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); *zs_value = z_dst; } @@ -543,19 +546,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } /* Pick the depth type. */ - type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); /* FIXME: Cope with a depth test type with a different bit width. */ - assert(type.width == z_src_type.width); - assert(type.length == z_src_type.length); + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); /* Convert fragment Z from float to integer */ - lp_build_conv(builder, z_src_type, type, &z_src, 1, &z_src, 1); - - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(lp_build_vec_type(type), 0), ""); - + lp_build_conv(builder, z_src_type, z_type, &z_src, 1, &z_src, 1); /* Sanity checking */ @@ -578,8 +576,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } assert(z_swizzle < 4); - assert(format_desc->block.bits == type.width); - if (type.floating) { + assert(format_desc->block.bits == z_type.width); + if (z_type.floating) { assert(z_swizzle == 0); assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT); @@ -590,21 +588,24 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); - assert(!type.fixed); - assert(!type.sign); - assert(type.norm); + assert(!z_type.fixed); + assert(!z_type.sign); + assert(z_type.norm); } } /* Setup build context for Z vals */ - lp_build_context_init(&bld, builder, type); + lp_build_context_init(&z_bld, builder, z_type); /* Setup build context for stencil vals */ - s_type = lp_type_int_vec(type.width); - lp_build_context_init(&sbld, builder, s_type); + s_type = lp_type_int_vec(z_type.width); + lp_build_context_init(&s_bld, builder, s_type); /* Load current z/stencil value from z/stencil buffer */ + zs_dst_ptr = LLVMBuildBitCast(builder, + zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); lp_build_name(zs_dst, "zsbufval"); @@ -618,12 +619,12 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, z_shift); + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); z_src = LLVMBuildLShr(builder, z_src, shift, ""); } if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, z_mask); + LLVMValueRef mask = lp_build_const_int_vec(z_type, z_mask); z_src = LLVMBuildAnd(builder, z_src, mask, ""); z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); z_bitmask = mask; /* used below */ @@ -637,7 +638,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { - LLVMValueRef shift = lp_build_const_int_vec(type, s_shift); + LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, ""); stencil_shift = shift; /* used below */ } @@ -646,7 +647,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (s_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(type, s_mask); + LLVMValueRef mask = lp_build_const_int_vec(s_type, s_mask); stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } @@ -662,24 +663,24 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* front_facing = face > 0.0 ? ~0 : 0 */ front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, ""); front_facing = LLVMBuildSExt(builder, front_facing, - LLVMIntType(bld.type.length*bld.type.width), + LLVMIntType(s_bld.type.length*s_bld.type.width), ""); front_facing = LLVMBuildBitCast(builder, front_facing, - bld.int_vec_type, ""); + s_bld.int_vec_type, ""); } /* convert scalar stencil refs into vectors */ - stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]); - stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]); + stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); + stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); - s_pass_mask = lp_build_stencil_test(&sbld, stencil, + s_pass_mask = lp_build_stencil_test(&s_bld, stencil, stencil_refs, stencil_vals, front_facing); /* apply stencil-fail operator */ { - LLVMValueRef s_fail_mask = lp_build_andnot(&bld, orig_mask, s_pass_mask); - stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP, + LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, stencil_refs, stencil_vals, s_fail_mask, front_facing); } @@ -687,7 +688,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (depth->enabled) { /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst); + z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); if (!stencil[0].enabled) { /* We can potentially skip all remaining operations here, but only @@ -721,7 +722,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* Mix the old and new Z buffer values. * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) */ - z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select_bitwise(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -729,14 +730,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef z_fail_mask, z_pass_mask; /* apply Z-fail operator */ - z_fail_mask = lp_build_andnot(&bld, orig_mask, z_pass); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP, + z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, stencil_refs, stencil_vals, z_fail_mask, front_facing); /* apply Z-pass operator */ - z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + z_pass_mask = LLVMBuildAnd(z_bld.builder, orig_mask, z_pass, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, z_pass_mask, front_facing); } @@ -745,8 +746,8 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, /* No depth test: apply Z-pass operator to stencil buffer values which * passed the stencil test. */ - s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, ""); - stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP, + s_pass_mask = LLVMBuildAnd(s_bld.builder, orig_mask, s_pass_mask, ""); + stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, stencil_refs, stencil_vals, s_pass_mask, front_facing); } @@ -755,7 +756,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, * stencil bits before ORing Z with Stencil to make the final pixel value. */ if (stencil_vals && stencil_shift) - stencil_vals = LLVMBuildShl(bld.builder, stencil_vals, + stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); /* Finally, merge/store the z/stencil values */ @@ -763,7 +764,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, (stencil[0].enabled && stencil[0].writemask)) { if (z_dst && stencil_vals) - zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, ""); + zs_dst = LLVMBuildOr(z_bld.builder, z_dst, stencil_vals, ""); else if (z_dst) zs_dst = z_dst; else @@ -784,6 +785,18 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value) +{ + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(LLVMTypeOf(zs_value), 0), ""); + + LLVMBuildStore(builder, zs_value, zs_dst_ptr); +} + void lp_build_deferred_depth_write(LLVMBuilderRef builder, @@ -793,17 +806,20 @@ lp_build_deferred_depth_write(LLVMBuilderRef builder, LLVMValueRef zs_dst_ptr, LLVMValueRef zs_value) { - struct lp_type type; - struct lp_build_context bld; + struct lp_type z_type; + struct lp_build_context z_bld; LLVMValueRef z_dst; /* XXX: pointlessly redo type logic: */ - type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); - lp_build_context_init(&bld, builder, type); + z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length); + lp_build_context_init(&z_bld, builder, z_type); + + zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, + LLVMPointerType(z_bld.vec_type, 0), ""); z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); - z_dst = lp_build_select(&bld, lp_build_mask_value(mask), zs_value, z_dst); + z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst); LLVMBuildStore(builder, z_dst, zs_dst_ptr); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h index 0f89668123a..a54ef3a711e 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h @@ -64,6 +64,12 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef *zs_value, boolean do_branch); +void +lp_build_depth_write(LLVMBuilderRef builder, + const struct util_format_description *format_desc, + LLVMValueRef zs_dst_ptr, + LLVMValueRef zs_value); + void lp_build_deferred_depth_write(LLVMBuilderRef builder, struct lp_type z_src_type, diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 6872f2d3c6a..c09835635dd 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -325,8 +325,9 @@ generate_fs(struct llvmpipe_context *lp, &zs_value, !simple_shader); - if (depth_mode & EARLY_DEPTH_WRITE) - LLVMBuildStore(builder, zs_value, depth_ptr); + if (depth_mode & EARLY_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } } lp_build_interp_soa_update_inputs(interp, i); @@ -379,8 +380,9 @@ generate_fs(struct llvmpipe_context *lp, &zs_value, !simple_shader); /* Late Z write */ - if (depth_mode & LATE_DEPTH_WRITE) - LLVMBuildStore(builder, zs_value, depth_ptr); + if (depth_mode & LATE_DEPTH_WRITE) { + lp_build_depth_write(builder, zs_format_desc, depth_ptr, zs_value); + } } else if ((depth_mode & EARLY_DEPTH_TEST) && (depth_mode & LATE_DEPTH_WRITE)) @@ -534,6 +536,7 @@ generate_fragment(struct llvmpipe_context *lp, LLVMValueRef blend_mask; LLVMValueRef function; LLVMValueRef facing; + const struct util_format_description *zs_format_desc; unsigned num_fs; unsigned i; unsigned chan; @@ -579,7 +582,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */ - arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */ + arg_types[8] = LLVMPointerType(LLVMInt8Type(), 0); /* depth */ arg_types[9] = LLVMInt32Type(); /* mask_input */ arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */ @@ -648,12 +651,16 @@ generate_fragment(struct llvmpipe_context *lp, sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr); /* loop over quads in the block */ + zs_format_desc = util_format_description(key->zsbuf_format); + for(i = 0; i < num_fs; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef depth_offset = LLVMConstInt(LLVMInt32Type(), + i*fs_type.length*zs_format_desc->block.bits/8, + 0); LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS]; LLVMValueRef depth_ptr_i; - depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, ""); + depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, ""); generate_fs(lp, shader, key, builder, -- cgit v1.2.3 From ae00e34e4b0d3be247b0538b60810176397c7915 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Oct 2010 20:25:17 +0100 Subject: llvmpipe: Generalize the x8z24 fast path to all depth formats. Together with the previous commit, this generalize the benefits of d2cf757f44f4ee5554243f3279483a25886d9927 to all depth formats, in particular: - simpler float -> 24unorm conversion - avoid unsigned comparisons (not directly supported on SSE) by aligning to the least significant bit - avoid unecessary/repeated mask ANDing Verified with trivial/tri-z that the exact same assembly is produced for X8Z24. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 193 ++++++++++++---------------- 1 file changed, 82 insertions(+), 111 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 3162f3e1c2e..e4cfa97aa3f 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -304,8 +304,13 @@ lp_depth_type(const struct util_format_description *format_desc, } else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { assert(format_desc->block.bits <= 32); - if(format_desc->channel[swizzle].normalized) - type.norm = TRUE; + assert(format_desc->channel[swizzle].normalized); + if (format_desc->channel[swizzle].size < format_desc->block.bits) { + /* Prefer signed integers when possible, as SSE has less support + * for unsigned comparison; + */ + type.sign = TRUE; + } } else assert(0); @@ -325,9 +330,9 @@ lp_depth_type(const struct util_format_description *format_desc, * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us * get by with fewer bit twiddling steps. */ -static boolean +static void get_z_shift_and_mask(const struct util_format_description *format_desc, - unsigned *shift, unsigned *mask) + unsigned *shift, unsigned *width, unsigned *mask) { const unsigned total_bits = format_desc->block.bits; unsigned z_swizzle; @@ -340,15 +345,16 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, z_swizzle = format_desc->swizzle[0]; - if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) - return FALSE; + assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + + *width = format_desc->channel[z_swizzle].size; padding_right = 0; for (chan = 0; chan < z_swizzle; ++chan) padding_right += format_desc->channel[chan].size; padding_left = - total_bits - (padding_right + format_desc->channel[z_swizzle].size); + total_bits - (padding_right + *width); if (padding_left || padding_right) { unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1; @@ -359,9 +365,7 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, *mask = 0xffffffff; } - *shift = padding_left; - - return TRUE; + *shift = padding_right; } @@ -462,6 +466,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, struct lp_build_context z_bld; struct lp_build_context s_bld; struct lp_type s_type; + unsigned z_shift, z_width, z_mask; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; @@ -469,67 +474,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMValueRef orig_mask = lp_build_mask_value(mask); LLVMValueRef front_facing = NULL; - /* Prototype a simpler path: - */ - if (z_src_type.floating && - format_desc->format == PIPE_FORMAT_X8Z24_UNORM && - depth->enabled) - { - LLVMValueRef zscaled; - LLVMValueRef const_ffffff_float; - LLVMValueRef const_8_int; - LLVMTypeRef int32_vec_type; - - /* We know the values in z_dst are all >= 0, so allow - * lp_build_compare to use signed compare intrinsics: - */ - z_type.floating = 0; - z_type.fixed = 0; - z_type.sign = 1; - z_type.norm = 1; - z_type.width = 32; - z_type.length = z_src_type.length; - - int32_vec_type = LLVMVectorType(LLVMInt32Type(), z_src_type.length); - - const_8_int = lp_build_const_int_vec(z_type, 8); - const_ffffff_float = lp_build_const_vec(z_src_type, (float)0xffffff); - - zscaled = LLVMBuildFMul(builder, z_src, const_ffffff_float, "zscaled"); - z_src = LLVMBuildFPToSI(builder, zscaled, int32_vec_type, "z_src"); - - /* Load current z/stencil value from z/stencil buffer */ - zs_dst_ptr = LLVMBuildBitCast(builder, - zs_dst_ptr, - LLVMPointerType(int32_vec_type, 0), ""); - z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval"); - z_dst = LLVMBuildLShr(builder, z_dst, const_8_int, "z_dst"); - - /* compare src Z to dst Z, returning 'pass' mask */ - z_pass = lp_build_compare(builder, - z_type, - depth->func, z_src, z_dst); - - lp_build_mask_update(mask, z_pass); - - if (do_branch) - lp_build_mask_check(mask); - - /* No need to worry about old stencil contents, just blend the - * old and new values and shift into the correct position for - * storage. - */ - if (depth->writemask) { - z_type.sign = 1; - lp_build_context_init(&z_bld, builder, z_type); - - z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), z_src, z_dst); - z_dst = LLVMBuildShl(builder, z_dst, const_8_int, "z_dst"); - *zs_value = z_dst; - } - - return; - } /* * Depths are expected to be between 0 and 1, even if they are stored in @@ -552,10 +496,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, assert(z_type.width == z_src_type.width); assert(z_type.length == z_src_type.length); - /* Convert fragment Z from float to integer */ - lp_build_conv(builder, z_src_type, z_type, &z_src, 1, &z_src, 1); - - /* Sanity checking */ { const unsigned z_swizzle = format_desc->swizzle[0]; @@ -589,8 +529,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, UTIL_FORMAT_TYPE_UNSIGNED); assert(format_desc->channel[z_swizzle].normalized); assert(!z_type.fixed); - assert(!z_type.sign); - assert(z_type.norm); } } @@ -608,34 +546,14 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, LLVMPointerType(z_bld.vec_type, 0), ""); zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, ""); - lp_build_name(zs_dst, "zsbufval"); + lp_build_name(zs_dst, "zs_dst"); /* Compute and apply the Z/stencil bitmasks and shifts. */ { - unsigned z_shift, z_mask; unsigned s_shift, s_mask; - if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) { - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); - z_src = LLVMBuildLShr(builder, z_src, shift, ""); - } - - if (z_mask != 0xffffffff) { - LLVMValueRef mask = lp_build_const_int_vec(z_type, z_mask); - z_src = LLVMBuildAnd(builder, z_src, mask, ""); - z_dst = LLVMBuildAnd(builder, zs_dst, mask, ""); - z_bitmask = mask; /* used below */ - } - else { - z_dst = zs_dst; - } - - lp_build_name(z_dst, "zsbuf.z"); - } - if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); @@ -651,7 +569,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); } - lp_build_name(stencil_vals, "stencil"); + lp_build_name(stencil_vals, "s_dst"); } } @@ -687,6 +605,62 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (depth->enabled) { + get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); + + /* + * Convert fragment Z to the desired type, aligning the LSB to the right. + */ + + assert(z_type.width == z_src_type.width); + assert(z_type.length == z_src_type.length); + assert(lp_check_value(z_src_type, z_src)); + if (z_src_type.floating) { + /* + * Convert from floating point values + */ + + if (!z_type.floating) { + z_src = lp_build_clamped_float_to_unsigned_norm(builder, + z_src_type, + z_width, + z_src); + } + } else { + /* + * Convert from unsigned normalized values. + */ + + assert(!z_src_type.sign); + assert(!z_src_type.fixed); + assert(z_src_type.norm); + assert(!z_type.floating); + if (z_src_type.width > z_width) { + LLVMValueRef shift = lp_build_const_int_vec(z_src_type, + z_src_type.width - z_width); + z_src = LLVMBuildLShr(builder, z_src, shift, ""); + } + } + assert(lp_check_value(z_type, z_src)); + + lp_build_name(z_src, "z_src"); + + if (z_mask != 0xffffffff) { + z_bitmask = lp_build_const_int_vec(z_type, z_mask); + } + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { + z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); + } + /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); @@ -704,25 +678,20 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (depth->writemask) { - LLVMValueRef zselectmask = lp_build_mask_value(mask); + LLVMValueRef zselectmask; /* mask off bits that failed Z test */ - zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, ""); + zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, ""); /* mask off bits that failed stencil test */ if (s_pass_mask) { zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, ""); } - /* if combined Z/stencil format, mask off the stencil bits */ - if (z_bitmask) { - zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, ""); - } - /* Mix the old and new Z buffer values. - * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i]) + * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i] */ - z_dst = lp_build_select_bitwise(&z_bld, zselectmask, z_src, z_dst); + z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst); } if (stencil[0].enabled) { @@ -752,9 +721,11 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, s_pass_mask, front_facing); } - /* The Z bits are already in the right place but we may need to shift the - * stencil bits before ORing Z with Stencil to make the final pixel value. - */ + /* Put Z and ztencil bits in the right place */ + if (z_dst && z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildShl(builder, z_dst, shift, ""); + } if (stencil_vals && stencil_shift) stencil_vals = LLVMBuildShl(s_bld.builder, stencil_vals, stencil_shift, ""); -- cgit v1.2.3 From 26dacce2c0b33a2a6aff77e6094c06e385e1a541 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 09:10:16 +1000 Subject: r600g: drop unused context members --- src/gallium/drivers/r600/r600_pipe.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 87317867696..47a1b3070e9 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -123,8 +123,6 @@ struct r600_pipe_context { struct pipe_stencil_ref stencil_ref; struct pipe_viewport_state viewport; struct pipe_clip_state clip; - unsigned vs_nconst; - unsigned ps_nconst; struct r600_pipe_state *vs_resource; struct r600_pipe_state *ps_resource; struct r600_pipe_state config; -- cgit v1.2.3 From a21a2748beb1f42d21e14858eee9a1323d85a00f Mon Sep 17 00:00:00 2001 From: Fredrik Höglund Date: Wed, 13 Oct 2010 17:49:15 +0200 Subject: r600g: Fix texture sampling with swizzled coords Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 470f355cde2..4a9d9beaba5 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1860,7 +1860,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = src_gpr; - alu.src[0].chan = i; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) -- cgit v1.2.3 From 8a9f02c5d503089bdcc90ff934f6269e59356d52 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 13:29:11 +1000 Subject: r600g: only pick centroid coordinate when asked. TGSI tells us when to use this, its not hooked up from GLSL to MESA to TGSI yet though. --- src/gallium/drivers/r600/r600_shader.c | 4 +++- src/gallium/drivers/r600/r600_shader.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 4a9d9beaba5..512cc4a32f4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -115,7 +115,8 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); - tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].centroid) + tmp |= S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) have_pos = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || @@ -439,6 +440,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].interpolate = d->Declaration.Interpolate; + ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; if (ctx->type == TGSI_PROCESSOR_VERTEX) { /* turn input into fetch */ diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 6e2620f2012..a341cca0836 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -31,6 +31,7 @@ struct r600_shader_io { unsigned done; int sid; unsigned interpolate; + boolean centroid; }; struct r600_shader { -- cgit v1.2.3 From 1e82c28fcf76bf79ceb5a1eaf29b3d6d25909ddd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 14:14:22 +1000 Subject: r600g: fixup pos/face ena/address properly --- src/gallium/drivers/r600/r600_shader.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 512cc4a32f4..42878a43f38 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -107,8 +107,8 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; /* clear previous register */ rstate->nregs = 0; @@ -118,14 +118,14 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; + face_index = i; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); @@ -163,13 +163,22 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | - S_0286CC_BARYC_SAMPLE_CNTL(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | + S_0286CC_BARYC_SAMPLE_CNTL(1)); spi_input_z |= 1; } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, -- cgit v1.2.3 From 0637044add50b3a4aee8e915b84c18813c9130f3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 14:19:37 +1000 Subject: r600g: fixup typo in macro name --- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 42878a43f38..d6f73cb7432 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -174,7 +174,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade spi_ps_in_control_1 = 0; if (face_index != -1) { spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | - S_286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); } r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); -- cgit v1.2.3 From 68014c8d19559576d368e158932278df05fe659b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 14:27:34 +1000 Subject: r600g: select linear interpolate if tgsi input requests it --- src/gallium/drivers/r600/r600_shader.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d6f73cb7432..141adcc54be 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -117,6 +117,9 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); if (rshader->input[i].centroid) tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + tmp |= S_028644_SEL_LINEAR(1); + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) pos_index = i; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || -- cgit v1.2.3 From f0bd76f28d17da6eabf977a7e619e4ff943a70c7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 14 Oct 2010 13:15:28 +0100 Subject: llvmpipe: don't try to emit non-existent color outputs --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c09835635dd..6e3c27e78e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -406,14 +406,15 @@ generate_fs(struct llvmpipe_context *lp, if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) { unsigned cbuf = shader->info.base.output_semantic_index[attrib]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) - { - /* XXX: just initialize outputs to point at colors[] and - * skip this. - */ - LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); - lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); - LLVMBuildStore(builder, out, color[cbuf][chan]); + for(chan = 0; chan < NUM_CHANNELS; ++chan) { + if(outputs[attrib][chan]) { + /* XXX: just initialize outputs to point at colors[] and + * skip this. + */ + LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]); + LLVMBuildStore(builder, out, color[cbuf][chan]); + } } } } -- cgit v1.2.3 From c28f7645722ed3da1a04d3187f9cfa5d8e5e489d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 14 Oct 2010 16:42:39 +0100 Subject: r600g: emit hardware linewidth Tested with demos/pixeltest - line rasterization doesn't seem to be set up for GL conventions yet, but at least width is respected now. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7b0aaef770f..2c0a2005cf7 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -499,7 +499,10 @@ static void *r600_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)(state->point_size * 8.0); r600_pipe_state_add_reg(rstate, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A04_PA_SU_POINT_MINMAX, 0x80000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, 0x00000008, 0xFFFFFFFF, NULL); + + tmp = (unsigned)(state->line_width * 8.0); + r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A0C_PA_SC_LINE_STIPPLE, 0x00000005, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MPASS_PS_CNTL, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); -- cgit v1.2.3 From 8260ab93461eca3e18f9c17a9ca1961a11372071 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 14 Oct 2010 17:19:00 +0100 Subject: r600g: handle absolute modifier in shader translator This was being classed as unsupported in one place but used in others. Enabling it seems to work fine. Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_shader.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 141adcc54be..b53d4780719 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -389,11 +389,9 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Dimension || - i->Src[j].Register.Absolute) { - R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, - i->Src[j].Register.Dimension, - i->Src[j].Register.Absolute); + if (i->Src[j].Register.Dimension) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); return -EINVAL; } } @@ -760,6 +758,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx, if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } -- cgit v1.2.3 From 510c503762a228db2b3d6804f5f5c7af9be1a920 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 15 Oct 2010 08:46:16 +1000 Subject: r300g: clean up warning due to unknown cap. --- src/gallium/drivers/r300/r300_screen.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 7f41ff0e2ec..b448924f85e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -124,6 +124,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INDEP_BLEND_FUNC: case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_SHADER_STENCIL_EXPORT: return 0; /* Texturing. */ -- cgit v1.2.3 From 3d7479d70568c84354338d0da0b7bed4d296c169 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 14 Oct 2010 16:31:54 -0600 Subject: llvmpipe: code to dump bytecode to file (disabled) --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 6e3c27e78e9..d2fbe277083 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -99,6 +99,7 @@ #include +#include static unsigned fs_no = 0; @@ -778,6 +779,11 @@ generate_fragment(struct llvmpipe_context *lp, debug_printf("\n"); } + /* Dump byte code to a file */ + if (0) { + LLVMWriteBitcodeToFile(lp_build_module, "llvmpipe.bc"); + } + /* * Translate the LLVM IR into machine code. */ -- cgit v1.2.3 From fc6caef4cb67fb13642c5ebccee53019d1764df6 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 14:40:51 +1000 Subject: r600g: evergreen interpolation support. On evergreen, interpolation has moved into the fragment shader, with the interpolation parmaters being passed via GPRs and LDS entries. This works out the number of interps required and reserves GPR/LDS storage for them, it also correctly routes face/position values which aren't interpolated from the vertex shader. Also if we noticed nothing is to be interpolated we always setup perspective interpolation for one value otherwise the GPU appears to lockup. This fixes about 15 piglit tests on evergreen. --- src/gallium/drivers/r600/evergreen_state.c | 74 ++++++++++++++++++----- src/gallium/drivers/r600/r600_shader.c | 95 ++++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_shader.h | 2 + 3 files changed, 149 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 542df11db66..935496c04af 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1529,23 +1529,39 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; - unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; - boolean have_pos = FALSE, have_face = FALSE; + unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; + int pos_index = -1, face_index = -1; + int ninterp = 0; + boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE; + unsigned spi_baryc_cntl; /* clear previous register */ rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); + /* evergreen NUM_INTERP only contains values interpolated into the LDS, + POSITION goes via GPRs from the SC so isn't counted */ if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) - have_pos = TRUE; + pos_index = i; + else if (rshader->input[i].name == TGSI_SEMANTIC_FACE) + face_index = i; + else { + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR || + rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + ninterp++; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) + have_linear = TRUE; + if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) + have_perspective = TRUE; + if (rshader->input[i].centroid) + have_centroid = TRUE; + } if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } - if (rshader->input[i].name == TGSI_SEMANTIC_FACE) - have_face = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { tmp |= S_028644_PT_SPRITE_TEX(1); @@ -1568,7 +1584,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 0; num_cout = 0; for (i = 0; i < rshader->noutput; i++) { - if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) + if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || + rshader->output[i].name == TGSI_SEMANTIC_STENCIL) exports_ps |= 1; else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { num_cout++; @@ -1580,18 +1597,48 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader exports_ps = 2; } - spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | - S_0286CC_PERSP_GRADIENT_ENA(1); + if (ninterp == 0) { + ninterp = 1; + have_perspective = TRUE; + } + + spi_ps_in_control_0 = S_0286CC_NUM_INTERP(ninterp) | + S_0286CC_PERSP_GRADIENT_ENA(have_perspective) | + S_0286CC_LINEAR_GRADIENT_ENA(have_linear); spi_input_z = 0; - if (have_pos) { - spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1); + if (pos_index != -1) { + spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | + S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | + S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr); spi_input_z |= 1; } + + spi_ps_in_control_1 = 0; + if (face_index != -1) { + spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); + } + + spi_baryc_cntl = 0; + if (have_perspective) + spi_baryc_cntl |= S_0286E0_PERSP_CENTER_ENA(1) | + S_0286E0_PERSP_CENTROID_ENA(have_centroid); + if (have_linear) + spi_baryc_cntl |= S_0286E0_LINEAR_CENTER_ENA(1) | + S_0286E0_LINEAR_CENTROID_ENA(have_centroid); + r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, - S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); + spi_ps_in_control_1, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0286E4_SPI_PS_IN_CONTROL_2, + 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, + R_0286E0_SPI_BARYC_CNTL, + spi_baryc_cntl, + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo); @@ -1607,11 +1654,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_02884C_SQ_PGM_EXPORTS_PS, exports_ps, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, - R_0286E0_SPI_BARYC_CNTL, - S_0286E0_PERSP_CENTROID_ENA(1) | - S_0286E0_LINEAR_CENTROID_ENA(1), - 0xFFFFFFFF, NULL); if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index b53d4780719..94c9cbd9234 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -357,6 +357,11 @@ struct r600_shader_ctx { u32 *literals; u32 nliterals; u32 max_driver_temp_used; + /* needed for evergreen interpolation */ + boolean input_centroid; + boolean input_linear; + boolean input_perspective; + int num_interp_gpr; }; struct r600_shader_tgsi_instruction { @@ -404,10 +409,33 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) { int i, r; struct r600_bc_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = 0; + + if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { + ij_index = 0; + if (ctx->shader->input[input].centroid) + ij_index++; + } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { + ij_index = 0; + /* if we have perspective add one */ + if (ctx->input_perspective) { + ij_index++; + /* if we have perspective centroid */ + if (ctx->input_centroid) + ij_index++; + } + if (ctx->shader->input[input].centroid) + ij_index++; + } + + /* work out gpr and base_chan from index */ + gpr = ij_index / 2; + base_chan = (2 * (ij_index % 2)) + 1; for (i = 0; i < 8; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -418,13 +446,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { - alu.dst.sel = ctx->shader->input[gpr].gpr; + alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; } alu.dst.chan = i % 4; - alu.src[0].chan = (1 - (i % 2)); - alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; + + alu.src[0].sel = gpr; + alu.src[0].chan = (base_chan - (i % 2)); + + alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; alu.bank_swizzle_force = SQ_ALU_VEC_210; if ((i % 4) == 3) @@ -474,7 +505,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) } if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { /* turn input into interpolate on EG */ - evergreen_interp_alu(ctx, i); + if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { + if (ctx->shader->input[i].interpolate > 0) { + ctx->shader->input[i].lds_pos = ctx->shader->nlds++; + evergreen_interp_alu(ctx, i); + } + } } break; case TGSI_FILE_OUTPUT: @@ -501,6 +537,53 @@ static int r600_get_temp(struct r600_shader_ctx *ctx) return ctx->temp_reg + ctx->max_driver_temp_used++; } +/* + * for evergreen we need to scan the shader to find the number of GPRs we need to + * reserve for interpolation. + * + * we need to know if we are going to emit + * any centroid inputs + * if perspective and linear are required +*/ +static int evergreen_gpr_count(struct r600_shader_ctx *ctx) +{ + int i; + int num_baryc; + + ctx->input_linear = FALSE; + ctx->input_perspective = FALSE; + ctx->input_centroid = FALSE; + ctx->num_interp_gpr = 1; + + /* any centroid inputs */ + for (i = 0; i < ctx->info.num_inputs; i++) { + /* skip position/face */ + if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || + ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) + continue; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) + ctx->input_linear = TRUE; + if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) + ctx->input_perspective = TRUE; + if (ctx->info.input_centroid[i]) + ctx->input_centroid = TRUE; + } + + num_baryc = 0; + /* ignoring sample for now */ + if (ctx->input_perspective) + num_baryc++; + if (ctx->input_linear) + num_baryc++; + if (ctx->input_centroid) + num_baryc *= 2; + + ctx->num_interp_gpr += (num_baryc + 1) >> 1; + + /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ + return ctx->num_interp_gpr; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -547,7 +630,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_INPUT] = 1; } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { - ctx.file_offset[TGSI_FILE_INPUT] = 1; + ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_count[TGSI_FILE_INPUT]; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index a341cca0836..f8bc5951395 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -32,6 +32,7 @@ struct r600_shader_io { int sid; unsigned interpolate; boolean centroid; + unsigned lds_pos; /* for evergreen */ }; struct r600_shader { @@ -40,6 +41,7 @@ struct r600_shader { boolean flat_shade; unsigned ninput; unsigned noutput; + unsigned nlds; struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; -- cgit v1.2.3 From 4195febeecd2d2f5571afdb90cbb185a4759f50a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 14 Oct 2010 23:28:10 +0100 Subject: llvmpipe: reintroduce SET_STATE binner command But bin lazily only into bins which are receiving geometry. --- src/gallium/drivers/llvmpipe/lp_rast.c | 13 +++++-- src/gallium/drivers/llvmpipe/lp_rast.h | 6 ++-- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 35 +++++++++++------- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 7 +++- src/gallium/drivers/llvmpipe/lp_scene.c | 4 ++- src/gallium/drivers/llvmpipe/lp_scene.h | 28 ++++++++++++++- src/gallium/drivers/llvmpipe/lp_setup_line.c | 1 - src/gallium/drivers/llvmpipe/lp_setup_point.c | 1 - src/gallium/drivers/llvmpipe/lp_setup_tri.c | 51 +++++++++++++++------------ 9 files changed, 100 insertions(+), 46 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index db9b2f9b128..35e2f731e89 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -334,7 +334,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, { const struct lp_scene *scene = task->scene; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; @@ -414,7 +414,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, unsigned x, unsigned y, unsigned mask) { - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; const struct lp_scene *scene = task->scene; uint8_t *color[PIPE_MAX_COLOR_BUFS]; @@ -490,6 +490,14 @@ lp_rast_end_query(struct lp_rasterizer_task *task, } +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + task->state = arg.state; +} + + /** * Set top row and left column of the tile's pixels to white. For debugging. @@ -602,6 +610,7 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = lp_rast_shade_tile_opaque, lp_rast_begin_query, lp_rast_end_query, + lp_rast_set_state, }; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index e2bcc450168..f74b198a66c 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -85,8 +85,6 @@ struct lp_rast_shader_inputs { float (*a0)[4]; float (*dadx)[4]; float (*dady)[4]; - - const struct lp_rast_state *state; }; /* Note: the order of these values is important as they are loaded by @@ -154,6 +152,7 @@ union lp_rast_cmd_arg { uint32_t value; uint32_t mask; } clear_zstencil; + const struct lp_rast_state *state; struct lp_fence *fence; struct llvmpipe_query *query_obj; }; @@ -245,8 +244,9 @@ lp_rast_arg_null( void ) #define LP_RAST_OP_SHADE_TILE_OPAQUE 0xe #define LP_RAST_OP_BEGIN_QUERY 0xf #define LP_RAST_OP_END_QUERY 0x10 +#define LP_RAST_OP_SET_STATE 0x11 -#define LP_RAST_OP_MAX 0x11 +#define LP_RAST_OP_MAX 0x12 #define LP_RAST_OP_MASK 0xff void diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 6f4ba1c6fef..3113e196c40 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -12,6 +12,7 @@ static INLINE int u_bit_scan(unsigned *mask) struct tile { int coverage; int overdraw; + const struct lp_rast_state *state; char data[TILE_SIZE][TILE_SIZE]; }; @@ -47,6 +48,7 @@ static const char *cmd_names[LP_RAST_OP_MAX] = "shade_tile_opaque", "begin_query", "end_query", + "set_state", }; static const char *cmd_name(unsigned cmd) @@ -56,31 +58,31 @@ static const char *cmd_name(unsigned cmd) } static const struct lp_fragment_shader_variant * -get_variant( const struct cmd_block *block, - int k ) +get_variant( const struct lp_rast_state *state, + const struct cmd_block *block, + int k ) { if (block->cmd[k] == LP_RAST_OP_SHADE_TILE || - block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE) - return block->arg[k].shade_tile->state->variant; - - if (block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || + block->cmd[k] == LP_RAST_OP_SHADE_TILE_OPAQUE || + block->cmd[k] == LP_RAST_OP_TRIANGLE_1 || block->cmd[k] == LP_RAST_OP_TRIANGLE_2 || block->cmd[k] == LP_RAST_OP_TRIANGLE_3 || block->cmd[k] == LP_RAST_OP_TRIANGLE_4 || block->cmd[k] == LP_RAST_OP_TRIANGLE_5 || block->cmd[k] == LP_RAST_OP_TRIANGLE_6 || block->cmd[k] == LP_RAST_OP_TRIANGLE_7) - return block->arg[k].triangle.tri->inputs.state->variant; + return state->variant; return NULL; } static boolean -is_blend( const struct cmd_block *block, +is_blend( const struct lp_rast_state *state, + const struct cmd_block *block, int k ) { - const struct lp_fragment_shader_variant *variant = get_variant(block, k); + const struct lp_fragment_shader_variant *variant = get_variant(state, block, k); if (variant) return variant->key.blend.rt[0].blend_enable; @@ -93,6 +95,7 @@ is_blend( const struct cmd_block *block, static void debug_bin( const struct cmd_bin *bin ) { + const struct lp_rast_state *state; const struct cmd_block *head = bin->head; int i, j = 0; @@ -100,9 +103,12 @@ debug_bin( const struct cmd_bin *bin ) while (head) { for (i = 0; i < head->count; i++, j++) { + if (head->cmd[i] == LP_RAST_OP_SET_STATE) + state = head->arg[i].state; + debug_printf("%d: %s %s\n", j, cmd_name(head->cmd[i]), - is_blend(head, i) ? "blended" : ""); + is_blend(state, head, i) ? "blended" : ""); } head = head->next; } @@ -134,7 +140,7 @@ debug_shade_tile(int x, int y, char val) { const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - boolean blend = inputs->state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; unsigned i,j; if (inputs->disable) @@ -176,7 +182,7 @@ debug_triangle(int tilex, int tiley, int x, y; int count = 0; unsigned i, nr_planes = 0; - boolean blend = tri->inputs.state->variant->key.blend.rt[0].blend_enable; + boolean blend = tile->state->variant->key.blend.rt[0].blend_enable; if (tri->inputs.disable) { /* This triangle was partially binned and has been disabled */ @@ -236,12 +242,15 @@ do_debug_bin( struct tile *tile, for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { - boolean blend = is_blend(block, k); + boolean blend = is_blend(tile->state, block, k); char val = get_label(j); int count = 0; if (print_cmds) debug_printf("%c: %15s", val, cmd_name(block->cmd[k])); + + if (block->cmd[k] == LP_RAST_OP_SET_STATE) + tile->state = block->arg[k].state; if (block->cmd[k] == LP_RAST_OP_CLEAR_COLOR || block->cmd[k] == LP_RAST_OP_CLEAR_ZSTENCIL) diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 104000a040c..7ffd735def2 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -77,6 +77,7 @@ struct cmd_bin; struct lp_rasterizer_task { const struct cmd_bin *bin; + const struct lp_rast_state *state; struct lp_scene *scene; unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */ @@ -244,7 +245,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, unsigned x, unsigned y ) { const struct lp_scene *scene = task->scene; - const struct lp_rast_state *state = inputs->state; + const struct lp_rast_state *state = task->state; struct lp_fragment_shader_variant *variant = state->variant; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -297,6 +298,10 @@ void lp_rast_triangle_3_16( struct lp_rasterizer_task *, void lp_rast_triangle_4_16( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg); + void lp_debug_bin( const struct cmd_bin *bin ); diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index 8b504f23a33..a4fdf7cff36 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -203,7 +203,9 @@ lp_scene_end_rasterization(struct lp_scene *scene ) for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->head = bin->tail = NULL; + bin->head = NULL; + bin->tail = NULL; + bin->last_state = NULL; } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index dbef7692e42..622c522f11a 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -41,6 +41,7 @@ #include "lp_debug.h" struct lp_scene_queue; +struct lp_rast_state; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -94,6 +95,7 @@ struct data_block { struct cmd_bin { ushort x; ushort y; + const struct lp_rast_state *last_state; /* most recent state set in bin */ struct cmd_block *head; struct cmd_block *tail; }; @@ -297,7 +299,7 @@ lp_scene_bin_command( struct lp_scene *scene, assert(x < scene->tiles_x); assert(y < scene->tiles_y); - assert(cmd <= LP_RAST_OP_END_QUERY); + assert(cmd < LP_RAST_OP_MAX); if (tail == NULL || tail->count == CMD_BLOCK_MAX) { tail = lp_scene_new_cmd_block( scene, bin ); @@ -318,6 +320,30 @@ lp_scene_bin_command( struct lp_scene *scene, } +static INLINE boolean +lp_scene_bin_cmd_with_state( struct lp_scene *scene, + unsigned x, unsigned y, + const struct lp_rast_state *state, + unsigned cmd, + union lp_rast_cmd_arg arg ) +{ + struct cmd_bin *bin = lp_scene_get_bin(scene, x, y); + + if (state != bin->last_state) { + bin->last_state = state; + if (!lp_scene_bin_command(scene, x, y, + LP_RAST_OP_SET_STATE, + lp_rast_arg_state(state))) + return FALSE; + } + + if (!lp_scene_bin_command( scene, x, y, cmd, arg )) + return FALSE; + + return TRUE; +} + + /* Add a command to all active bins. */ static INLINE boolean diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 693ac281752..e4cff9aa42c 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -597,7 +597,6 @@ try_setup_line( struct lp_setup_context *setup, setup_line_coefficients( setup, line, &info); line->inputs.facing = 1.0F; - line->inputs.state = setup->fs.stored; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 64b24a88d54..93c3efe347e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -374,7 +374,6 @@ try_setup_point( struct lp_setup_context *setup, setup_point_coefficients(setup, point, &info); point->inputs.facing = 1.0F; - point->inputs.state = setup->fs.stored; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 8fd034666c3..bc48eb8d1b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -200,14 +200,16 @@ lp_setup_whole_tile(struct lp_setup_context *setup, } LP_COUNT(nr_shade_opaque_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE_OPAQUE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); } else { LP_COUNT(nr_shade_64); - return lp_scene_bin_command( scene, tx, ty, - LP_RAST_OP_SHADE_TILE, - lp_rast_arg_inputs(inputs) ); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -320,7 +322,6 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->inputs.facing = frontfacing ? 1.0F : -1.0F; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - tri->inputs.state = setup->fs.stored; for (i = 0; i < 3; i++) { @@ -491,34 +492,36 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, { /* Triangle is contained in a single 4x4 stamp: */ - - return lp_scene_bin_command( scene, ix0, iy0, - LP_RAST_OP_TRIANGLE_3_4, - lp_rast_arg_triangle(tri, mask) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); } if (sz < 16) { /* Triangle is contained in a single 16x16 block: */ - return lp_scene_bin_command( scene, ix0, iy0, - LP_RAST_OP_TRIANGLE_3_16, - lp_rast_arg_triangle(tri, mask) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); } } else if (nr_planes == 4 && sz < 16) { - return lp_scene_bin_command( scene, ix0, iy0, - LP_RAST_OP_TRIANGLE_4_16, - lp_rast_arg_triangle(tri, mask) ); + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); } /* Triangle is contained in a single tile: */ - return lp_scene_bin_command( scene, ix0, iy0, - lp_rast_tri_tab[nr_planes], - lp_rast_arg_triangle(tri, (1<fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) goto fail; LP_COUNT(nr_partially_covered_64); -- cgit v1.2.3 From 0a1c9001037a13b69b157994e7983aa3dee158d3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 00:12:19 +0100 Subject: llvmpipe: don't pass frontfacing as a float --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 8 ++++---- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_rast.h | 2 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- src/gallium/drivers/llvmpipe/lp_setup_line.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup_point.c | 2 +- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 2 +- src/gallium/drivers/llvmpipe/lp_state_fs.c | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index e4cfa97aa3f..ddf7da0b14d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -446,7 +446,7 @@ lp_build_occlusion_count(LLVMBuilderRef builder, * \param stencil_refs the front/back stencil ref values (scalar) * \param z_src the incoming depth/stencil values (a 2x2 quad, float32) * \param zs_dst_ptr pointer to depth/stencil values in framebuffer - * \param facing contains float value indicating front/back facing polygon + * \param facing contains boolean value indicating front/back facing polygon */ void lp_build_depth_stencil_test(LLVMBuilderRef builder, @@ -576,10 +576,10 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, if (stencil[0].enabled) { if (face) { - LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0); + LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); - /* front_facing = face > 0.0 ? ~0 : 0 */ - front_facing = LLVMBuildFCmp(builder, LLVMRealUGT, face, zero, ""); + /* front_facing = face != 0 ? ~0 : 0 */ + front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); front_facing = LLVMBuildSExt(builder, front_facing, LLVMIntType(s_bld.type.length*s_bld.type.width), ""); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 16e04fce0cd..114f21f2d16 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -144,7 +144,7 @@ typedef void (*lp_jit_frag_func)(const struct lp_jit_context *context, uint32_t x, uint32_t y, - float facing, + uint32_t facing, const void *a0, const void *dadx, const void *dady, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 35e2f731e89..8e9be755e07 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -365,7 +365,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, - inputs->facing, + inputs->frontfacing, inputs->a0, inputs->dadx, inputs->dady, @@ -446,7 +446,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, - inputs->facing, + inputs->frontfacing, inputs->a0, inputs->dadx, inputs->dady, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index f74b198a66c..c5fb15484c5 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,7 +78,7 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - float facing; /** Positive for front-facing, negative for back-facing */ + unsigned frontfacing; /** One for front-facing */ unsigned disable:1; /** Partially binned, disable this command */ unsigned opaque:1; /** Is opaque */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 7ffd735def2..e5d04c65b00 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -261,7 +261,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, BEGIN_JIT_CALL(state); variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, - inputs->facing, + inputs->frontfacing, inputs->a0, inputs->dadx, inputs->dady, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index e4cff9aa42c..efc48eecfee 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -596,7 +596,7 @@ try_setup_line( struct lp_setup_context *setup, */ setup_line_coefficients( setup, line, &info); - line->inputs.facing = 1.0F; + line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 93c3efe347e..108c831e66e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -373,7 +373,7 @@ try_setup_point( struct lp_setup_context *setup, */ setup_point_coefficients(setup, point, &info); - point->inputs.facing = 1.0F; + point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index bc48eb8d1b5..3bf0b2d2522 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -319,7 +319,7 @@ do_triangle_ccw(struct lp_setup_context *setup, */ lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); - tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index d2fbe277083..8df807cec88 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -579,7 +579,7 @@ generate_fragment(struct llvmpipe_context *lp, arg_types[0] = screen->context_ptr_type; /* context */ arg_types[1] = LLVMInt32Type(); /* x */ arg_types[2] = LLVMInt32Type(); /* y */ - arg_types[3] = LLVMFloatType(); /* facing */ + arg_types[3] = LLVMInt32Type(); /* facing */ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */ -- cgit v1.2.3 From 9bf8a55c4b29d55320fc2e7875ecf0e9ca164ee8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 12:23:22 +0100 Subject: llvmpipe: slightly shrink the size of a binned triangle --- src/gallium/drivers/llvmpipe/lp_rast.c | 12 +- src/gallium/drivers/llvmpipe/lp_rast.h | 30 ++-- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 3 +- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 +- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 4 +- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 7 +- src/gallium/drivers/llvmpipe/lp_setup_coef.c | 67 ++++----- src/gallium/drivers/llvmpipe/lp_setup_coef.h | 4 + .../drivers/llvmpipe/lp_setup_coef_intrin.c | 52 ++++--- src/gallium/drivers/llvmpipe/lp_setup_line.c | 154 +++++++++++---------- src/gallium/drivers/llvmpipe/lp_setup_point.c | 141 ++++++++++--------- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 128 ++++++++--------- 12 files changed, 316 insertions(+), 292 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 8e9be755e07..d358a983943 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -366,9 +366,9 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task, variant->jit_function[RAST_WHOLE]( &state->jit_context, tile_x + x, tile_y + y, inputs->frontfacing, - inputs->a0, - inputs->dadx, - inputs->dady, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, @@ -447,9 +447,9 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, variant->jit_function[RAST_EDGE_TEST](&state->jit_context, x, y, inputs->frontfacing, - inputs->a0, - inputs->dadx, - inputs->dady, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, mask, diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index c5fb15484c5..8d8b6210ec7 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -78,13 +78,14 @@ struct lp_rast_state { * These pointers point into the bin data buffer. */ struct lp_rast_shader_inputs { - unsigned frontfacing; /** One for front-facing */ - unsigned disable:1; /** Partially binned, disable this command */ - unsigned opaque:1; /** Is opaque */ - - float (*a0)[4]; - float (*dadx)[4]; - float (*dady)[4]; + unsigned frontfacing:1; /** True for front-facing */ + unsigned disable:1; /** Partially binned, disable this command */ + unsigned opaque:1; /** Is opaque */ + unsigned pad0:29; /* wasted space */ + unsigned stride; /* how much to advance data between a0, dadx, dady */ + unsigned pad2; /* wasted space */ + unsigned pad3; /* wasted space */ + /* followed by a0, dadx, dady and planes[] */ }; /* Note: the order of these values is important as they are loaded by @@ -111,17 +112,24 @@ struct lp_rast_plane { * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { - /* inputs for the shader */ - struct lp_rast_shader_inputs inputs; - #ifdef DEBUG float v[3][2]; + float pad0; + float pad1; #endif - struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */ + /* inputs for the shader */ + struct lp_rast_shader_inputs inputs; + /* planes are also allocated here */ }; +#define GET_A0(inputs) ((float (*)[4])((inputs)+1)) +#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride)) +#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride)) +#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride)) + + struct lp_rasterizer * lp_rast_create( unsigned num_threads ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 3113e196c40..e2783aa5683 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -178,6 +178,7 @@ debug_triangle(int tilex, int tiley, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); struct lp_rast_plane plane[8]; int x, y; int count = 0; @@ -190,7 +191,7 @@ debug_triangle(int tilex, int tiley, } while (plane_mask) { - plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)]; + plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)]; plane[nr_planes].c = (plane[nr_planes].c + plane[nr_planes].dcdy * tiley - plane[nr_planes].dcdx * tilex); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index e5d04c65b00..b30408f097b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -262,9 +262,9 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, variant->jit_function[RAST_WHOLE]( &state->jit_context, x, y, inputs->frontfacing, - inputs->a0, - inputs->dadx, - inputs->dady, + GET_A0(inputs), + GET_DADX(inputs), + GET_DADY(inputs), color, depth, 0xffff, diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index bae772b9c50..5bdf19712f4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -311,7 +311,7 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; + const struct lp_rast_plane *plane = GET_PLANES(tri); int x = (arg.triangle.plane_mask & 0xff) + task->x; int y = (arg.triangle.plane_mask >> 8) + task->y; unsigned i, j; @@ -421,7 +421,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; + const struct lp_rast_plane *plane = GET_PLANES(tri); int x = (arg.triangle.plane_mask & 0xff) + task->x; int y = (arg.triangle.plane_mask >> 8) + task->y; diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 2f032295126..9976996719a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -156,6 +156,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, { const struct lp_rast_triangle *tri = arg.triangle.tri; unsigned plane_mask = arg.triangle.plane_mask; + const struct lp_rast_plane *tri_plane = GET_PLANES(tri); const int x = task->x, y = task->y; struct lp_rast_plane plane[NR_PLANES]; int c[NR_PLANES]; @@ -172,7 +173,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, while (plane_mask) { int i = ffs(plane_mask) - 1; - plane[j] = tri->plane[i]; + plane[j] = tri_plane[i]; plane_mask &= ~(1 << i); c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x; @@ -255,7 +256,7 @@ TRI_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; + const struct lp_rast_plane *plane = GET_PLANES(tri); unsigned mask = arg.triangle.plane_mask; unsigned outmask, partial_mask; unsigned j; @@ -328,7 +329,7 @@ TRI_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; - const struct lp_rast_plane *plane = tri->plane; + const struct lp_rast_plane *plane = GET_PLANES(tri); unsigned mask = arg.triangle.plane_mask; const int x = task->x + (mask & 0xff); const int y = task->y + (mask >> 8); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c index 8dc2688ddb6..a835df6af24 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -42,20 +42,19 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_rast_shader_inputs *inputs, +static void constant_coef( struct lp_tri_info *info, unsigned slot, const float value, unsigned i ) { - inputs->a0[slot][i] = value; - inputs->dadx[slot][i] = 0.0f; - inputs->dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void linear_coef( struct lp_tri_info *info, unsigned slot, unsigned vert_attr, unsigned i) @@ -69,8 +68,8 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs, float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20); float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01); - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at @@ -84,7 +83,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + info->a0[slot][i] = a0 - (dadx * info->x0_center + dady * info->y0_center); } @@ -97,8 +96,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void perspective_coef( struct lp_tri_info *info, unsigned slot, unsigned vert_attr, unsigned i) @@ -113,9 +111,9 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs, float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20; float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01; - inputs->dadx[slot][i] = dadx; - inputs->dady[slot][i] = dady; - inputs->a0[slot][i] = a0 - (dadx * info->x0_center + + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; + info->a0[slot][i] = a0 - (dadx * info->x0_center + dady * info->y0_center); } @@ -127,23 +125,22 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +setup_fragcoord_coef(struct lp_tri_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - inputs->a0[slot][0] = 0.0; - inputs->dadx[slot][0] = 1.0; - inputs->dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - inputs->a0[slot][1] = 0.0; - inputs->dadx[slot][1] = 0.0; - inputs->dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ @@ -162,23 +159,23 @@ setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs, * Setup the fragment input attribute with the front-facing value. * \param frontface is the triangle front facing? */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, +static void setup_facing_coef( struct lp_tri_info *info, unsigned slot, boolean frontface, unsigned usage_mask) { /* convert TRUE to 1.0 and FALSE to -1.0 */ if (usage_mask & TGSI_WRITEMASK_X) - constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 ); + constant_coef( info, slot, 2.0f * frontface - 1.0f, 0 ); if (usage_mask & TGSI_WRITEMASK_Y) - constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */ + constant_coef( info, slot, 0.0f, 1 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_Z) - constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */ + constant_coef( info, slot, 0.0f, 2 ); /* wasted */ if (usage_mask & TGSI_WRITEMASK_W) - constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */ + constant_coef( info, slot, 0.0f, 3 ); /* wasted */ } @@ -212,6 +209,10 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, info.dx20_ooa = dx20 * oneoverarea; info.dy01_ooa = dy01 * oneoverarea; info.dy20_ooa = dy20 * oneoverarea; + info.a0 = GET_A0(inputs); + info.dadx = GET_DADX(inputs); + info.dady = GET_DADY(inputs); + /* setup interpolation for all the remaining attributes: @@ -225,25 +226,25 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v0[vert_attr][i], i); + constant_coef(&info, slot+1, info.v0[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(inputs, slot+1, info.v2[vert_attr][i], i); + constant_coef(&info, slot+1, info.v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(inputs, &info, slot+1, vert_attr, i); + linear_coef(&info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(inputs, &info, slot+1, vert_attr, i); + perspective_coef(&info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -257,7 +258,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask); + setup_facing_coef(&info, slot+1, info.frontfacing, usage_mask); break; default: @@ -267,7 +268,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask); + setup_fragcoord_coef(&info, 0, fragcoord_usage_mask); } #else diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.h b/src/gallium/drivers/llvmpipe/lp_setup_coef.h index 87a3255ccc6..7b5b78edd53 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.h @@ -52,6 +52,10 @@ struct lp_tri_info { const float (*v2)[4]; boolean frontfacing; /* remove eventually */ + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; void lp_setup_tri_coef( struct lp_setup_context *setup, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c index 3742fd672b2..29714e27687 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c @@ -40,14 +40,13 @@ #include -static void constant_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void constant_coef4( struct lp_tri_info *info, unsigned slot, const float *attr) { - *(__m128 *)inputs->a0[slot] = *(__m128 *)attr; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); + *(__m128 *)info->a0[slot] = *(__m128 *)attr; + *(__m128 *)info->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)info->dady[slot] = _mm_set1_ps(0.0); } @@ -56,8 +55,7 @@ static void constant_coef4( struct lp_rast_shader_inputs *inputs, * Setup the fragment input attribute with the front-facing value. * \param frontface is the triangle front facing? */ -static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void setup_facing_coef( struct lp_tri_info *info, unsigned slot ) { /* XXX: just pass frontface directly to the shader, don't bother @@ -66,15 +64,14 @@ static void setup_facing_coef( struct lp_rast_shader_inputs *inputs, __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0, 0, 0, 0); - *(__m128 *)inputs->a0[slot] = a0; - *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0); + *(__m128 *)info->a0[slot] = a0; + *(__m128 *)info->dadx[slot] = _mm_set1_ps(0.0); + *(__m128 *)info->dady[slot] = _mm_set1_ps(0.0); } -static void calc_coef4( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void calc_coef4( const struct lp_tri_info *info, unsigned slot, __m128 a0, __m128 a1, @@ -96,14 +93,13 @@ static void calc_coef4( struct lp_rast_shader_inputs *inputs, __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); __m128 attr_0 = _mm_sub_ps(a0, attr_v0); - *(__m128 *)inputs->a0[slot] = attr_0; - *(__m128 *)inputs->dadx[slot] = dadx; - *(__m128 *)inputs->dady[slot] = dady; + *(__m128 *)info->a0[slot] = attr_0; + *(__m128 *)info->dadx[slot] = dadx; + *(__m128 *)info->dady[slot] = dady; } -static void linear_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void linear_coef( struct lp_tri_info *info, unsigned slot, unsigned vert_attr) { @@ -111,7 +107,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs, __m128 a1 = *(const __m128 *)info->v1[vert_attr]; __m128 a2 = *(const __m128 *)info->v2[vert_attr]; - calc_coef4(inputs, info, slot, a0, a1, a2); + calc_coef4(info, slot, a0, a1, a2); } @@ -124,8 +120,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct lp_rast_shader_inputs *inputs, - const struct lp_tri_info *info, +static void perspective_coef( const struct lp_tri_info *info, unsigned slot, unsigned vert_attr) { @@ -139,7 +134,7 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs, __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3])); __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3])); - calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow); + calc_coef4(info, slot, a0_oow, a1_oow, a2_oow); } @@ -174,11 +169,14 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, info.dx20_ooa = dx20 * oneoverarea; info.dy01_ooa = dy01 * oneoverarea; info.dy20_ooa = dy20 * oneoverarea; + info.a0 = GET_A0(inputs); + info.dadx = GET_DADX(inputs); + info.dady = GET_DADY(inputs); /* The internal position input is in slot zero: */ - linear_coef(inputs, &info, 0, 0); + linear_coef(&info, 0, 0); /* setup interpolation for all the remaining attributes: */ @@ -188,19 +186,19 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: if (setup->flatshade_first) { - constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]); + constant_coef4(&info, slot+1, info.v0[vert_attr]); } else { - constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]); + constant_coef4(&info, slot+1, info.v2[vert_attr]); } break; case LP_INTERP_LINEAR: - linear_coef(inputs, &info, slot+1, vert_attr); + linear_coef(&info, slot+1, vert_attr); break; case LP_INTERP_PERSPECTIVE: - perspective_coef(inputs, &info, slot+1, vert_attr); + perspective_coef(&info, slot+1, vert_attr); break; case LP_INTERP_POSITION: @@ -211,7 +209,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup, break; case LP_INTERP_FACING: - setup_facing_coef(inputs, &info, slot+1); + setup_facing_coef(&info, slot+1); break; default: diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index efc48eecfee..2fd9f2e2f2a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -46,6 +46,10 @@ struct lp_line_info { const float (*v1)[4]; const float (*v2)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -53,14 +57,14 @@ struct lp_line_info { * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, + struct lp_line_info *info, unsigned slot, const float value, unsigned i ) { - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -69,7 +73,6 @@ static void constant_coef( struct lp_setup_context *setup, * for a triangle. */ static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -82,10 +85,10 @@ static void linear_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - + info->a0[slot][i] = (a1 - (dadx * (info->v1[0][0] - setup->pixel_offset) + dady * (info->v1[0][1] - setup->pixel_offset))); } @@ -100,7 +103,6 @@ static void linear_coef( struct lp_setup_context *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned vert_attr, @@ -115,43 +117,42 @@ static void perspective_coef( struct lp_setup_context *setup, float dadx = da21 * info->dx * info->oneoverarea; float dady = da21 * info->dy * info->oneoverarea; - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; + info->dadx[slot][i] = dadx; + info->dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (info->v1[0][0] - setup->pixel_offset) + - dady * (info->v1[0][1] - setup->pixel_offset))); + info->a0[slot][i] = (a1 - + (dadx * (info->v1[0][0] - setup->pixel_offset) + + dady * (info->v1[0][1] - setup->pixel_offset))); } static void setup_fragcoord_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(setup, tri, info, slot, 0, 2); + linear_coef(setup, info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(setup, tri, info, slot, 0, 3); + linear_coef(setup, info, slot, 0, 3); } } @@ -159,7 +160,6 @@ setup_fragcoord_coef( struct lp_setup_context *setup, * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_line_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, struct lp_line_info *info) { unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; @@ -177,25 +177,25 @@ static void setup_line_coefficients( struct lp_setup_context *setup, if (setup->flatshade_first) { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i); } else { for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i); } break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - linear_coef(setup, tri, info, slot+1, vert_attr, i); + linear_coef(setup, info, slot+1, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - perspective_coef(setup, tri, info, slot+1, vert_attr, i); + perspective_coef(setup, info, slot+1, vert_attr, i); fragcoord_usage_mask |= TGSI_WRITEMASK_W; break; @@ -211,7 +211,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -221,7 +221,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(setup, tri, info, 0, + setup_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -276,6 +276,7 @@ try_setup_line( struct lp_setup_context *setup, { struct lp_scene *scene = setup->scene; struct lp_rast_triangle *line; + struct lp_rast_plane *plane; struct lp_line_info info; float width = MAX2(1.0, setup->line_width); struct u_rect bbox; @@ -581,32 +582,35 @@ try_setup_line( struct lp_setup_context *setup, #endif /* calculate the deltas */ - line->plane[0].dcdy = x[0] - x[1]; - line->plane[1].dcdy = x[1] - x[2]; - line->plane[2].dcdy = x[2] - x[3]; - line->plane[3].dcdy = x[3] - x[0]; + plane = GET_PLANES(line); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[3]; + plane[3].dcdy = x[3] - x[0]; - line->plane[0].dcdx = y[0] - y[1]; - line->plane[1].dcdx = y[1] - y[2]; - line->plane[2].dcdx = y[2] - y[3]; - line->plane[3].dcdx = y[3] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[3]; + plane[3].dcdx = y[3] - y[0]; /* Setup parameter interpolants: */ - setup_line_coefficients( setup, line, &info); + info.a0 = GET_A0(&line->inputs); + info.dadx = GET_DADX(&line->inputs); + info.dady = GET_DADY(&line->inputs); + setup_line_coefficients(setup, &info); line->inputs.frontfacing = TRUE; line->inputs.disable = FALSE; line->inputs.opaque = FALSE; for (i = 0; i < 4; i++) { - struct lp_rast_plane *plane = &line->plane[i]; /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. @@ -622,38 +626,38 @@ try_setup_line( struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; /* Calculate trivial accept offsets from the above. */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo; } @@ -676,29 +680,29 @@ try_setup_line( struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 8) { - line->plane[4].dcdx = -1; - line->plane[4].dcdy = 0; - line->plane[4].c = 1-bbox.x0; - line->plane[4].ei = 0; - line->plane[4].eo = 1; - - line->plane[5].dcdx = 1; - line->plane[5].dcdy = 0; - line->plane[5].c = bbox.x1+1; - line->plane[5].ei = -1; - line->plane[5].eo = 0; - - line->plane[6].dcdx = 0; - line->plane[6].dcdy = 1; - line->plane[6].c = 1-bbox.y0; - line->plane[6].ei = 0; - line->plane[6].eo = 1; - - line->plane[7].dcdx = 0; - line->plane[7].dcdy = -1; - line->plane[7].c = bbox.y1+1; - line->plane[7].ei = -1; - line->plane[7].eo = 0; + plane[4].dcdx = -1; + plane[4].dcdy = 0; + plane[4].c = 1-bbox.x0; + plane[4].ei = 0; + plane[4].eo = 1; + + plane[5].dcdx = 1; + plane[5].dcdy = 0; + plane[5].c = bbox.x1+1; + plane[5].ei = -1; + plane[5].eo = 0; + + plane[6].dcdx = 0; + plane[6].dcdy = 1; + plane[6].c = 1-bbox.y0; + plane[6].ei = 0; + plane[6].eo = 1; + + plane[7].dcdx = 0; + plane[7].dcdy = -1; + plane[7].c = bbox.y1+1; + plane[7].ei = -1; + plane[7].eo = 0; } return lp_setup_bin_triangle(setup, line, &bbox, nr_planes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 108c831e66e..e30e70e16d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -45,6 +45,10 @@ struct point_info { int dx01, dx12; const float (*v0)[4]; + + float (*a0)[4]; + float (*dadx)[4]; + float (*dady)[4]; }; @@ -53,20 +57,19 @@ struct point_info { */ static void constant_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, + struct point_info *info, unsigned slot, const float value, unsigned i) { - point->inputs.a0[slot][i] = value; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = value; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } static void point_persp_coeff(struct lp_setup_context *setup, - struct lp_rast_triangle *point, const struct point_info *info, unsigned slot, unsigned i) @@ -82,9 +85,9 @@ point_persp_coeff(struct lp_setup_context *setup, assert(i < 4); - point->inputs.a0[slot][i] = info->v0[slot][i]*w0; - point->inputs.dadx[slot][i] = 0.0f; - point->inputs.dady[slot][i] = 0.0f; + info->a0[slot][i] = info->v0[slot][i]*w0; + info->dadx[slot][i] = 0.0f; + info->dady[slot][i] = 0.0f; } @@ -98,7 +101,6 @@ point_persp_coeff(struct lp_setup_context *setup, */ static void texcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, const struct point_info *info, unsigned slot, unsigned i, @@ -115,14 +117,14 @@ texcoord_coef(struct lp_setup_context *setup, float x0 = info->v0[0][0] - setup->pixel_offset; float y0 = info->v0[0][1] - setup->pixel_offset; - point->inputs.dadx[slot][0] = dadx; - point->inputs.dady[slot][0] = dady; - point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][0] = dadx; + info->dady[slot][0] = dady; + info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0); if (perspective) { - point->inputs.dadx[slot][0] *= w0; - point->inputs.dady[slot][0] *= w0; - point->inputs.a0[slot][0] *= w0; + info->dadx[slot][0] *= w0; + info->dady[slot][0] *= w0; + info->a0[slot][0] *= w0; } } else if (i == 1) { @@ -135,25 +137,25 @@ texcoord_coef(struct lp_setup_context *setup, dady = -dady; } - point->inputs.dadx[slot][1] = dadx; - point->inputs.dady[slot][1] = dady; - point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); + info->dadx[slot][1] = dadx; + info->dady[slot][1] = dady; + info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0); if (perspective) { - point->inputs.dadx[slot][1] *= w0; - point->inputs.dady[slot][1] *= w0; - point->inputs.a0[slot][1] *= w0; + info->dadx[slot][1] *= w0; + info->dady[slot][1] *= w0; + info->a0[slot][1] *= w0; } } else if (i == 2) { - point->inputs.a0[slot][2] = 0.0f; - point->inputs.dadx[slot][2] = 0.0f; - point->inputs.dady[slot][2] = 0.0f; + info->a0[slot][2] = 0.0f; + info->dadx[slot][2] = 0.0f; + info->dady[slot][2] = 0.0f; } else { - point->inputs.a0[slot][3] = perspective ? w0 : 1.0f; - point->inputs.dadx[slot][3] = 0.0f; - point->inputs.dady[slot][3] = 0.0f; + info->a0[slot][3] = perspective ? w0 : 1.0f; + info->dadx[slot][3] = 0.0f; + info->dady[slot][3] = 0.0f; } } @@ -166,33 +168,32 @@ texcoord_coef(struct lp_setup_context *setup, */ static void setup_point_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info, + struct point_info *info, unsigned slot, unsigned usage_mask) { /*X*/ if (usage_mask & TGSI_WRITEMASK_X) { - point->inputs.a0[slot][0] = 0.0; - point->inputs.dadx[slot][0] = 1.0; - point->inputs.dady[slot][0] = 0.0; + info->a0[slot][0] = 0.0; + info->dadx[slot][0] = 1.0; + info->dady[slot][0] = 0.0; } /*Y*/ if (usage_mask & TGSI_WRITEMASK_Y) { - point->inputs.a0[slot][1] = 0.0; - point->inputs.dadx[slot][1] = 0.0; - point->inputs.dady[slot][1] = 1.0; + info->a0[slot][1] = 0.0; + info->dadx[slot][1] = 0.0; + info->dady[slot][1] = 1.0; } /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - constant_coef(setup, point, slot, info->v0[0][2], 2); + constant_coef(setup, info, slot, info->v0[0][2], 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - constant_coef(setup, point, slot, info->v0[0][3], 3); + constant_coef(setup, info, slot, info->v0[0][3], 3); } } @@ -202,8 +203,7 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup, */ static void setup_point_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *point, - const struct point_info *info) + struct point_info *info) { const struct lp_fragment_shader *shader = setup->fs.current.variant->shader; unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ; @@ -248,7 +248,7 @@ setup_point_coefficients( struct lp_setup_context *setup, (setup->sprite_coord_enable & (1 << semantic_index))) { for (i = 0; i < NUM_CHANNELS; i++) { if (usage_mask & (1 << i)) { - texcoord_coef(setup, point, info, slot + 1, i, + texcoord_coef(setup, info, slot + 1, i, setup->sprite_coord_origin, perspective); } @@ -261,10 +261,10 @@ setup_point_coefficients( struct lp_setup_context *setup, for (i = 0; i < NUM_CHANNELS; i++) { if (usage_mask & (1 << i)) { if (perspective) { - point_persp_coeff(setup, point, info, slot+1, i); + point_persp_coeff(setup, info, slot+1, i); } else { - constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i); + constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i); } } } @@ -273,7 +273,7 @@ setup_point_coefficients( struct lp_setup_context *setup, case LP_INTERP_FACING: for (i = 0; i < NUM_CHANNELS; i++) if (usage_mask & (1 << i)) - constant_coef(setup, point, slot+1, 1.0, i); + constant_coef(setup, info, slot+1, 1.0, i); break; default: @@ -284,7 +284,7 @@ setup_point_coefficients( struct lp_setup_context *setup, /* The internal position input is in slot zero: */ - setup_point_fragcoord_coef(setup, point, info, 0, + setup_point_fragcoord_coef(setup, info, 0, fragcoord_usage_mask); } @@ -368,39 +368,44 @@ try_setup_point( struct lp_setup_context *setup, info.dx12 = fixed_width; info.dy01 = fixed_width; info.dy12 = 0; + info.a0 = GET_A0(&point->inputs); + info.dadx = GET_DADX(&point->inputs); + info.dady = GET_DADY(&point->inputs); /* Setup parameter interpolants: */ - setup_point_coefficients(setup, point, &info); + setup_point_coefficients(setup, &info); point->inputs.frontfacing = TRUE; point->inputs.disable = FALSE; point->inputs.opaque = FALSE; { - point->plane[0].dcdx = -1; - point->plane[0].dcdy = 0; - point->plane[0].c = 1-bbox.x0; - point->plane[0].ei = 0; - point->plane[0].eo = 1; - - point->plane[1].dcdx = 1; - point->plane[1].dcdy = 0; - point->plane[1].c = bbox.x1+1; - point->plane[1].ei = -1; - point->plane[1].eo = 0; - - point->plane[2].dcdx = 0; - point->plane[2].dcdy = 1; - point->plane[2].c = 1-bbox.y0; - point->plane[2].ei = 0; - point->plane[2].eo = 1; - - point->plane[3].dcdx = 0; - point->plane[3].dcdy = -1; - point->plane[3].c = bbox.y1+1; - point->plane[3].ei = -1; - point->plane[3].eo = 0; + struct lp_rast_plane *plane = GET_PLANES(point); + + plane[0].dcdx = -1; + plane[0].dcdy = 0; + plane[0].c = 1-bbox.x0; + plane[0].ei = 0; + plane[0].eo = 1; + + plane[1].dcdx = 1; + plane[1].dcdy = 0; + plane[1].c = bbox.x1+1; + plane[1].ei = -1; + plane[1].eo = 0; + + plane[2].dcdx = 0; + plane[2].dcdy = 1; + plane[2].c = 1-bbox.y0; + plane[2].ei = 0; + plane[2].eo = 1; + + plane[3].dcdx = 0; + plane[3].dcdy = -1; + plane[3].c = bbox.y1+1; + plane[3].ei = -1; + plane[3].eo = 0; } return lp_setup_bin_triangle(setup, point, &bbox, nr_planes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 3bf0b2d2522..937821b4c3a 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -75,24 +75,25 @@ lp_setup_alloc_triangle(struct lp_scene *scene, unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); + unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane); struct lp_rast_triangle *tri; - unsigned tri_bytes, bytes; - char *inputs; - tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); - bytes = tri_bytes + (3 * input_array_sz); - - tri = lp_scene_alloc_aligned( scene, bytes, 16 ); + *tri_size = (sizeof(struct lp_rast_triangle) + + 3 * input_array_sz + + plane_sz); + tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); if (tri) { - inputs = ((char *)tri) + tri_bytes; - tri->inputs.a0 = (float (*)[4]) inputs; - tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); - tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); + tri->inputs.stride = input_array_sz; + } - *tri_size = bytes; + { + char *a = (char *)tri; + char *b = (char *)&GET_PLANES(tri)[nr_planes]; + assert(b - a == *tri_size); } + return tri; } @@ -228,6 +229,7 @@ do_triangle_ccw(struct lp_setup_context *setup, { struct lp_scene *scene = setup->scene; struct lp_rast_triangle *tri; + struct lp_rast_plane *plane; int x[3]; int y[3]; struct u_rect bbox; @@ -296,7 +298,7 @@ do_triangle_ccw(struct lp_setup_context *setup, if (!tri) return FALSE; -#ifdef DEBUG +#if 0 tri->v[0][0] = v0[0][0]; tri->v[1][0] = v1[0][0]; tri->v[2][0] = v2[0][0]; @@ -305,13 +307,14 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - tri->plane[0].dcdy = x[0] - x[1]; - tri->plane[1].dcdy = x[1] - x[2]; - tri->plane[2].dcdy = x[2] - x[0]; + plane = GET_PLANES(tri); + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; - tri->plane[0].dcdx = y[0] - y[1]; - tri->plane[1].dcdx = y[1] - y[2]; - tri->plane[2].dcdx = y[2] - y[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; LP_COUNT(nr_tris); @@ -325,12 +328,10 @@ do_triangle_ccw(struct lp_setup_context *setup, for (i = 0; i < 3; i++) { - struct lp_rast_plane *plane = &tri->plane[i]; - /* half-edge constants, will be interated over the whole render * target. */ - plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; /* correct for top-left vs. bottom-left fill convention. * @@ -345,38 +346,38 @@ do_triangle_ccw(struct lp_setup_context *setup, * to its usual method, in which case it will probably want * to use the opposite, top-left convention. */ - if (plane->dcdx < 0) { + if (plane[i].dcdx < 0) { /* both fill conventions want this - adjust for left edges */ - plane->c++; + plane[i].c++; } - else if (plane->dcdx == 0) { + else if (plane[i].dcdx == 0) { if (setup->pixel_offset == 0) { /* correct for top-left fill convention: */ - if (plane->dcdy > 0) plane->c++; + if (plane[i].dcdy > 0) plane[i].c++; } else { /* correct for bottom-left fill convention: */ - if (plane->dcdy < 0) plane->c++; + if (plane[i].dcdy < 0) plane[i].c++; } } - plane->dcdx *= FIXED_ONE; - plane->dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; /* find trivial reject offsets for each edge for a single-pixel * sized block. These will be scaled up at each recursive level to * match the active blocksize. Scaling in this way works best if * the blocks are square. */ - plane->eo = 0; - if (plane->dcdx < 0) plane->eo -= plane->dcdx; - if (plane->dcdy > 0) plane->eo += plane->dcdy; + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; /* Calculate trivial accept offsets from the above. */ - plane->ei = plane->dcdy - plane->dcdx - plane->eo; + plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo; } @@ -399,29 +400,29 @@ do_triangle_ccw(struct lp_setup_context *setup, * these planes elsewhere. */ if (nr_planes == 7) { - tri->plane[3].dcdx = -1; - tri->plane[3].dcdy = 0; - tri->plane[3].c = 1-bbox.x0; - tri->plane[3].ei = 0; - tri->plane[3].eo = 1; - - tri->plane[4].dcdx = 1; - tri->plane[4].dcdy = 0; - tri->plane[4].c = bbox.x1+1; - tri->plane[4].ei = -1; - tri->plane[4].eo = 0; - - tri->plane[5].dcdx = 0; - tri->plane[5].dcdy = 1; - tri->plane[5].c = 1-bbox.y0; - tri->plane[5].ei = 0; - tri->plane[5].eo = 1; - - tri->plane[6].dcdx = 0; - tri->plane[6].dcdy = -1; - tri->plane[6].c = bbox.y1+1; - tri->plane[6].ei = -1; - tri->plane[6].eo = 0; + plane[3].dcdx = -1; + plane[3].dcdy = 0; + plane[3].c = 1-bbox.x0; + plane[3].ei = 0; + plane[3].eo = 1; + + plane[4].dcdx = 1; + plane[4].dcdy = 0; + plane[4].c = bbox.x1+1; + plane[4].ei = -1; + plane[4].eo = 0; + + plane[5].dcdx = 0; + plane[5].dcdy = 1; + plane[5].c = 1-bbox.y0; + plane[5].ei = 0; + plane[5].eo = 1; + + plane[6].dcdx = 0; + plane[6].dcdy = -1; + plane[6].c = bbox.y1+1; + plane[6].ei = -1; + plane[6].eo = 0; } return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); @@ -525,6 +526,7 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, } else { + struct lp_rast_plane *plane = GET_PLANES(tri); int c[MAX_PLANES]; int ei[MAX_PLANES]; int eo[MAX_PLANES]; @@ -538,14 +540,14 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, int iy1 = bbox->y1 / TILE_SIZE; for (i = 0; i < nr_planes; i++) { - c[i] = (tri->plane[i].c + - tri->plane[i].dcdy * iy0 * TILE_SIZE - - tri->plane[i].dcdx * ix0 * TILE_SIZE); - - ei[i] = tri->plane[i].ei << TILE_ORDER; - eo[i] = tri->plane[i].eo << TILE_ORDER; - xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); - ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + c[i] = (plane[i].c + + plane[i].dcdy * iy0 * TILE_SIZE - + plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = plane[i].ei << TILE_ORDER; + eo[i] = plane[i].eo << TILE_ORDER; + xstep[i] = -(plane[i].dcdx << TILE_ORDER); + ystep[i] = plane[i].dcdy << TILE_ORDER; } -- cgit v1.2.3 From 8965f042b327ad8697963e757f4607f4bb13a045 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 13:04:19 +0100 Subject: llvmpipe: don't store plane.ei value in binned data Further reduce the size of a binned triangle. --- src/gallium/drivers/llvmpipe/lp_rast.h | 3 --- src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h | 6 ++++-- src/gallium/drivers/llvmpipe/lp_setup_line.c | 8 -------- src/gallium/drivers/llvmpipe/lp_setup_point.c | 4 ---- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 13 ++++--------- 5 files changed, 8 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 8d8b6210ec7..a64c152cf83 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -100,9 +100,6 @@ struct lp_rast_plane { /* one-pixel sized trivial reject offsets for each plane */ int eo; - - /* one-pixel sized trivial accept offsets for each plane */ - int ei; }; /** diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h index 9976996719a..4825d651c04 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h @@ -82,7 +82,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 4; const int dcdy = plane[j].dcdy * 4; const int cox = plane[j].eo * 4; - const int cio = plane[j].ei * 4 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 4 - 1; build_masks(c[j] + cox, cio - cox, @@ -181,7 +182,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task, const int dcdx = -plane[j].dcdx * 16; const int dcdy = plane[j].dcdy * 16; const int cox = plane[j].eo * 16; - const int cio = plane[j].ei * 16 - 1; + const int ei = plane[j].dcdy - plane[j].dcdx - plane[j].eo; + const int cio = ei * 16 - 1; build_masks(c[j] + cox, cio - cox, diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index 2fd9f2e2f2a..ece8638b5a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -654,10 +654,6 @@ try_setup_line( struct lp_setup_context *setup, plane[i].eo = 0; if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo; } @@ -683,25 +679,21 @@ try_setup_line( struct lp_setup_context *setup, plane[4].dcdx = -1; plane[4].dcdy = 0; plane[4].c = 1-bbox.x0; - plane[4].ei = 0; plane[4].eo = 1; plane[5].dcdx = 1; plane[5].dcdy = 0; plane[5].c = bbox.x1+1; - plane[5].ei = -1; plane[5].eo = 0; plane[6].dcdx = 0; plane[6].dcdy = 1; plane[6].c = 1-bbox.y0; - plane[6].ei = 0; plane[6].eo = 1; plane[7].dcdx = 0; plane[7].dcdy = -1; plane[7].c = bbox.y1+1; - plane[7].ei = -1; plane[7].eo = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index e30e70e16d2..16d21df35e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -386,25 +386,21 @@ try_setup_point( struct lp_setup_context *setup, plane[0].dcdx = -1; plane[0].dcdy = 0; plane[0].c = 1-bbox.x0; - plane[0].ei = 0; plane[0].eo = 1; plane[1].dcdx = 1; plane[1].dcdy = 0; plane[1].c = bbox.x1+1; - plane[1].ei = -1; plane[1].eo = 0; plane[2].dcdx = 0; plane[2].dcdy = 1; plane[2].c = 1-bbox.y0; - plane[2].ei = 0; plane[2].eo = 1; plane[3].dcdx = 0; plane[3].dcdy = -1; plane[3].c = bbox.y1+1; - plane[3].ei = -1; plane[3].eo = 0; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 937821b4c3a..6ceda80a716 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -374,10 +374,6 @@ do_triangle_ccw(struct lp_setup_context *setup, plane[i].eo = 0; if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; - - /* Calculate trivial accept offsets from the above. - */ - plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo; } @@ -403,25 +399,21 @@ do_triangle_ccw(struct lp_setup_context *setup, plane[3].dcdx = -1; plane[3].dcdy = 0; plane[3].c = 1-bbox.x0; - plane[3].ei = 0; plane[3].eo = 1; plane[4].dcdx = 1; plane[4].dcdy = 0; plane[4].c = bbox.x1+1; - plane[4].ei = -1; plane[4].eo = 0; plane[5].dcdx = 0; plane[5].dcdy = 1; plane[5].c = 1-bbox.y0; - plane[5].ei = 0; plane[5].eo = 1; plane[6].dcdx = 0; plane[6].dcdy = -1; plane[6].c = bbox.y1+1; - plane[6].ei = -1; plane[6].eo = 0; } @@ -544,7 +536,10 @@ lp_setup_bin_triangle( struct lp_setup_context *setup, plane[i].dcdy * iy0 * TILE_SIZE - plane[i].dcdx * ix0 * TILE_SIZE); - ei[i] = plane[i].ei << TILE_ORDER; + ei[i] = (plane[i].dcdy - + plane[i].dcdx - + plane[i].eo) << TILE_ORDER; + eo[i] = plane[i].eo << TILE_ORDER; xstep[i] = -(plane[i].dcdx << TILE_ORDER); ystep[i] = plane[i].dcdy << TILE_ORDER; -- cgit v1.2.3 From 15f4e3a8b98b5f4ca2833c02192ed9e6c237c5c7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 18:58:05 +0100 Subject: gallium: move some intrinsics helpers to u_sse.h --- src/gallium/auxiliary/util/u_sse.h | 74 ++++++++++++++++++++++++++++++ src/gallium/drivers/llvmpipe/lp_rast_tri.c | 58 ----------------------- 2 files changed, 74 insertions(+), 58 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 8fd0e52a3a4..1df6c872677 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -71,6 +71,12 @@ _mm_castps_si128(__m128 a) #endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +union m128i { + __m128i m; + ubyte ub[16]; + ushort us[8]; + uint ui[4]; +}; static INLINE void u_print_epi8(const char *name, __m128i r) { @@ -149,6 +155,12 @@ static INLINE void u_print_ps(const char *name, __m128 r) } +#define U_DUMP_EPI32(a) u_print_epi32(#a, a) +#define U_DUMP_EPI16(a) u_print_epi16(#a, a) +#define U_DUMP_EPI8(a) u_print_epi8(#a, a) +#define U_DUMP_PS(a) u_print_ps(#a, a) + + #if defined(PIPE_ARCH_SSSE3) @@ -176,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask) #endif /* !PIPE_ARCH_SSSE3 */ + + +/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of + * _mm_mul_epu32(). + * + * I suspect this works fine for us because one of our operands is + * always positive, but not sure that this can be used for general + * signed integer multiplication. + * + * This seems close enough to the speed of SSE4 and the real + * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 + * dependency at this point. + */ +static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) +{ + __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ + __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ + __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ + __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ + + /* Interleave the results, either with shuffles or (slightly + * faster) direct bit operations: + */ +#if 0 + __m128i ba8 = _mm_shuffle_epi32(ba, 8); + __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); + __m128i result = _mm_unpacklo_epi32(ba8, b4a48); +#else + __m128i mask = _mm_setr_epi32(~0,0,~0,0); + __m128i ba_mask = _mm_and_si128(ba, mask); + __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); + __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); +#endif + + return result; +} + + +static INLINE void +transpose4_epi32(const __m128i * restrict a, + const __m128i * restrict b, + const __m128i * restrict c, + const __m128i * restrict d, + __m128i * restrict o, + __m128i * restrict p, + __m128i * restrict q, + __m128i * restrict r) +{ + __m128i t0 = _mm_unpacklo_epi32(*a, *b); + __m128i t1 = _mm_unpacklo_epi32(*c, *d); + __m128i t2 = _mm_unpackhi_epi32(*a, *b); + __m128i t3 = _mm_unpackhi_epi32(*c, *d); + + *o = _mm_unpacklo_epi64(t0, t1); + *p = _mm_unpackhi_epi64(t0, t1); + *q = _mm_unpacklo_epi64(t2, t3); + *r = _mm_unpackhi_epi64(t2, t3); +} + +#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) + + #endif /* PIPE_ARCH_SSE */ #endif /* U_SSE_H_ */ diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5bdf19712f4..659eb1cac3a 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -240,68 +240,10 @@ sign_bits4(const __m128i *cstep, int cdiff) } -static INLINE void -transpose4_epi32(const __m128i * restrict a, - const __m128i * restrict b, - const __m128i * restrict c, - const __m128i * restrict d, - __m128i * restrict o, - __m128i * restrict p, - __m128i * restrict q, - __m128i * restrict r) -{ - __m128i t0 = _mm_unpacklo_epi32(*a, *b); - __m128i t1 = _mm_unpacklo_epi32(*c, *d); - __m128i t2 = _mm_unpackhi_epi32(*a, *b); - __m128i t3 = _mm_unpackhi_epi32(*c, *d); - - *o = _mm_unpacklo_epi64(t0, t1); - *p = _mm_unpackhi_epi64(t0, t1); - *q = _mm_unpacklo_epi64(t2, t3); - *r = _mm_unpackhi_epi64(t2, t3); -} - - -#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i)) - #define NR_PLANES 3 -/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of - * _mm_mul_epu32(). - * - * I suspect this works fine for us because one of our operands is - * always positive, but not sure that this can be used for general - * signed integer multiplication. - * - * This seems close enough to the speed of SSE4 and the real - * _mm_mullo_epi32() intrinsic as to not justify adding an sse4 - * dependency at this point. - */ -static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b) -{ - __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */ - __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */ - __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */ - __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */ - - /* Interleave the results, either with shuffles or (slightly - * faster) direct bit operations: - */ -#if 0 - __m128i ba8 = _mm_shuffle_epi32(ba, 8); - __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8); - __m128i result = _mm_unpacklo_epi32(ba8, b4a48); -#else - __m128i mask = _mm_setr_epi32(~0,0,~0,0); - __m128i ba_mask = _mm_and_si128(ba, mask); - __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32); - __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift); -#endif - - return result; -} -- cgit v1.2.3 From 9f9a17eba8d6080bf30f17c8a7eaed97b10a559f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 12 Oct 2010 18:59:15 +0100 Subject: llvmpipe: do plane calculations with intrinsics This is a step towards moving this code into the rasterizer. --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 205 ++++++++++++++++++++-------- 1 file changed, 148 insertions(+), 57 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 6ceda80a716..49ded9e0452 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -32,6 +32,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "util/u_rect.h" +#include "util/u_sse.h" #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_setup_coef.h" @@ -40,7 +41,9 @@ #define NUM_CHANNELS 4 - +#if defined(PIPE_ARCH_SSE) +#include +#endif static INLINE int subpixel_snap(float a) @@ -230,11 +233,10 @@ do_triangle_ccw(struct lp_setup_context *setup, struct lp_scene *scene = setup->scene; struct lp_rast_triangle *tri; struct lp_rast_plane *plane; - int x[3]; - int y[3]; + int x[4]; + int y[4]; struct u_rect bbox; unsigned tri_bytes; - int i; int nr_planes = 3; if (0) @@ -251,10 +253,12 @@ do_triangle_ccw(struct lp_setup_context *setup, x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + x[3] = 0; y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - + y[3] = 0; + /* Bounding rectangle (in pixels) */ { @@ -307,15 +311,6 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->v[2][1] = v2[0][1]; #endif - plane = GET_PLANES(tri); - plane[0].dcdy = x[0] - x[1]; - plane[1].dcdy = x[1] - x[2]; - plane[2].dcdy = x[2] - x[0]; - - plane[0].dcdx = y[0] - y[1]; - plane[1].dcdx = y[1] - y[2]; - plane[2].dcdx = y[2] - y[0]; - LP_COUNT(nr_tris); /* Setup parameter interpolants: @@ -326,54 +321,150 @@ do_triangle_ccw(struct lp_setup_context *setup, tri->inputs.disable = FALSE; tri->inputs.opaque = setup->fs.current.variant->opaque; - - for (i = 0; i < 3; i++) { - /* half-edge constants, will be interated over the whole render - * target. + plane = GET_PLANES(tri); + +#if defined(PIPE_ARCH_SSE) + { + __m128i vertx, verty; + __m128i shufx, shufy; + __m128i dcdx, dcdy, c; + __m128i unused; + __m128i dcdx_neg_mask; + __m128i dcdy_neg_mask; + __m128i dcdx_zero_mask; + __m128i top_left_flag; + __m128i c_inc_mask, c_inc; + __m128i eo, p0, p1, p2; + __m128i zero = _mm_setzero_si128(); + + vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */ + verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */ + + shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1)); + shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1)); + + dcdx = _mm_sub_epi32(verty, shufy); + dcdy = _mm_sub_epi32(vertx, shufx); + + dcdx_neg_mask = _mm_srai_epi32(dcdx, 31); + dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero); + dcdy_neg_mask = _mm_srai_epi32(dcdy, 31); + + top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0); + + c_inc_mask = _mm_or_si128(dcdx_neg_mask, + _mm_and_si128(dcdx_zero_mask, + _mm_xor_si128(dcdy_neg_mask, + top_left_flag))); + + c_inc = _mm_srli_epi32(c_inc_mask, 31); + + c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx), + mm_mullo_epi32(dcdy, verty)); + + c = _mm_add_epi32(c, c_inc); + + /* Scale up to match c: */ - plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; - - /* correct for top-left vs. bottom-left fill convention. - * - * note that we're overloading gl_rasterization_rules to mean - * both (0.5,0.5) pixel centers *and* bottom-left filling - * convention. - * - * GL actually has a top-left filling convention, but GL's - * notion of "top" differs from gallium's... - * - * Also, sometimes (in FBO cases) GL will render upside down - * to its usual method, in which case it will probably want - * to use the opposite, top-left convention. - */ - if (plane[i].dcdx < 0) { - /* both fill conventions want this - adjust for left edges */ - plane[i].c++; - } - else if (plane[i].dcdx == 0) { - if (setup->pixel_offset == 0) { - /* correct for top-left fill convention: - */ - if (plane[i].dcdy > 0) plane[i].c++; + dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER); + dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER); + + /* Calculate trivial reject values: + */ + eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy), + _mm_and_si128(dcdx_neg_mask, dcdx)); + + /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */ + + /* Pointless transpose which gets undone immediately in + * rasterization: + */ + transpose4_epi32(&c, &dcdx, &dcdy, &eo, + &p0, &p1, &p2, &unused); + + _mm_storeu_si128((__m128i *)&plane[0], p0); + _mm_storeu_si128((__m128i *)&plane[1], p1); + _mm_storeu_si128((__m128i *)&plane[2], p2); + } +#else + { + int i; + plane[0].dcdy = x[0] - x[1]; + plane[1].dcdy = x[1] - x[2]; + plane[2].dcdy = x[2] - x[0]; + plane[0].dcdx = y[0] - y[1]; + plane[1].dcdx = y[1] - y[2]; + plane[2].dcdx = y[2] - y[0]; + + for (i = 0; i < 3; i++) { + /* half-edge constants, will be interated over the whole render + * target. + */ + plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane[i].dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane[i].c++; } - else { - /* correct for bottom-left fill convention: - */ - if (plane[i].dcdy < 0) plane[i].c++; + else if (plane[i].dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane[i].dcdy > 0) plane[i].c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane[i].dcdy < 0) plane[i].c++; + } } - } - plane[i].dcdx *= FIXED_ONE; - plane[i].dcdy *= FIXED_ONE; + plane[i].dcdx *= FIXED_ONE; + plane[i].dcdy *= FIXED_ONE; - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - plane[i].eo = 0; - if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; - if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane[i].eo = 0; + if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx; + if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy; + } + } +#endif + + if (0) { + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[0].c, + plane[0].dcdx, + plane[0].dcdy, + plane[0].eo); + + debug_printf("p1: %08x/%08x/%08x/%08x\n", + plane[1].c, + plane[1].dcdx, + plane[1].dcdy, + plane[1].eo); + + debug_printf("p0: %08x/%08x/%08x/%08x\n", + plane[2].c, + plane[2].dcdx, + plane[2].dcdy, + plane[2].eo); } -- cgit v1.2.3 From 392b0954c265fdd66b2de99ab677d2e662935682 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 13:52:00 +0100 Subject: llvmpipe: use aligned loads/stores for plane values --- src/gallium/drivers/llvmpipe/lp_rast_tri.c | 12 ++++++------ src/gallium/drivers/llvmpipe/lp_setup_tri.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 659eb1cac3a..042c315635e 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -261,9 +261,9 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; unsigned nr = 0; - __m128i p0 = _mm_loadu_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ - __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = _mm_loadu_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; @@ -367,9 +367,9 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, int x = (arg.triangle.plane_mask & 0xff) + task->x; int y = (arg.triangle.plane_mask >> 8) + task->y; - __m128i p0 = _mm_loadu_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ - __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = _mm_loadu_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 49ded9e0452..c6cb9afda47 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -382,9 +382,9 @@ do_triangle_ccw(struct lp_setup_context *setup, transpose4_epi32(&c, &dcdx, &dcdy, &eo, &p0, &p1, &p2, &unused); - _mm_storeu_si128((__m128i *)&plane[0], p0); - _mm_storeu_si128((__m128i *)&plane[1], p1); - _mm_storeu_si128((__m128i *)&plane[2], p2); + _mm_store_si128((__m128i *)&plane[0], p0); + _mm_store_si128((__m128i *)&plane[1], p1); + _mm_store_si128((__m128i *)&plane[2], p2); } #else { -- cgit v1.2.3 From 39185efd3a891b0d66b1ded10d165dd9aee94464 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 14:11:22 +0100 Subject: llvmpipe: fix non-sse build after recent changes --- src/gallium/drivers/llvmpipe/lp_setup_coef.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_coef.c b/src/gallium/drivers/llvmpipe/lp_setup_coef.c index a835df6af24..95d6615bb95 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_coef.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_coef.c @@ -145,12 +145,12 @@ setup_fragcoord_coef(struct lp_tri_info *info, /*Z*/ if (usage_mask & TGSI_WRITEMASK_Z) { - linear_coef(inputs, info, slot, 0, 2); + linear_coef(info, slot, 0, 2); } /*W*/ if (usage_mask & TGSI_WRITEMASK_W) { - linear_coef(inputs, info, slot, 0, 3); + linear_coef(info, slot, 0, 3); } } -- cgit v1.2.3 From ffab84c9a27a229e6fa14c3de63868bb843c0f3e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 13:23:05 +0100 Subject: llvmpipe: check shader outputs are non-null before using --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 8df807cec88..c4b1b868b65 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -345,7 +345,7 @@ generate_fs(struct llvmpipe_context *lp, TGSI_SEMANTIC_COLOR, 0); - if (color0 != -1) { + if (color0 != -1 && outputs[color0][3]) { LLVMValueRef alpha = LLVMBuildLoad(builder, outputs[color0][3], "alpha"); LLVMValueRef alpha_ref_value; @@ -364,7 +364,7 @@ generate_fs(struct llvmpipe_context *lp, TGSI_SEMANTIC_POSITION, 0); - if (pos0 != -1) { + if (pos0 != -1 && outputs[pos0][2]) { z = LLVMBuildLoad(builder, outputs[pos0][2], "z"); lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]); } -- cgit v1.2.3 From ac98519c4eed0daf770a9ba380056978e4420352 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Oct 2010 13:23:30 +0100 Subject: llvmpipe: validate color outputs against key->nr_cbufs --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c4b1b868b65..c070b55d3d1 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -404,7 +404,8 @@ generate_fs(struct llvmpipe_context *lp, /* Color write */ for (attrib = 0; attrib < shader->info.base.num_outputs; ++attrib) { - if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR) + if (shader->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_COLOR && + shader->info.base.output_semantic_index[attrib] < key->nr_cbufs) { unsigned cbuf = shader->info.base.output_semantic_index[attrib]; for(chan = 0; chan < NUM_CHANNELS; ++chan) { -- cgit v1.2.3 From 9c439e3c7af7fd1704d44e3ba1a013de7febde37 Mon Sep 17 00:00:00 2001 From: Xavier Chantry Date: Fri, 15 Oct 2010 15:53:13 +0200 Subject: nv50: apply layout_mask to tile_flags The tile_flags now store more than just nv50 page table entry bits. --- src/gallium/drivers/nv50/nv50_context.h | 3 +++ src/gallium/drivers/nv50/nv50_surface.c | 2 +- src/gallium/drivers/nv50/nv50_transfer.c | 10 +++++----- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index ac69c7848e9..bf6a577188b 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -26,6 +26,9 @@ #define NOUVEAU_MSG(fmt, args...) \ fprintf(stderr, "nouveau: "fmt, ##args); +#define nouveau_bo_tile_layout(nvbo) \ + ((nvbo)->tile_flags & NOUVEAU_BO_TILE_LAYOUT_MASK) + /* Constant buffer assignment */ #define NV50_CB_PMISC 0 #define NV50_CB_PVP 1 diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3f3166261b1..f70c138fe1a 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -92,7 +92,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) return 1; } - if (!bo->tile_flags) { + if (!nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, mthd, 2); OUT_RING (chan, format); OUT_RING (chan, 1); diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index f973cf24b98..0cc2f4a837f 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -45,7 +45,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, WAIT_RING (chan, 14); - if (!src_bo->tile_flags) { + if (!nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1); OUT_RING (chan, 1); @@ -64,7 +64,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, OUT_RING (chan, sz); /* copying only 1 zslice per call */ } - if (!dst_bo->tile_flags) { + if (!nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1); OUT_RING (chan, 1); @@ -95,14 +95,14 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2); OUT_RELOCl(chan, src_bo, src_offset, src_reloc); OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); - if (src_bo->tile_flags) { + if (nouveau_bo_tile_layout(src_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_IN, 1); OUT_RING (chan, (sy << 16) | (sx * cpp)); } else { src_offset += (line_count * src_pitch); } - if (dst_bo->tile_flags) { + if (nouveau_bo_tile_layout(dst_bo)) { BEGIN_RING(chan, m2mf, NV50_MEMORY_TO_MEMORY_FORMAT_TILING_POSITION_OUT, 1); OUT_RING (chan, (dy << 16) | (dx * cpp)); @@ -280,7 +280,7 @@ nv50_upload_sifc(struct nv50_context *nv50, MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */ - if (bo->tile_flags) { + if (nouveau_bo_tile_layout(bo)) { BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5); OUT_RING (chan, dst_format); OUT_RING (chan, 0); -- cgit v1.2.3 From 992e7c72797545e5d7dac11c4714c107be07d41c Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 12 Oct 2010 18:41:24 +0100 Subject: llvmpipe: Move makefile include to before targets Or plain make inside of the directory wont build libllvmpipe.a --- src/gallium/drivers/llvmpipe/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 55b877b4ab9..d71f09eeb37 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -63,12 +63,12 @@ PROGS := lp_test_format \ # Need this for the lp_test_*.o files CLEAN_EXTRA = *.o +include ../../Makefile.template + lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a -include ../../Makefile.template - lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ -- cgit v1.2.3 From 98b3f27439ba3a48286ed0d6a4467e5482b41fec Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 17 Oct 2010 16:45:15 +1000 Subject: r600g: add evergreen ARL support. Thanks to Alex Deucher for pointing out the FLT to int conversion is necessary and writing an initial patch, this brings about 20 piglits, and I think this is the last piece to make evergreen and r600 equal in terms of features. --- src/gallium/drivers/r600/r600_opcodes.h | 8 ++----- src/gallium/drivers/r600/r600_shader.c | 39 ++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index 0cf9c1c401c..4f9b39a7fdc 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -233,12 +233,6 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL 0x00000012 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE 0x00000013 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR 0x00000014 -/* same up to here */ -/* -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA 0x00000015 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR 0x00000016 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x00000018 -*/ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT 0x00000015 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT 0x00000016 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT 0x00000017 @@ -336,9 +330,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED_64 0x00000098 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_64 0x00000099 /* TODO Fill in more ALU */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR 0x000000B1 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 0x000000BE #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE 0x000000BF #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE 0x000000C0 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT 0x000000CC #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY 0x000000D6 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW 0x000000D7 diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 94c9cbd9234..d1143985ead 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2610,7 +2610,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } /* r6/7 only for now */ -static int tgsi_arl(struct r600_shader_ctx *ctx) +static int tgsi_eg_arl(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.last = 1; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); + if (r) + return r; + return 0; +} +static int tgsi_r600_arl(struct r600_shader_ctx *ctx) { /* TODO from r600c, ar values don't persist between clauses */ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -2955,7 +2988,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, @@ -3119,7 +3152,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, -- cgit v1.2.3 From 914b0d34e89ee53ef3d7d8aac4baa794492a2064 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 17 Oct 2010 07:15:58 -0700 Subject: llvmpipe: Fix depth-stencil regression. If stencil is enabled then we need to load the z_dst, even if depth testing is disabled. This fixes reflect mesa demo. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 47 ++++++++++++++++------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index ddf7da0b14d..167ac0ed2e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -330,7 +330,7 @@ lp_depth_type(const struct util_format_description *format_desc, * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us * get by with fewer bit twiddling steps. */ -static void +static boolean get_z_shift_and_mask(const struct util_format_description *format_desc, unsigned *shift, unsigned *width, unsigned *mask) { @@ -345,7 +345,8 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, z_swizzle = format_desc->swizzle[0]; - assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE); + if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) + return FALSE; *width = format_desc->channel[z_swizzle].size; @@ -366,6 +367,8 @@ get_z_shift_and_mask(const struct util_format_description *format_desc, } *shift = padding_right; + + return TRUE; } @@ -554,6 +557,27 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, { unsigned s_shift, s_mask; + if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) { + if (z_mask != 0xffffffff) { + z_bitmask = lp_build_const_int_vec(z_type, z_mask); + } + + /* + * Align the framebuffer Z 's LSB to the right. + */ + if (z_shift) { + LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); + z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); + } else if (z_bitmask) { + /* TODO: Instead of loading a mask from memory and ANDing, it's + * probably faster to just shake the bits with two shifts. */ + z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); + } else { + z_dst = zs_dst; + lp_build_name(z_dst, "z_dst"); + } + } + if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) { if (s_shift) { LLVMValueRef shift = lp_build_const_int_vec(s_type, s_shift); @@ -605,8 +629,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, } if (depth->enabled) { - get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); - /* * Convert fragment Z to the desired type, aligning the LSB to the right. */ @@ -644,23 +666,6 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, lp_build_name(z_src, "z_src"); - if (z_mask != 0xffffffff) { - z_bitmask = lp_build_const_int_vec(z_type, z_mask); - } - - /* - * Align the framebuffer Z 's LSB to the right. - */ - if (z_shift) { - LLVMValueRef shift = lp_build_const_int_vec(z_type, z_shift); - z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst"); - } else if (z_bitmask) { - z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst"); - } else { - z_dst = zs_dst; - lp_build_name(z_dst, "z_dst"); - } - /* compare src Z to dst Z, returning 'pass' mask */ z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); -- cgit v1.2.3 From 709699d2e25146ac16af7e939de4398fdc50307e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 17 Oct 2010 07:45:08 -0700 Subject: llvmpipe: Ensure z_shift and z_width is initialized. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 167ac0ed2e2..7eb76d4fb31 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -469,7 +469,7 @@ lp_build_depth_stencil_test(LLVMBuilderRef builder, struct lp_build_context z_bld; struct lp_build_context s_bld; struct lp_type s_type; - unsigned z_shift, z_width, z_mask; + unsigned z_shift = 0, z_width = 0, z_mask = 0; LLVMValueRef zs_dst, z_dst = NULL; LLVMValueRef stencil_vals = NULL; LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; -- cgit v1.2.3 From a0add0446ca9dce6d4a96014c42ba6cf3a73a44a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 17 Oct 2010 09:58:04 -0700 Subject: llvmpipe: Fix bad refactoring. 'i' and 'chan' have random values here, which could cause a buffer overflow in debug builds, if chan > 4. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index c070b55d3d1..7acbe7e86c3 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -365,8 +365,7 @@ generate_fs(struct llvmpipe_context *lp, 0); if (pos0 != -1 && outputs[pos0][2]) { - z = LLVMBuildLoad(builder, outputs[pos0][2], "z"); - lp_build_name(z, "output%u.%u.%c", i, pos0, "xyzw"[chan]); + z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z"); } lp_build_depth_stencil_test(builder, -- cgit v1.2.3 From 4afad7d3edcaaa62b748cc9bd4d88a626ac0920a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 17 Oct 2010 10:15:15 -0700 Subject: llvmpipe: Initialize bld ctx via lp_build_context_init instead of ad-hoc and broken code. --- src/gallium/drivers/llvmpipe/lp_test_round.c | 5 +---- src/gallium/drivers/llvmpipe/lp_test_sincos.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 57b0ee57767..0770c7ab9ac 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -75,10 +75,7 @@ add_test(LLVMModuleRef module, const char *name, lp_func_t lp_func) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c index 7ab357f162e..79939b1a393 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_sincos.c +++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c @@ -72,10 +72,7 @@ add_sincos_test(LLVMModuleRef module, boolean sin) LLVMValueRef ret; struct lp_build_context bld; - bld.builder = builder; - bld.type.floating = 1; - bld.type.width = 32; - bld.type.length = 4; + lp_build_context_init(&bld, builder, lp_float32_vec4_type()); LLVMSetFunctionCallConv(func, LLVMCCallConv); -- cgit v1.2.3 From c9d297162ad7efb87ab3e29c14259147d891baf9 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sun, 17 Oct 2010 14:09:53 -0700 Subject: llvmpipe: Return non-zero exit code for lp_test_round failures. --- src/gallium/drivers/llvmpipe/lp_test_round.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 0770c7ab9ac..0ca27915923 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -97,9 +97,10 @@ printv(char* string, v4sf value) f[0], f[1], f[2], f[3]); } -static void +static boolean compare(v4sf x, v4sf y) { + boolean success = TRUE; float *xp = (float *) &x; float *yp = (float *) &y; if (xp[0] != yp[0] || @@ -107,7 +108,9 @@ compare(v4sf x, v4sf y) xp[2] != yp[2] || xp[3] != yp[3]) { printf(" Incorrect result! ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ \n"); + success = FALSE; } + return success; } @@ -188,7 +191,7 @@ test_round(unsigned verbose, FILE *fp) y = round_func(x); printv("C round(x) ", ref); printv("LLVM round(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = trunc(xp[0]); refp[1] = trunc(xp[1]); @@ -197,7 +200,7 @@ test_round(unsigned verbose, FILE *fp) y = trunc_func(x); printv("C trunc(x) ", ref); printv("LLVM trunc(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = floor(xp[0]); refp[1] = floor(xp[1]); @@ -206,7 +209,7 @@ test_round(unsigned verbose, FILE *fp) y = floor_func(x); printv("C floor(x) ", ref); printv("LLVM floor(x)", y); - compare(ref, y); + success = success && compare(ref, y); refp[0] = ceil(xp[0]); refp[1] = ceil(xp[1]); @@ -215,7 +218,7 @@ test_round(unsigned verbose, FILE *fp) y = ceil_func(x); printv("C ceil(x) ", ref); printv("LLVM ceil(x) ", y); - compare(ref, y); + success = success && compare(ref, y); } LLVMFreeMachineCodeForFunction(engine, test_round); @@ -244,11 +247,7 @@ test_round(unsigned verbose, FILE *fp) boolean test_all(unsigned verbose, FILE *fp) { - boolean success = TRUE; - - test_round(verbose, fp); - - return success; + return test_round(verbose, fp); } -- cgit v1.2.3 From 82114ac02a2d5a764ce69711fc0a71f559ee9137 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 6 Oct 2010 12:49:26 +1000 Subject: r600g: switch to a common formats.h file since they are in different regs --- src/gallium/drivers/r600/eg_state_inlines.h | 37 +++++++++--------- src/gallium/drivers/r600/evergreend.h | 39 ------------------- src/gallium/drivers/r600/r600_formats.h | 56 +++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_state_inlines.h | 47 +++++++++++----------- src/gallium/drivers/r600/r600_texture.c | 41 ++++++++++---------- src/gallium/drivers/r600/r600d.h | 40 +------------------ 6 files changed, 121 insertions(+), 139 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_formats.h (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h index 5a4e00aac38..be81c28b43f 100644 --- a/src/gallium/drivers/r600/eg_state_inlines.h +++ b/src/gallium/drivers/r600/eg_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "evergreend.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -523,32 +524,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 16: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_16_FLOAT; + result = FMT_16_FLOAT; break; case 2: - result = V_030008_FMT_16_16_FLOAT; + result = FMT_16_16_FLOAT; break; case 3: - result = V_030008_FMT_16_16_16_FLOAT; + result = FMT_16_16_16_FLOAT; break; case 4: - result = V_030008_FMT_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_32_FLOAT; + result = FMT_32_FLOAT; break; case 2: - result = V_030008_FMT_32_32_FLOAT; + result = FMT_32_32_FLOAT; break; case 3: - result = V_030008_FMT_32_32_32_FLOAT; + result = FMT_32_32_32_FLOAT; break; case 4: - result = V_030008_FMT_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; break; } break; @@ -564,48 +565,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 8: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_8; + result = FMT_8; break; case 2: - result = V_030008_FMT_8_8; + result = FMT_8_8; break; case 3: // result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ // break; case 4: - result = V_030008_FMT_8_8_8_8; + result = FMT_8_8_8_8; break; } break; case 16: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_16; + result = FMT_16; break; case 2: - result = V_030008_FMT_16_16; + result = FMT_16_16; break; case 3: // result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ // break; case 4: - result = V_030008_FMT_16_16_16_16; + result = FMT_16_16_16_16; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_030008_FMT_32; + result = FMT_32; break; case 2: - result = V_030008_FMT_32_32; + result = FMT_32_32; break; case 3: - result = V_030008_FMT_32_32_32; + result = FMT_32_32_32; break; case 4: - result = V_030008_FMT_32_32_32_32; + result = FMT_32_32_32_32; break; } break; diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 5d07f532f09..8e96f9355e6 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1050,45 +1050,6 @@ #define S_030008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_030008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_030008_DATA_FORMAT 0xFC0FFFFF -#define V_030008_FMT_INVALID 0x00000000 -#define V_030008_FMT_8 0x00000001 -#define V_030008_FMT_4_4 0x00000002 -#define V_030008_FMT_3_3_2 0x00000003 -#define V_030008_FMT_16 0x00000005 -#define V_030008_FMT_16_FLOAT 0x00000006 -#define V_030008_FMT_8_8 0x00000007 -#define V_030008_FMT_5_6_5 0x00000008 -#define V_030008_FMT_6_5_5 0x00000009 -#define V_030008_FMT_1_5_5_5 0x0000000A -#define V_030008_FMT_4_4_4_4 0x0000000B -#define V_030008_FMT_5_5_5_1 0x0000000C -#define V_030008_FMT_32 0x0000000D -#define V_030008_FMT_32_FLOAT 0x0000000E -#define V_030008_FMT_16_16 0x0000000F -#define V_030008_FMT_16_16_FLOAT 0x00000010 -#define V_030008_FMT_8_24 0x00000011 -#define V_030008_FMT_8_24_FLOAT 0x00000012 -#define V_030008_FMT_24_8 0x00000013 -#define V_030008_FMT_24_8_FLOAT 0x00000014 -#define V_030008_FMT_10_11_11 0x00000015 -#define V_030008_FMT_10_11_11_FLOAT 0x00000016 -#define V_030008_FMT_11_11_10 0x00000017 -#define V_030008_FMT_11_11_10_FLOAT 0x00000018 -#define V_030008_FMT_2_10_10_10 0x00000019 -#define V_030008_FMT_8_8_8_8 0x0000001A -#define V_030008_FMT_10_10_10_2 0x0000001B -#define V_030008_FMT_X24_8_32_FLOAT 0x0000001C -#define V_030008_FMT_32_32 0x0000001D -#define V_030008_FMT_32_32_FLOAT 0x0000001E -#define V_030008_FMT_16_16_16_16 0x0000001F -#define V_030008_FMT_16_16_16_16_FLOAT 0x00000020 -#define V_030008_FMT_32_32_32_32 0x00000022 -#define V_030008_FMT_32_32_32_32_FLOAT 0x00000023 -#define V_030008_FMT_8_8_8 0x0000002c -#define V_030008_FMT_16_16_16 0x0000002d -#define V_030008_FMT_16_16_16_FLOAT 0x0000002e -#define V_030008_FMT_32_32_32 0x0000002f -#define V_030008_FMT_32_32_32_FLOAT 0x00000030 #define S_030008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_030008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_030008_NUM_FORMAT_ALL 0xF3FFFFFF diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h new file mode 100644 index 00000000000..0c91a212384 --- /dev/null +++ b/src/gallium/drivers/r600/r600_formats.h @@ -0,0 +1,56 @@ +#ifndef R600_FORMATS_H +#define R600_FORMATS_H + +/* list of formats from R700 ISA document - apply across GPUs in different registers */ +#define FMT_INVALID 0x00000000 +#define FMT_8 0x00000001 +#define FMT_4_4 0x00000002 +#define FMT_3_3_2 0x00000003 +#define FMT_16 0x00000005 +#define FMT_16_FLOAT 0x00000006 +#define FMT_8_8 0x00000007 +#define FMT_5_6_5 0x00000008 +#define FMT_6_5_5 0x00000009 +#define FMT_1_5_5_5 0x0000000A +#define FMT_4_4_4_4 0x0000000B +#define FMT_5_5_5_1 0x0000000C +#define FMT_32 0x0000000D +#define FMT_32_FLOAT 0x0000000E +#define FMT_16_16 0x0000000F +#define FMT_16_16_FLOAT 0x00000010 +#define FMT_8_24 0x00000011 +#define FMT_8_24_FLOAT 0x00000012 +#define FMT_24_8 0x00000013 +#define FMT_24_8_FLOAT 0x00000014 +#define FMT_10_11_11 0x00000015 +#define FMT_10_11_11_FLOAT 0x00000016 +#define FMT_11_11_10 0x00000017 +#define FMT_11_11_10_FLOAT 0x00000018 +#define FMT_2_10_10_10 0x00000019 +#define FMT_8_8_8_8 0x0000001A +#define FMT_10_10_10_2 0x0000001B +#define FMT_X24_8_32_FLOAT 0x0000001C +#define FMT_32_32 0x0000001D +#define FMT_32_32_FLOAT 0x0000001E +#define FMT_16_16_16_16 0x0000001F +#define FMT_16_16_16_16_FLOAT 0x00000020 +#define FMT_32_32_32_32 0x00000022 +#define FMT_32_32_32_32_FLOAT 0x00000023 +#define FMT_1 0x00000025 +#define FMT_GB_GR 0x00000027 +#define FMT_BG_RG 0x00000028 +#define FMT_32_AS_8 0x00000029 +#define FMT_32_AS_8_8 0x0000002a +#define FMT_5_9_9_9_SHAREDEXP 0x0000002b +#define FMT_8_8_8 0x0000002c +#define FMT_16_16_16 0x0000002d +#define FMT_16_16_16_FLOAT 0x0000002e +#define FMT_32_32_32 0x0000002f +#define FMT_32_32_32_FLOAT 0x00000030 +#define FMT_BC1 0x00000031 +#define FMT_BC2 0x00000032 +#define FMT_BC3 0x00000033 +#define FMT_BC4 0x00000034 +#define FMT_BC5 0x00000035 + +#endif diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index 29a29d87cd3..1c1978f8abb 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "r600d.h" +#include "r600_formats.h" static INLINE uint32_t r600_translate_blend_function(int blend_func) { @@ -352,13 +353,13 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format) /* 64-bit buffers. */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: - // return V_0280A0_COLOR_16_16_16_16; + // return FMT_16_16_16_16; case PIPE_FORMAT_R16G16B16A16_FLOAT: - // return V_0280A0_COLOR_16_16_16_16_FLOAT; + // return FMT_16_16_16_16_FLOAT; /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32A32_FLOAT: - // return V_0280A0_COLOR_32_32_32_32_FLOAT; + // return FMT_32_32_32_32_FLOAT; return 0; default: R600_ERR("unsupported colorswap format %d\n", format); @@ -522,32 +523,32 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 16: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_16_FLOAT; + result = FMT_16_FLOAT; break; case 2: - result = V_038008_FMT_16_16_FLOAT; + result = FMT_16_16_FLOAT; break; case 3: - result = V_038008_FMT_16_16_16_FLOAT; + result = FMT_16_16_16_FLOAT; break; case 4: - result = V_038008_FMT_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_32_FLOAT; + result = FMT_32_FLOAT; break; case 2: - result = V_038008_FMT_32_32_FLOAT; + result = FMT_32_32_FLOAT; break; case 3: - result = V_038008_FMT_32_32_32_FLOAT; + result = FMT_32_32_32_FLOAT; break; case 4: - result = V_038008_FMT_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; break; } break; @@ -563,48 +564,48 @@ static INLINE uint32_t r600_translate_vertex_data_type(enum pipe_format format) case 8: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_8; + result = FMT_8; break; case 2: - result = V_038008_FMT_8_8; + result = FMT_8_8; break; case 3: - // result = V_038008_FMT_8_8_8; /* fails piglit draw-vertices test */ + // result = FMT_8_8_8; /* fails piglit draw-vertices test */ // break; case 4: - result = V_038008_FMT_8_8_8_8; + result = FMT_8_8_8_8; break; } break; case 16: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_16; + result = FMT_16; break; case 2: - result = V_038008_FMT_16_16; + result = FMT_16_16; break; case 3: - // result = V_038008_FMT_16_16_16; /* fails piglit draw-vertices test */ + // result = FMT_16_16_16; /* fails piglit draw-vertices test */ // break; case 4: - result = V_038008_FMT_16_16_16_16; + result = FMT_16_16_16_16; break; } break; case 32: switch (desc->nr_channels) { case 1: - result = V_038008_FMT_32; + result = FMT_32; break; case 2: - result = V_038008_FMT_32_32; + result = FMT_32_32; break; case 3: - result = V_038008_FMT_32_32_32; + result = FMT_32_32_32; break; case 4: - result = V_038008_FMT_32_32_32_32; + result = FMT_32_32_32_32; break; } break; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 22fe8bf0f39..152cd9210fd 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -35,6 +35,7 @@ #include "r600_resource.h" #include "r600_state_inlines.h" #include "r600d.h" +#include "r600_formats.h" extern struct u_resource_vtbl r600_texture_vtbl; @@ -565,19 +566,19 @@ uint32_t r600_translate_texformat(enum pipe_format format, case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case PIPE_FORMAT_X24S8_USCALED: word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_0280A0_COLOR_8_24; + result = FMT_8_24; goto out_word4; case PIPE_FORMAT_S8X24_USCALED: word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_INT); case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - result = V_0280A0_COLOR_24_8; + result = FMT_24_8; goto out_word4; case PIPE_FORMAT_S8_USCALED: result = V_0280A0_COLOR_8; @@ -635,7 +636,7 @@ uint32_t r600_translate_texformat(enum pipe_format format, if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && desc->channel[2].size == 5) { - result = V_0280A0_COLOR_5_6_5; + result = FMT_5_6_5; goto out_word4; } goto out_unknown; @@ -644,14 +645,14 @@ uint32_t r600_translate_texformat(enum pipe_format format, desc->channel[1].size == 5 && desc->channel[2].size == 5 && desc->channel[3].size == 1) { - result = V_0280A0_COLOR_1_5_5_5; + result = FMT_1_5_5_5; goto out_word4; } if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && desc->channel[2].size == 10 && desc->channel[3].size == 2) { - result = V_0280A0_COLOR_10_10_10_2; + result = FMT_10_10_10_2; goto out_word4; } goto out_unknown; @@ -682,36 +683,36 @@ uint32_t r600_translate_texformat(enum pipe_format format, case 4: switch (desc->nr_channels) { case 2: - result = V_0280A0_COLOR_4_4; + result = FMT_4_4; goto out_word4; case 4: - result = V_0280A0_COLOR_4_4_4_4; + result = FMT_4_4_4_4; goto out_word4; } goto out_unknown; case 8: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_8; + result = FMT_8; goto out_word4; case 2: - result = V_0280A0_COLOR_8_8; + result = FMT_8_8; goto out_word4; case 4: - result = V_0280A0_COLOR_8_8_8_8; + result = FMT_8_8_8_8; goto out_word4; } goto out_unknown; case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16; + result = FMT_16; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16; + result = FMT_16_16; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16; + result = FMT_16_16_16_16; goto out_word4; } } @@ -722,26 +723,26 @@ uint32_t r600_translate_texformat(enum pipe_format format, case 16: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_16_FLOAT; + result = FMT_16_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_16_16_FLOAT; + result = FMT_16_16_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_16_16_16_16_FLOAT; + result = FMT_16_16_16_16_FLOAT; goto out_word4; } goto out_unknown; case 32: switch (desc->nr_channels) { case 1: - result = V_0280A0_COLOR_32_FLOAT; + result = FMT_32_FLOAT; goto out_word4; case 2: - result = V_0280A0_COLOR_32_32_FLOAT; + result = FMT_32_32_FLOAT; goto out_word4; case 4: - result = V_0280A0_COLOR_32_32_32_32_FLOAT; + result = FMT_32_32_32_32_FLOAT; goto out_word4; } } diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index f32f5286c95..76df62ffc3e 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -1028,45 +1028,7 @@ #define S_038008_DATA_FORMAT(x) (((x) & 0x3F) << 20) #define G_038008_DATA_FORMAT(x) (((x) >> 20) & 0x3F) #define C_038008_DATA_FORMAT 0xFC0FFFFF -#define V_038008_FMT_INVALID 0x00000000 -#define V_038008_FMT_8 0x00000001 -#define V_038008_FMT_4_4 0x00000002 -#define V_038008_FMT_3_3_2 0x00000003 -#define V_038008_FMT_16 0x00000005 -#define V_038008_FMT_16_FLOAT 0x00000006 -#define V_038008_FMT_8_8 0x00000007 -#define V_038008_FMT_5_6_5 0x00000008 -#define V_038008_FMT_6_5_5 0x00000009 -#define V_038008_FMT_1_5_5_5 0x0000000A -#define V_038008_FMT_4_4_4_4 0x0000000B -#define V_038008_FMT_5_5_5_1 0x0000000C -#define V_038008_FMT_32 0x0000000D -#define V_038008_FMT_32_FLOAT 0x0000000E -#define V_038008_FMT_16_16 0x0000000F -#define V_038008_FMT_16_16_FLOAT 0x00000010 -#define V_038008_FMT_8_24 0x00000011 -#define V_038008_FMT_8_24_FLOAT 0x00000012 -#define V_038008_FMT_24_8 0x00000013 -#define V_038008_FMT_24_8_FLOAT 0x00000014 -#define V_038008_FMT_10_11_11 0x00000015 -#define V_038008_FMT_10_11_11_FLOAT 0x00000016 -#define V_038008_FMT_11_11_10 0x00000017 -#define V_038008_FMT_11_11_10_FLOAT 0x00000018 -#define V_038008_FMT_2_10_10_10 0x00000019 -#define V_038008_FMT_8_8_8_8 0x0000001A -#define V_038008_FMT_10_10_10_2 0x0000001B -#define V_038008_FMT_X24_8_32_FLOAT 0x0000001C -#define V_038008_FMT_32_32 0x0000001D -#define V_038008_FMT_32_32_FLOAT 0x0000001E -#define V_038008_FMT_16_16_16_16 0x0000001F -#define V_038008_FMT_16_16_16_16_FLOAT 0x00000020 -#define V_038008_FMT_32_32_32_32 0x00000022 -#define V_038008_FMT_32_32_32_32_FLOAT 0x00000023 -#define V_038008_FMT_8_8_8 0x0000002c -#define V_038008_FMT_16_16_16 0x0000002d -#define V_038008_FMT_16_16_16_FLOAT 0x0000002e -#define V_038008_FMT_32_32_32 0x0000002f -#define V_038008_FMT_32_32_32_FLOAT 0x00000030 + #define S_038008_NUM_FORMAT_ALL(x) (((x) & 0x3) << 26) #define G_038008_NUM_FORMAT_ALL(x) (((x) >> 26) & 0x3) #define C_038008_NUM_FORMAT_ALL 0xF3FFFFFF -- cgit v1.2.3 From e8e20313afbdf3a38e5a10e6fa566864452eeb2c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 8 Oct 2010 11:56:12 +1000 Subject: r600g: add defines for tiling --- src/gallium/drivers/r600/r600d.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 76df62ffc3e..a3cb5b86004 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -904,6 +904,10 @@ #define S_038000_TILE_MODE(x) (((x) & 0xF) << 3) #define G_038000_TILE_MODE(x) (((x) >> 3) & 0xF) #define C_038000_TILE_MODE 0xFFFFFF87 +#define V_038000_ARRAY_LINEAR_GENERAL 0x00000000 +#define V_038000_ARRAY_LINEAR_ALIGNED 0x00000001 +#define V_038000_ARRAY_1D_TILED_THIN1 0x00000002 +#define V_038000_ARRAY_2D_TILED_THIN1 0x00000004 #define S_038000_TILE_TYPE(x) (((x) & 0x1) << 7) #define G_038000_TILE_TYPE(x) (((x) >> 7) & 0x1) #define C_038000_TILE_TYPE 0xFFFFFF7F -- cgit v1.2.3 From 7b3fa038830663de9bceded1b0dd2d64b8cf39c4 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 8 Oct 2010 11:56:43 +1000 Subject: r600g: get tiling info from kernel --- src/gallium/drivers/r600/r600.h | 7 ++++ src/gallium/drivers/r600/r600_pipe.c | 2 ++ src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/winsys/r600/drm/r600.c | 5 +++ src/gallium/winsys/r600/drm/r600_drm.c | 62 +++++++++++++++++++++++++++++++++ src/gallium/winsys/r600/drm/r600_priv.h | 1 + 6 files changed, 78 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 24e25cec0db..15ee0011061 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -99,8 +99,15 @@ enum chip_class { EVERGREEN, }; +struct r600_tiling_info { + unsigned num_channels; + unsigned num_banks; + unsigned group_bytes; +}; + enum radeon_family r600_get_family(struct radeon *rw); enum chip_class r600_get_family_class(struct radeon *radeon); +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon); /* r600_bo.c */ struct r600_bo; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 8eb97993236..dd8fa4fcd77 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -443,5 +443,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon) r600_init_screen_texture_functions(&rscreen->screen); r600_init_screen_resource_functions(&rscreen->screen); + rscreen->tiling_info = r600_get_tiling_info(radeon); + return &rscreen->screen; } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 47a1b3070e9..35548329e46 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -58,6 +58,7 @@ enum r600_pipe_state_id { struct r600_screen { struct pipe_screen screen; struct radeon *radeon; + struct r600_tiling_info *tiling_info; }; struct r600_pipe_sampler_view { diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c index 496547ca994..0a4d2e791db 100644 --- a/src/gallium/winsys/r600/drm/r600.c +++ b/src/gallium/winsys/r600/drm/r600.c @@ -40,6 +40,11 @@ enum chip_class r600_get_family_class(struct radeon *radeon) return radeon->chip_class; } +struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon) +{ + return &radeon->tiling_info; +} + static int r600_get_device(struct radeon *r600) { struct drm_radeon_info info; diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 4916843fd6d..c9de95ffc02 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -37,6 +37,9 @@ #include "xf86drm.h" #include "radeon_drm.h" +#ifndef RADEON_INFO_TILING_CONFIG +#define RADEON_INFO_TILING_CONFIG 0x6 +#endif static int radeon_get_device(struct radeon *radeon) { struct drm_radeon_info info; @@ -50,6 +53,61 @@ static int radeon_get_device(struct radeon *radeon) return r; } +static int radeon_drm_get_tiling(struct radeon *radeon) +{ + struct drm_radeon_info info; + int r; + uint32_t tiling_config; + + info.request = RADEON_INFO_TILING_CONFIG; + info.value = (uintptr_t)&tiling_config; + r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info, + sizeof(struct drm_radeon_info)); + + if (r) + return r; + + switch ((tiling_config & 0xe) >> 1) { + case 0: + radeon->tiling_info.num_channels = 1; + break; + case 1: + radeon->tiling_info.num_channels = 2; + break; + case 2: + radeon->tiling_info.num_channels = 4; + break; + case 3: + radeon->tiling_info.num_channels = 8; + break; + default: + return -EINVAL; + } + + switch ((tiling_config & 0x30) >> 4) { + case 0: + radeon->tiling_info.num_banks = 4; + break; + case 1: + radeon->tiling_info.num_banks = 8; + break; + default: + return -EINVAL; + + } + switch ((tiling_config & 0xc0) >> 6) { + case 0: + radeon->tiling_info.group_bytes = 256; + break; + case 1: + radeon->tiling_info.group_bytes = 512; + break; + default: + return -EINVAL; + } + return 0; +} + struct radeon *radeon_new(int fd, unsigned device) { struct radeon *radeon; @@ -157,6 +215,10 @@ struct radeon *radeon_new(int fd, unsigned device) break; } + if (radeon->chip_class == R600 || radeon->chip_class == R700) { + if (radeon_drm_get_tiling(radeon)) + return NULL; + } radeon->kman = radeon_bo_pbmgr_create(radeon); if (!radeon->kman) return NULL; diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index e3868d3cb9a..08e243b00d0 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -42,6 +42,7 @@ struct radeon { enum chip_class chip_class; struct pb_manager *kman; /* kernel bo manager */ struct pb_manager *cman; /* cached bo manager */ + struct r600_tiling_info tiling_info; }; struct radeon *r600_new(int fd, unsigned device); -- cgit v1.2.3 From 5b966f58e3d87fc271cc429be969cf98eec991ca Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 8 Oct 2010 11:57:04 +1000 Subject: r600g: set tiling bits in hw state --- src/gallium/drivers/r600/r600_state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 2c0a2005cf7..00234f956aa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -657,6 +657,10 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c bo[1] = rbuffer->bo; } pitch = align(tmp->pitch_in_pixels[0], 8); + if (tmp->tiled) { + array_mode = tmp->array_mode; + tile_type = tmp->tile_type; + } /* FIXME properly handle first level != 0 */ r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, @@ -957,6 +961,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta swap = r600_translate_colorswap(rtex->resource.base.b.format); color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | + S_0280A0_ARRAY_MODE(rtex->array_mode); S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) -- cgit v1.2.3 From 543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 17 Oct 2010 18:29:28 -0700 Subject: llvmpipe: remove setup fallback path --- src/gallium/drivers/llvmpipe/Makefile | 1 + src/gallium/drivers/llvmpipe/SConscript | 1 - src/gallium/drivers/llvmpipe/lp_state_setup.c | 13 +- .../drivers/llvmpipe/lp_state_setup_fallback.c | 265 --------------------- 4 files changed, 3 insertions(+), 277 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 55b877b4ab9..379f14b43d2 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -38,6 +38,7 @@ C_SOURCES = \ lp_state_clip.c \ lp_state_derived.c \ lp_state_fs.c \ + lp_state_setup.c \ lp_state_gs.c \ lp_state_rasterizer.c \ lp_state_sampler.c \ diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 6ddce659206..f893878daab 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -71,7 +71,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_state_derived.c', 'lp_state_fs.c', 'lp_state_setup.c', - 'lp_state_setup_fallback.c', 'lp_state_gs.c', 'lp_state_rasterizer.c', 'lp_state_sampler.c', diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index 3261c53f516..ee4991bf8d2 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -704,17 +704,8 @@ llvmpipe_update_setup(struct llvmpipe_context *lp) } variant = generate_setup_variant(screen, key); - if (variant) { - insert_at_head(&lp->setup_variants_list, &variant->list_item_global); - lp->nr_setup_variants++; - } - else { - /* Keep the old path around for debugging, and also perhaps - * in case malloc fails during compilation. - */ - variant = &lp->setup_variant; - variant->jit_function = lp_setup_tri_fallback; - } + insert_at_head(&lp->setup_variants_list, &variant->list_item_global); + lp->nr_setup_variants++; } lp_setup_set_setup_variant(lp->setup, diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c b/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c deleted file mode 100644 index 1922efcc88d..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_state_setup_fallback.c +++ /dev/null @@ -1,265 +0,0 @@ -/************************************************************************** - * - * Copyright 2010, VMware. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * Fallback (non-llvm) path for triangle setup. Will remove once llvm - * is up and running. - * - * TODO: line/point setup. - */ - -#include "util/u_math.h" -#include "util/u_memory.h" -#include "lp_state_setup.h" - - - -#if defined(PIPE_ARCH_SSE) -#include - -struct setup_args { - float (*a0)[4]; /* aligned */ - float (*dadx)[4]; /* aligned */ - float (*dady)[4]; /* aligned */ - - float x0_center; - float y0_center; - - /* turn these into an aligned float[4] */ - float dy01_ooa; - float dy20_ooa; - float dx01_ooa; - float dx20_ooa; - - const float (*v0)[4]; /* aligned */ - const float (*v1)[4]; /* aligned */ - const float (*v2)[4]; /* aligned */ - - boolean frontfacing; /* remove eventually */ -}; - - -static void constant_coef4( struct setup_args *args, - unsigned slot, - const float *attr) -{ - *(__m128 *)args->a0[slot] = *(__m128 *)attr; - *(__m128 *)args->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)args->dady[slot] = _mm_set1_ps(0.0); -} - - - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct setup_args *args, - unsigned slot ) -{ - /* XXX: just pass frontface directly to the shader, don't bother - * treating it as an input. - */ - __m128 a0 = _mm_setr_ps(args->frontfacing ? 1.0 : -1.0, - 0, 0, 0); - - *(__m128 *)args->a0[slot] = a0; - *(__m128 *)args->dadx[slot] = _mm_set1_ps(0.0); - *(__m128 *)args->dady[slot] = _mm_set1_ps(0.0); -} - - - -static void calc_coef4( struct setup_args *args, - unsigned slot, - __m128 a0, - __m128 a1, - __m128 a2) -{ - __m128 da01 = _mm_sub_ps(a0, a1); - __m128 da20 = _mm_sub_ps(a2, a0); - - __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(args->dy20_ooa)); - __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(args->dy01_ooa)); - __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa); - - __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(args->dx20_ooa)); - __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(args->dx01_ooa)); - __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa); - - __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(args->x0_center)); - __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(args->y0_center)); - __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0); - __m128 attr_0 = _mm_sub_ps(a0, attr_v0); - - *(__m128 *)args->a0[slot] = attr_0; - *(__m128 *)args->dadx[slot] = dadx; - *(__m128 *)args->dady[slot] = dady; -} - - -static void linear_coef( struct setup_args *args, - unsigned slot, - unsigned vert_attr) -{ - __m128 a0 = *(const __m128 *)args->v0[vert_attr]; - __m128 a1 = *(const __m128 *)args->v1[vert_attr]; - __m128 a2 = *(const __m128 *)args->v2[vert_attr]; - - calc_coef4(args, slot, a0, a1, a2); -} - - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct setup_args *args, - unsigned slot, - unsigned vert_attr) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - __m128 a0 = *(const __m128 *)args->v0[vert_attr]; - __m128 a1 = *(const __m128 *)args->v1[vert_attr]; - __m128 a2 = *(const __m128 *)args->v2[vert_attr]; - - __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(args->v0[0][3])); - __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(args->v1[0][3])); - __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(args->v2[0][3])); - - calc_coef4(args, slot, a0_oow, a1_oow, a2_oow); -} - - - - - -/** - * Compute the args-> dadx, dady, a0 values. - * - * Note that this was effectively a little interpreted program, where - * the opcodes were LP_INTERP_*. This is the program which is now - * being code-generated in lp_state_setup.c. - */ -void lp_setup_tri_fallback( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean front_facing, - float (*a0)[4], - float (*dadx)[4], - float (*dady)[4], - const struct lp_setup_variant_key *key ) -{ - struct setup_args args; - float pixel_offset = key->pixel_center_half ? 0.5 : 0.0; - float dx01 = v0[0][0] - v1[0][0]; - float dy01 = v0[0][1] - v1[0][1]; - float dx20 = v2[0][0] - v0[0][0]; - float dy20 = v2[0][1] - v0[0][1]; - float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01); - unsigned slot; - - args.v0 = v0; - args.v1 = v1; - args.v2 = v2; - args.frontfacing = front_facing; - args.a0 = a0; - args.dadx = dadx; - args.dady = dady; - - args.x0_center = v0[0][0] - pixel_offset; - args.y0_center = v0[0][1] - pixel_offset; - args.dx01_ooa = dx01 * oneoverarea; - args.dx20_ooa = dx20 * oneoverarea; - args.dy01_ooa = dy01 * oneoverarea; - args.dy20_ooa = dy20 * oneoverarea; - - /* The internal position input is in slot zero: - */ - linear_coef(&args, 0, 0); - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < key->num_inputs; slot++) { - unsigned vert_attr = key->inputs[slot].src_index; - - switch (key->inputs[slot].interp) { - case LP_INTERP_CONSTANT: - if (key->flatshade_first) { - constant_coef4(&args, slot+1, args.v0[vert_attr]); - } - else { - constant_coef4(&args, slot+1, args.v2[vert_attr]); - } - break; - - case LP_INTERP_LINEAR: - linear_coef(&args, slot+1, vert_attr); - break; - - case LP_INTERP_PERSPECTIVE: - perspective_coef(&args, slot+1, vert_attr); - break; - - case LP_INTERP_POSITION: - /* - * The generated pixel interpolators will pick up the coeffs from - * slot 0. - */ - break; - - case LP_INTERP_FACING: - setup_facing_coef(&args, slot+1); - break; - - default: - assert(0); - } - } -} - -#else - -void lp_setup_tri_fallback( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4], - boolean front_facing, - float (*a0)[4], - float (*dadx)[4], - float (*dady)[4], - const struct lp_setup_variant_key *key ) -{ - /* this path for debugging only, don't need a non-sse version. */ -} - -#endif -- cgit v1.2.3 From ca2b2ac131933b4171b519813df1aaa3a81621cd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 17 Oct 2010 18:48:11 -0700 Subject: llvmpipe: fail cleanly on malloc failure in lp_setup_alloc_triangle --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index c6cb9afda47..15c414d8c3b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -86,9 +86,10 @@ lp_setup_alloc_triangle(struct lp_scene *scene, plane_sz); tri = lp_scene_alloc_aligned( scene, *tri_size, 16 ); - if (tri) { - tri->inputs.stride = input_array_sz; - } + if (tri == NULL) + return NULL; + + tri->inputs.stride = input_array_sz; { char *a = (char *)tri; @@ -96,7 +97,6 @@ lp_setup_alloc_triangle(struct lp_scene *scene, assert(b - a == *tri_size); } - return tri; } -- cgit v1.2.3 From 75799b8d02ff3bc6acc5157bf62ee2188e9f3ee3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 17 Oct 2010 19:11:47 -0700 Subject: llvmpipe: remove unused file --- src/gallium/drivers/llvmpipe/SConscript | 1 - src/gallium/drivers/llvmpipe/lp_setup_debug.c | 1 - 2 files changed, 2 deletions(-) delete mode 100644 src/gallium/drivers/llvmpipe/lp_setup_debug.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index d63bdd72a72..49950153a4f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -55,7 +55,6 @@ llvmpipe = env.ConvenienceLibrary( 'lp_scene_queue.c', 'lp_screen.c', 'lp_setup.c', - 'lp_setup_debug.c', 'lp_setup_line.c', 'lp_setup_point.c', 'lp_setup_tri.c', diff --git a/src/gallium/drivers/llvmpipe/lp_setup_debug.c b/src/gallium/drivers/llvmpipe/lp_setup_debug.c deleted file mode 100644 index a71a4719a86..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_setup_debug.c +++ /dev/null @@ -1 +0,0 @@ -/* Some debugging stuff */ -- cgit v1.2.3 From 9da17fed2e7645a401a378ae690eb23513948e18 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 17 Oct 2010 19:23:40 -0700 Subject: llvmpipe: remove unused arg from jit_setup_tri function --- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 3 +-- src/gallium/drivers/llvmpipe/lp_state_setup.c | 3 +-- src/gallium/drivers/llvmpipe/lp_state_setup.h | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 03036cd75d2..4ab0b72a574 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -323,8 +323,7 @@ do_triangle_ccw(struct lp_setup_context *setup, frontfacing, GET_A0(&tri->inputs), GET_DADX(&tri->inputs), - GET_DADY(&tri->inputs), - &setup->setup.variant->key ); + GET_DADY(&tri->inputs) ); tri->inputs.frontfacing = frontfacing; tri->inputs.disable = FALSE; diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c index a8dee280dd5..2c8b8b9a928 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c @@ -485,7 +485,7 @@ generate_setup_variant(struct llvmpipe_screen *screen, char func_name[256]; LLVMTypeRef vec4f_type; LLVMTypeRef func_type; - LLVMTypeRef arg_types[8]; + LLVMTypeRef arg_types[7]; LLVMBasicBlockRef block; LLVMBuilderRef builder; int64_t t0, t1; @@ -521,7 +521,6 @@ generate_setup_variant(struct llvmpipe_screen *screen, arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ - arg_types[7] = LLVMPointerType(vec4f_type, 0); /* key, unused */ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h index 2b080fbc321..b0c81baa75f 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h @@ -30,8 +30,7 @@ typedef void (*lp_jit_setup_triangle)( const float (*v0)[4], boolean front_facing, float (*a0)[4], float (*dadx)[4], - float (*dady)[4], - const struct lp_setup_variant_key *key ); + float (*dady)[4] ); -- cgit v1.2.3 From a1b7333c07797faea29f50fd49b6c5e877beca0a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 Oct 2010 12:04:57 +1000 Subject: r600g: do proper tracking of views/samplers. we need to do pretty much what r300g does in for this, this fixes some issues seen while working on tiling. --- src/gallium/drivers/r600/evergreen_state.c | 29 +++++++++++++++++++++-------- src/gallium/drivers/r600/r600_pipe.h | 6 ++++-- src/gallium/drivers/r600/r600_state.c | 25 +++++++++++++++++++------ 3 files changed, 44 insertions(+), 16 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 935496c04af..ce34ed4ad3c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -500,15 +500,27 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; - - rctx->ps_samplers.views = resource; - rctx->ps_samplers.n_views = count; - - for (int i = 0; i < count; i++) { - if (resource[i]) { - evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + int i; + + for (i = 0; i < count; i++) { + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); } } + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); + } + } + rctx->ps_samplers.n_views = count; } static void evergreen_bind_state(struct pipe_context *ctx, void *state) @@ -527,7 +539,8 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - rctx->ps_samplers.samplers = states; + + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; for (int i = 0; i < count; i++) { diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 35548329e46..e7c4b60d00c 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -94,10 +94,12 @@ struct r600_pipe_shader { }; /* needed for blitter save */ +#define NUM_TEX_UNITS 16 + struct r600_textures_info { - struct r600_pipe_sampler_view **views; + struct r600_pipe_sampler_view *views[NUM_TEX_UNITS]; unsigned n_views; - void **samplers; + void *samplers[NUM_TEX_UNITS]; unsigned n_samplers; }; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 00234f956aa..a95e2c6590c 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -710,15 +710,28 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view **resource = (struct r600_pipe_sampler_view **)views; + int i; - rctx->ps_samplers.views = resource; - rctx->ps_samplers.n_views = count; + for (i = 0; i < count; i++) { + if (&rctx->ps_samplers.views[i]->base != views[i]) { + if (resource[i]) + r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); + else + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + + pipe_sampler_view_reference( + (struct pipe_sampler_view **)&rctx->ps_samplers.views[i], + views[i]); - for (int i = 0; i < count; i++) { - if (resource[i]) { - r600_context_pipe_state_set_ps_resource(&rctx->ctx, &resource[i]->state, i); } } + for (i = count; i < NUM_TEX_UNITS; i++) { + if (rctx->ps_samplers.views[i]) { + r600_context_pipe_state_set_ps_resource(&rctx->ctx, NULL, i); + pipe_sampler_view_reference((struct pipe_sampler_view **)&rctx->ps_samplers.views[i], NULL); + } + } + rctx->ps_samplers.n_views = count; } static void r600_bind_state(struct pipe_context *ctx, void *state) @@ -737,7 +750,7 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state **rstates = (struct r600_pipe_state **)states; - rctx->ps_samplers.samplers = states; + memcpy(rctx->ps_samplers.samplers, states, sizeof(void*) * count); rctx->ps_samplers.n_samplers = count; for (int i = 0; i < count; i++) { -- cgit v1.2.3 From 69f8eebe726d01d013f7cdea6c0d058ff6b04337 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 Oct 2010 12:05:38 +1000 Subject: r600g: fix typo in tiling setup cb code. --- src/gallium/drivers/r600/r600_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index a95e2c6590c..81d25b54203 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -974,7 +974,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta swap = r600_translate_colorswap(rtex->resource.base.b.format); color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | - S_0280A0_ARRAY_MODE(rtex->array_mode); + S_0280A0_ARRAY_MODE(rtex->array_mode) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) -- cgit v1.2.3 From 21c6459dfbe3434fd80bc1beaffb9f71a34e8994 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 Oct 2010 13:23:34 +1000 Subject: r600g: depth needs to bound to ds --- src/gallium/drivers/r600/r600_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 152cd9210fd..07156bb7acc 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -318,7 +318,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.bind = 0; resource.flags = 0; - resource.bind |= PIPE_BIND_RENDER_TARGET; + resource.bind |= PIPE_BIND_DEPTH_STENCIL; rtex->flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource); if (rtex->flushed_depth_texture == NULL) { -- cgit v1.2.3 From c61b97d50425236f001dbc54f098318f921fe916 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 Oct 2010 12:05:27 +1000 Subject: r600g: attempt to cleanup depth blit cleanup what I'm nearly sure is unnecessary work in the depth blit code. --- src/gallium/drivers/r600/r600_blit.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index cae05aab28b..50d47060c1a 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -81,19 +81,10 @@ static void r600_blitter_end(struct pipe_context *ctx) int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *texture) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_framebuffer_state fb = *rctx->pframebuffer; struct pipe_surface *zsurf, *cbsurf; int level = 0; float depth = 1.0f; - r600_context_queries_suspend(&rctx->ctx); - for (int i = 0; i < fb.nr_cbufs; i++) { - fb.cbufs[i] = NULL; - pipe_surface_reference(&fb.cbufs[i], rctx->pframebuffer->cbufs[i]); - } - fb.zsbuf = NULL; - pipe_surface_reference(&fb.zsbuf, rctx->pframebuffer->zsbuf); - zsurf = ctx->screen->get_tex_surface(ctx->screen, &texture->resource.base.b, 0, level, 0, PIPE_BIND_DEPTH_STENCIL); @@ -101,21 +92,17 @@ int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_te (struct pipe_resource*)texture->flushed_depth_texture, 0, level, 0, PIPE_BIND_RENDER_TARGET); - r600_blitter_begin(ctx, R600_CLEAR); - util_blitter_save_framebuffer(rctx->blitter, &fb); if (rctx->family == CHIP_RV610 || rctx->family == CHIP_RV630 || - rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) + rctx->family == CHIP_RV620 || rctx->family == CHIP_RV635) depth = 0.0f; + r600_blitter_begin(ctx, R600_CLEAR_SURFACE); util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, rctx->custom_dsa_flush, depth); + r600_blitter_end(ctx); pipe_surface_reference(&zsurf, NULL); pipe_surface_reference(&cbsurf, NULL); - for (int i = 0; i < fb.nr_cbufs; i++) { - pipe_surface_reference(&fb.cbufs[i], NULL); - } - pipe_surface_reference(&fb.zsbuf, NULL); - r600_context_queries_resume(&rctx->ctx); + return 0; } -- cgit v1.2.3 From 375613afe38e0704b4ce38e64765b12d9660a846 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 13 Oct 2010 10:44:46 +1000 Subject: r600g: fix transfer function for tiling. this makes readback with tiled back work better. --- src/gallium/drivers/r600/r600_texture.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 07156bb7acc..edaebf86395 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -349,8 +349,6 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.sr = sr; trans->transfer.usage = usage; trans->transfer.box = *box; - trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; - trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); if (rtex->depth) { r = r600_texture_depth_flush(ctx, texture); if (r < 0) { @@ -398,7 +396,10 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, /* Always referenced in the blit. */ ctx->flush(ctx, 0, NULL); } + return &trans->transfer; } + trans->transfer.stride = rtex->pitch_in_bytes[sr.level]; + trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); return &trans->transfer; } -- cgit v1.2.3 From 8a74f7422bedb419f3527bb1ccd60e1e9220502c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 18 Oct 2010 09:45:58 +1000 Subject: r600g: retrieve tiling info from kernel for shared buffers. we need to know if the back is tiled so we can blit from it properly. --- src/gallium/drivers/r600/r600.h | 2 +- src/gallium/drivers/r600/r600_buffer.c | 2 +- src/gallium/drivers/r600/r600_texture.c | 7 +++++-- src/gallium/winsys/r600/drm/r600_bo.c | 18 +++++++++++++++++- src/gallium/winsys/r600/drm/r600_priv.h | 7 ++++++- src/gallium/winsys/r600/drm/radeon_bo.c | 19 +++++++++++++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 15ee0011061..62d983269f5 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -114,7 +114,7 @@ struct r600_bo; struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage); struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle); + unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index 2bfa4e22fec..455aa2e81f6 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -227,7 +227,7 @@ struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct r600_resource *rbuffer; struct r600_bo *bo = NULL; - bo = r600_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, NULL); if (bo == NULL) { return NULL; } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index edaebf86395..95906a74eb3 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -192,6 +192,8 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->pitch_override = pitch_in_bytes_override; rtex->array_mode = array_mode; + if (array_mode) + rtex->tiled = 1; r600_setup_miptree(screen, rtex); resource->size = rtex->size; @@ -271,18 +273,19 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, { struct radeon *rw = (struct radeon*)screen->winsys; struct r600_bo *bo = NULL; + unsigned array_mode = 0; /* Support only 2D textures without mipmaps */ if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || templ->depth0 != 1 || templ->last_level != 0) return NULL; - bo = r600_bo_handle(rw, whandle->handle); + bo = r600_bo_handle(rw, whandle->handle, &array_mode); if (bo == NULL) { return NULL; } - return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, whandle->stride, 0, bo); diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 9498f3a82ea..7d54ff18fc2 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -26,7 +26,9 @@ #include #include #include +#include "radeon_drm.h" #include "r600_priv.h" +#include "r600d.h" struct r600_bo *r600_bo(struct radeon *radeon, unsigned size, unsigned alignment, unsigned usage) @@ -55,7 +57,7 @@ struct r600_bo *r600_bo(struct radeon *radeon, } struct r600_bo *r600_bo_handle(struct radeon *radeon, - unsigned handle) + unsigned handle, unsigned *array_mode) { struct r600_bo *ws_bo = calloc(1, sizeof(struct r600_bo)); struct radeon_bo *bo; @@ -68,6 +70,20 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, bo = radeon_bo_pb_get_bo(ws_bo->pb); ws_bo->size = bo->size; pipe_reference_init(&ws_bo->reference, 1); + + radeon_bo_get_tiling_flags(radeon, bo, &ws_bo->tiling_flags, + &ws_bo->kernel_pitch); + if (array_mode) { + if (ws_bo->tiling_flags) { + if (ws_bo->tiling_flags & RADEON_TILING_MICRO) + *array_mode = V_0280A0_ARRAY_1D_TILED_THIN1; + if ((ws_bo->tiling_flags & (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) == + (RADEON_TILING_MICRO | RADEON_TILING_MACRO)) + *array_mode = V_0280A0_ARRAY_2D_TILED_THIN1; + } else { + *array_mode = 0; + } + } return ws_bo; } diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 08e243b00d0..b5bd7bd92c1 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -77,6 +77,8 @@ struct r600_bo { struct pipe_reference reference; struct pb_buffer *pb; unsigned size; + unsigned tiling_flags; + unsigned kernel_pitch; }; @@ -95,7 +97,10 @@ int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain); void radeon_bo_pbmgr_flush_maps(struct pb_manager *_mgr); int radeon_bo_fencelist(struct radeon *radeon, struct radeon_bo **bolist, uint32_t num_bo); - +int radeon_bo_get_tiling_flags(struct radeon *radeon, + struct radeon_bo *bo, + uint32_t *tiling_flags, + uint32_t *pitch); /* radeon_bo_pb.c */ struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); diff --git a/src/gallium/winsys/r600/drm/radeon_bo.c b/src/gallium/winsys/r600/drm/radeon_bo.c index 14a00161c8b..9d664b7e537 100644 --- a/src/gallium/winsys/r600/drm/radeon_bo.c +++ b/src/gallium/winsys/r600/drm/radeon_bo.c @@ -200,3 +200,22 @@ int radeon_bo_busy(struct radeon *radeon, struct radeon_bo *bo, uint32_t *domain *domain = args.domain; return ret; } + +int radeon_bo_get_tiling_flags(struct radeon *radeon, + struct radeon_bo *bo, + uint32_t *tiling_flags, + uint32_t *pitch) +{ + struct drm_radeon_gem_get_tiling args; + int ret; + + args.handle = bo->handle; + ret = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_GET_TILING, + &args, sizeof(args)); + if (ret) + return ret; + + *tiling_flags = args.tiling_flags; + *pitch = args.pitch; + return ret; +} -- cgit v1.2.3 From 01e0aaefcc0b9dbef47df4c2fc08118013db6894 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 18 Oct 2010 07:59:02 -0600 Subject: llvmpipe: remove lp_setup_coef*.c files from Makefile --- src/gallium/drivers/llvmpipe/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 726a19db2b5..08da2286b05 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -28,8 +28,6 @@ C_SOURCES = \ lp_scene_queue.c \ lp_screen.c \ lp_setup.c \ - lp_setup_coef.c \ - lp_setup_coef_intrin.c \ lp_setup_line.c \ lp_setup_point.c \ lp_setup_tri.c \ -- cgit v1.2.3 From f37b114dc3b9281938738572246fe7c685cf7211 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Oct 2010 09:35:21 -0700 Subject: llvmpipe: Don't test rounding of x.5 numbers. SSE4.1 has different rules, and so far this doesn't seem to cause any problems with conformance test suites. --- src/gallium/drivers/llvmpipe/lp_test_round.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_test_round.c b/src/gallium/drivers/llvmpipe/lp_test_round.c index 0ca27915923..816518e5081 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_round.c +++ b/src/gallium/drivers/llvmpipe/lp_test_round.c @@ -171,9 +171,12 @@ test_round(unsigned verbose, FILE *fp) LLVMDumpModule(module); for (i = 0; i < 3; i++) { + /* NOTE: There are several acceptable rules for x.5 rounding: ceiling, + * nearest even, etc. So we avoid testing such corner cases here. + */ v4sf xvals[3] = { {-10.0, -1, 0, 12.0}, - {-1.5, -0.25, 1.25, 2.5}, + {-1.49, -0.25, 1.25, 2.51}, {-0.99, -0.01, 0.01, 0.99} }; v4sf x = xvals[i]; -- cgit v1.2.3 From f822cc22f223a0a4f9cf1cdd5871780e5df11d67 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sun, 17 Oct 2010 23:17:01 -0700 Subject: r300g: Add new debug option for logging vertex/fragment program stats --- src/gallium/drivers/r300/r300_debug.c | 1 + src/gallium/drivers/r300/r300_fs.c | 5 +- src/gallium/drivers/r300/r300_screen.h | 1 + src/gallium/drivers/r300/r300_vs.c | 5 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 4 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 2 +- .../drivers/dri/r300/compiler/radeon_compiler.c | 66 ++++++++++++++++++++-- .../drivers/dri/r300/compiler/radeon_compiler.h | 5 +- .../dri/r300/compiler/radeon_remove_constants.c | 2 +- 9 files changed, 78 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index 145a7985da3..f78fe34790c 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -29,6 +29,7 @@ static const struct debug_named_value debug_options[] = { { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, + { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, { "draw", DBG_DRAW, "Log draw calls" }, { "swtcl", DBG_SWTCL, "Log SWTCL-specific info" }, { "rsblock", DBG_RS_BLOCK, "Log rasterizer registers" }, diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index d9d4a9304df..c91532eb7b3 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -378,7 +378,8 @@ static void r300_translate_fragment_shader( /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_FP); + DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &shader->code; compiler.state = shader->compare_state; @@ -395,7 +396,7 @@ static void r300_translate_fragment_shader( find_output_registers(&compiler, shader); - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_FP, "r300: Initial fragment program\n"); tgsi_dump(tokens, 0); } diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index dc2bc7e8279..8b7f1fab61b 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -102,6 +102,7 @@ r300_winsys_screen(struct pipe_screen *screen) { #define DBG_NO_CBZB (1 << 21) /* Statistics. */ #define DBG_STATS (1 << 24) +#define DBG_P_STAT (1 << 25) /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index e2b9af9d018..65696555ac3 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -202,7 +202,8 @@ void r300_translate_vertex_shader(struct r300_context *r300, memset(&compiler, 0, sizeof(compiler)); rc_init(&compiler.Base); - compiler.Base.Debug = DBG_ON(r300, DBG_VP); + DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; + DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; compiler.code = &vs->code; compiler.UserData = vs; compiler.Base.is_r500 = r300->screen->caps.is_r500; @@ -214,7 +215,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256; compiler.Base.remove_unused_constants = TRUE; - if (compiler.Base.Debug) { + if (compiler.Base.Debug & RC_DBG_LOG) { DBG(r300, DBG_VP, "r300: Initial vertex program\n"); tgsi_dump(vs->state.tokens, 0); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 4793f335770..2f130198d35 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -145,8 +145,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, - {"dump machine code", 0, is_r500 && c->Base.Debug, r500FragmentProgramDump, NULL}, - {"dump machine code", 0, !is_r500 && c->Base.Debug, r300FragmentProgramDump, NULL}, + {"dump machine code", 0, is_r500 && (c->Base.Debug & RC_DBG_LOG), r500FragmentProgramDump, NULL}, + {"dump machine code", 0, !is_r500 && (c->Base.Debug & RC_DBG_LOG), r300FragmentProgramDump, NULL}, {NULL, 0, 0, NULL, NULL} }; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index d7d49e514b9..bf8341f0173 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -1067,7 +1067,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, 1, translate_vertex_program, NULL}, - {"dump machine code", 0,c->Base.Debug,r300_vertex_program_dump, NULL}, + {"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL}, {NULL, 0, 0, NULL, NULL} }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index b410b2daf42..125d64e6497 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -26,7 +26,9 @@ #include #include +#include "radeon_dataflow.h" #include "radeon_program.h" +#include "radeon_program_pair.h" void rc_init(struct radeon_compiler * c) @@ -50,7 +52,7 @@ void rc_debug(struct radeon_compiler * c, const char * fmt, ...) { va_list ap; - if (!c->Debug) + if (!(c->Debug & RC_DBG_LOG)) return; va_start(ap, fmt); @@ -84,7 +86,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) } } - if (c->Debug) { + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "r300compiler error: "); va_start(ap, fmt); @@ -351,11 +353,65 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) } } +static void reg_count_callback(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * max_reg = userdata; + if (file == RC_FILE_TEMPORARY) + index > *max_reg ? *max_reg = index : 0; +} + +static void print_stats(struct radeon_compiler * c) +{ + struct rc_instruction * tmp; + unsigned i, max_reg, insts, fc, tex, alpha, rgb, presub; + max_reg = insts = fc = tex = alpha = rgb = presub = 0; + for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; + tmp = tmp->Next){ + const struct rc_opcode_info * info; + rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); + if (tmp->Type == RC_INSTRUCTION_NORMAL) { + if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) + presub++; + info = rc_get_opcode_info(tmp->U.I.Opcode); + } else { + if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) + presub++; + if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) + presub++; + /* Assuming alpha will never be a flow control or + * a tex instruction. */ + if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) + alpha++; + if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) + rgb++; + info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); + } + if (info->IsFlowControl) + fc++; + if (info->HasTexture) + tex++; + insts++; + } + if (insts < 4) + return; + fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + "~%4u Instructions\n" + "~%4u Vector Instructions (RGB)\n" + "~%4u Scalar Instructions (Alpha)\n" + "~%4u Flow Control Instructions\n" + "~%4u Texture Instructions\n" + "~%4u Presub Operations\n" + "~%4u Temporary Registers\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + insts, rgb, alpha, fc, tex, presub, max_reg + 1); +} + /* Executes a list of compiler passes given in the parameter 'list'. */ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list, const char *shader_name) { - if (c->Debug) { + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "%s: before compilation\n", shader_name); rc_print_program(&c->Program); } @@ -367,12 +423,14 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis if (c->Error) return; - if (c->Debug && list[i].dump) { + if ((c->Debug & RC_DBG_LOG) && list[i].dump) { fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name); rc_print_program(&c->Program); } } } + if (c->Debug & RC_DBG_STATS) + print_stats(c); } void rc_validate_final_shader(struct radeon_compiler *c, void *user) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 6d96ac9fdd9..31fd469a04f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -30,12 +30,15 @@ #include "radeon_program.h" #include "radeon_emulate_loops.h" +#define RC_DBG_LOG (1 << 0) +#define RC_DBG_STATS (1 << 1) + struct rc_swizzle_caps; struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; - unsigned Debug:1; + unsigned Debug:2; unsigned Error:1; char * ErrorMsg; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c index d6c808ad815..5f67f536f61 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -145,6 +145,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user) free(const_used); free(inv_remap_table); - if (c->Debug) + if (c->Debug & RC_DBG_LOG) rc_constants_print(&c->Program.Constants); } -- cgit v1.2.3 From 22725eb3e87921632a24579d63bb20c35a122afa Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 19 Oct 2010 10:02:28 -0700 Subject: llvmpipe: Initialize state variable in debug_bin function. --- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index e2783aa5683..5c9dc50866b 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -95,7 +95,7 @@ is_blend( const struct lp_rast_state *state, static void debug_bin( const struct cmd_bin *bin ) { - const struct lp_rast_state *state; + const struct lp_rast_state *state = NULL; const struct cmd_block *head = bin->head; int i, j = 0; -- cgit v1.2.3 From 36dde032a4f7d6a8b68c1adc8e829816e2e8826e Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Tue, 19 Oct 2010 10:14:11 -0700 Subject: llvmpipe: Initialize variable. --- src/gallium/drivers/llvmpipe/lp_rast_debug.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c b/src/gallium/drivers/llvmpipe/lp_rast_debug.c index 5c9dc50866b..64ac616f629 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c @@ -240,6 +240,7 @@ do_debug_bin( struct tile *tile, memset(tile->data, ' ', sizeof tile->data); tile->coverage = 0; tile->overdraw = 0; + tile->state = NULL; for (block = bin->head; block; block = block->next) { for (k = 0; k < block->count; k++, j++) { -- cgit v1.2.3 From 89c26866f05dcf8fbb716e38d4780cebcae71653 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 20 Oct 2010 12:44:08 -0700 Subject: r600g: Ensure r600_src is initialized in tgsi_exp function. Silences these GCC warnings. r600_shader.c: In function 'tgsi_exp': r600_shader.c:2339: warning: 'r600_src[0].rel' is used uninitialized in this function r600_shader.c:2339: warning: 'r600_src[0].abs' is used uninitialized in this function r600_shader.c:2339: warning: 'r600_src[0].neg' is used uninitialized in this function r600_shader.c:2339: warning: 'r600_src[0].chan' is used uninitialized in this function r600_shader.c:2339: warning: 'r600_src[0].sel' is used uninitialized in this function --- src/gallium/drivers/r600/r600_shader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index d1143985ead..f98f05512a1 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2287,7 +2287,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; -- cgit v1.2.3 From f36346c11662901cc2f2b2239d9adb38bbdc54b6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Oct 2010 14:47:32 -0600 Subject: llvmpipe/draw: always enable LLVMAddInstructionCombiningPass() We were working around an LLVM 2.5 bug but we're using LLVM 2.6 or later now. This basically reverts commit baddcbc5225e12052b3bc8c07a8b65243d76574d. This fixes the piglit bug/tri-tex-crash.c failure. --- src/gallium/auxiliary/draw/draw_llvm.c | 8 +------- src/gallium/drivers/llvmpipe/lp_jit.c | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 4ad570950fc..f8fd17fe2c4 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -274,13 +274,7 @@ draw_llvm_create(struct draw_context *draw) LLVMAddConstantPropagationPass(llvm->pass); } - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(llvm->pass); - } + LLVMAddInstructionCombiningPass(llvm->pass); LLVMAddGVNPass(llvm->pass); } else { /* We need at least this pass to prevent the backends to fail in diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index e09ec504ab7..4dbb12c999b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -187,13 +187,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) LLVMAddCFGSimplificationPass(screen->pass); LLVMAddPromoteMemoryToRegisterPass(screen->pass); LLVMAddConstantPropagationPass(screen->pass); - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(screen->pass); - } + LLVMAddInstructionCombiningPass(screen->pass); LLVMAddGVNPass(screen->pass); } else { /* We need at least this pass to prevent the backends to fail in -- cgit v1.2.3 From 206fbd9640f076ada23368edf158b6d40f673ba7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:20:14 +1000 Subject: r600g: all non-0 mipmap levels need to be w/h aligned to POT. this adds a new minify function to the driver to ensure this. --- src/gallium/drivers/r600/r600_texture.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 95906a74eb3..b9348baf0db 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -92,6 +92,15 @@ static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, } } +static unsigned mip_minify(unsigned size, unsigned level) +{ + unsigned val; + val = u_minify(size, level); + if (level > 0) + val = util_next_power_of_two(val); + return val; +} + static unsigned r600_texture_get_stride(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level) @@ -104,7 +113,7 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, if (rtex->pitch_override) return rtex->pitch_override; - width = u_minify(ptex->width0, level); + width = mip_minify(ptex->width0, level); stride = util_format_get_stride(ptex->format, align(width, 64)); if (chipc == EVERGREEN) @@ -121,8 +130,7 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, struct pipe_resource *ptex = &rtex->resource.base.b; unsigned height; - height = u_minify(ptex->height0, level); - height = util_next_power_of_two(height); + height = mip_minify(ptex->height0, level); return util_format_get_nblocksy(ptex->format, height); } @@ -249,8 +257,8 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, pipe_reference_init(&surface->reference, 1); pipe_resource_reference(&surface->texture, texture); surface->format = texture->format; - surface->width = u_minify(texture->width0, level); - surface->height = u_minify(texture->height0, level); + surface->width = mip_minify(texture->width0, level); + surface->height = mip_minify(texture->height0, level); surface->offset = offset; surface->usage = flags; surface->zslice = zslice; -- cgit v1.2.3 From ea5aab85fd195074189832c2d6870dd78f0f8966 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:26:04 +1000 Subject: r600g: move to per-miplevel array mode. Since the hw transitions from 2D->1D sampling below the 2D macrotile size we need to keep track of the array mode per level so we can render to it using the CB. --- src/gallium/drivers/r600/evergreen_state.c | 7 ++++--- src/gallium/drivers/r600/r600_resource.h | 2 +- src/gallium/drivers/r600/r600_state.c | 11 ++++++----- src/gallium/drivers/r600/r600_texture.c | 15 ++++++++++++--- 4 files changed, 23 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index ce34ed4ad3c..026d9355586 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -807,14 +807,15 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state if (state->zsbuf == NULL) return; + level = state->zsbuf->level; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; - rtex->array_mode = 2; + rtex->array_mode[level] = 2; rtex->tile_type = 1; rtex->depth = 1; rbuffer = &rtex->resource; - level = state->zsbuf->level; pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); @@ -840,7 +841,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, - S_028040_ARRAY_MODE(rtex->array_mode) | S_028040_FORMAT(format), + S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE, S_028058_PITCH_TILE_MAX(pitch), diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index ef484aba4a2..d34a8edac2b 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -53,10 +53,10 @@ struct r600_resource_texture { unsigned pitch_in_bytes[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_in_pixels[PIPE_MAX_TEXTURE_LEVELS]; unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned array_mode[PIPE_MAX_TEXTURE_LEVELS]; unsigned pitch_override; unsigned size; unsigned tiled; - unsigned array_mode; unsigned tile_type; unsigned depth; unsigned dirty; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 81d25b54203..7775619c04b 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -658,7 +658,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c } pitch = align(tmp->pitch_in_pixels[0], 8); if (tmp->tiled) { - array_mode = tmp->array_mode; + array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; } @@ -974,7 +974,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta swap = r600_translate_colorswap(rtex->resource.base.b.format); color_info = S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | - S_0280A0_ARRAY_MODE(rtex->array_mode) | + S_0280A0_ARRAY_MODE(rtex->array_mode[level]) | S_0280A0_BLEND_CLAMP(1) | S_0280A0_NUMBER_TYPE(ntype); if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) @@ -1016,14 +1016,15 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta if (state->zsbuf == NULL) return; + level = state->zsbuf->level; + rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; - rtex->array_mode = 2; + rtex->array_mode[level] = 2; rtex->tile_type = 1; rtex->depth = 1; rbuffer = &rtex->resource; - level = state->zsbuf->level; pitch = rtex->pitch_in_pixels[level] / 8 - 1; slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); @@ -1035,7 +1036,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO, - S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format), + S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index b9348baf0db..b36f8236081 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -142,8 +142,16 @@ static unsigned pitch_to_width(enum pipe_format format, util_format_get_blockwidth(format); } +static void r600_texture_set_array_mode(struct pipe_screen *screen, + struct r600_resource_texture *rtex, + unsigned level, unsigned array_mode) +{ + rtex->array_mode[level] = array_mode; +} + static void r600_setup_miptree(struct pipe_screen *screen, - struct r600_resource_texture *rtex) + struct r600_resource_texture *rtex, + unsigned array_mode) { struct pipe_resource *ptex = &rtex->resource.base.b; struct radeon *radeon = (struct radeon *)screen->winsys; @@ -152,6 +160,8 @@ static void r600_setup_miptree(struct pipe_screen *screen, unsigned nblocksy; for (i = 0, offset = 0; i <= ptex->last_level; i++) { + r600_texture_set_array_mode(screen, rtex, i, array_mode); + pitch = r600_texture_get_stride(screen, rtex, i); nblocksy = r600_texture_get_nblocksy(screen, rtex, i); @@ -198,11 +208,10 @@ r600_texture_create_object(struct pipe_screen *screen, resource->bo = bo; resource->domain = r600_domain_from_usage(resource->base.b.bind); rtex->pitch_override = pitch_in_bytes_override; - rtex->array_mode = array_mode; if (array_mode) rtex->tiled = 1; - r600_setup_miptree(screen, rtex); + r600_setup_miptree(screen, rtex, array_mode); resource->size = rtex->size; -- cgit v1.2.3 From 388ce31baa860a0d7535c852d768c6e243c8133c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:27:07 +1000 Subject: r600g: start adding hooks for aligning width/height for tiles. --- src/gallium/drivers/r600/r600_texture.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index b36f8236081..a90fc0382f1 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -92,6 +92,19 @@ static unsigned r600_texture_get_offset(struct r600_resource_texture *rtex, } } +static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, + enum pipe_format format, + unsigned array_mode) +{ + return 64; +} + +static unsigned r600_get_height_alignment(struct pipe_screen *screen, + unsigned array_mode) +{ + return 1; +} + static unsigned mip_minify(unsigned size, unsigned level) { unsigned val; @@ -108,14 +121,18 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, struct pipe_resource *ptex = &rtex->resource.base.b; struct radeon *radeon = (struct radeon *)screen->winsys; enum chip_class chipc = r600_get_family_class(radeon); - unsigned width, stride; + unsigned width, stride, tile_width; if (rtex->pitch_override) return rtex->pitch_override; width = mip_minify(ptex->width0, level); - - stride = util_format_get_stride(ptex->format, align(width, 64)); + if (util_format_is_plain(ptex->format)) { + tile_width = r600_get_pixel_alignment(screen, ptex->format, + rtex->array_mode[level]); + width = align(width, tile_width); + } + stride = util_format_get_stride(ptex->format, width); if (chipc == EVERGREEN) stride = align(stride, 512); else @@ -128,9 +145,14 @@ static unsigned r600_texture_get_nblocksy(struct pipe_screen *screen, unsigned level) { struct pipe_resource *ptex = &rtex->resource.base.b; - unsigned height; + unsigned height, tile_height; height = mip_minify(ptex->height0, level); + if (util_format_is_plain(ptex->format)) { + tile_height = r600_get_height_alignment(screen, + rtex->array_mode[level]); + height = align(height, tile_height); + } return util_format_get_nblocksy(ptex->format, height); } -- cgit v1.2.3 From 91e513044de21f20c2c085a99e9d784c7a61173c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:31:27 +1000 Subject: r600g: add r600 surface to store the aligned height. we need to know the aligned height when binding the surface to cb/zb, not the gallium surface height. --- src/gallium/drivers/r600/evergreen_state.c | 10 +++++++--- src/gallium/drivers/r600/r600_resource.h | 5 +++++ src/gallium/drivers/r600/r600_state.c | 10 +++++++--- src/gallium/drivers/r600/r600_texture.c | 31 ++++++++++++++++-------------- 4 files changed, 36 insertions(+), 20 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 026d9355586..0b54c2c7167 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -740,6 +740,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; @@ -747,6 +748,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state const struct util_format_description *desc; struct r600_bo *bo[3]; + surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; bo[0] = rbuffer->bo; @@ -754,7 +756,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state bo[2] = rbuffer->bo; pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -801,6 +803,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level; unsigned pitch, slice, format, stencil_format; @@ -809,6 +812,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state level = state->zsbuf->level; + surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; rtex->array_mode[level] = 2; @@ -817,7 +821,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state rbuffer = &rtex->resource; pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); stencil_format = r600_translate_stencilformat(state->zsbuf->texture->format); @@ -829,7 +833,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state if (stencil_format) { uint32_t stencil_offset; - stencil_offset = ((state->zsbuf->height * rtex->pitch_in_bytes[level]) + 255) & ~255; + stencil_offset = ((surf->aligned_height * rtex->pitch_in_bytes[level]) + 255) & ~255; r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, (state->zsbuf->offset + stencil_offset + r600_bo_offset(rbuffer->bo)) >> 8, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index d34a8edac2b..5d9fe8cf944 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -124,4 +124,9 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); +struct r600_surface { + struct pipe_surface base; + unsigned aligned_height; +}; + #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7775619c04b..bab3f224d7d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -950,6 +950,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; @@ -957,6 +958,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta const struct util_format_description *desc; struct r600_bo *bo[3]; + surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; bo[0] = rbuffer->bo; @@ -964,7 +966,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta bo[2] = rbuffer->bo; pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * state->cbufs[cb]->height / 64 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; ntype = 0; desc = util_format_description(rtex->resource.base.b.format); if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) @@ -1010,6 +1012,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta { struct r600_resource_texture *rtex; struct r600_resource *rbuffer; + struct r600_surface *surf; unsigned level; unsigned pitch, slice, format; @@ -1018,6 +1021,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta level = state->zsbuf->level; + surf = (struct r600_surface *)state->zsbuf; rtex = (struct r600_resource_texture*)state->zsbuf->texture; rtex->tiled = 1; rtex->array_mode[level] = 2; @@ -1026,7 +1030,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta rbuffer = &rtex->resource; pitch = rtex->pitch_in_pixels[level] / 8 - 1; - slice = rtex->pitch_in_pixels[level] * state->zsbuf->height / 64 - 1; + slice = rtex->pitch_in_pixels[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); r600_pipe_state_add_reg(rstate, R_02800C_DB_DEPTH_BASE, @@ -1039,7 +1043,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format), 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT, - (state->zsbuf->height / 8) - 1, 0xFFFFFFFF, NULL); + (surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL); } static void r600_set_framebuffer_state(struct pipe_context *ctx, diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index a90fc0382f1..c765c0c87a1 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -279,24 +279,27 @@ static struct pipe_surface *r600_get_tex_surface(struct pipe_screen *screen, unsigned zslice, unsigned flags) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - unsigned offset; + struct r600_surface *surface = CALLOC_STRUCT(r600_surface); + unsigned offset, tile_height; if (surface == NULL) return NULL; offset = r600_texture_get_offset(rtex, level, zslice, face); - pipe_reference_init(&surface->reference, 1); - pipe_resource_reference(&surface->texture, texture); - surface->format = texture->format; - surface->width = mip_minify(texture->width0, level); - surface->height = mip_minify(texture->height0, level); - surface->offset = offset; - surface->usage = flags; - surface->zslice = zslice; - surface->texture = texture; - surface->face = face; - surface->level = level; - return surface; + pipe_reference_init(&surface->base.reference, 1); + pipe_resource_reference(&surface->base.texture, texture); + surface->base.format = texture->format; + surface->base.width = mip_minify(texture->width0, level); + surface->base.height = mip_minify(texture->height0, level); + surface->base.offset = offset; + surface->base.usage = flags; + surface->base.zslice = zslice; + surface->base.texture = texture; + surface->base.face = face; + surface->base.level = level; + + tile_height = r600_get_height_alignment(screen, rtex->array_mode[level]); + surface->aligned_height = align(surface->base.height, tile_height); + return &surface->base; } static void r600_tex_surface_destroy(struct pipe_surface *surface) -- cgit v1.2.3 From 92ed84d11560e226c87bf2758b1503e3075b3f82 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:36:01 +1000 Subject: r600g: introduce a per-driver resource flag for transfers. this is to be used to decide not to tile a surface being used for transfers. --- src/gallium/drivers/r600/r600_resource.h | 3 +++ src/gallium/drivers/r600/r600_texture.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 5d9fe8cf944..d152285815c 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -25,6 +25,9 @@ #include "util/u_transfer.h" +/* flag to indicate a resource is to be used as a transfer so should not be tiled */ +#define R600_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV + /* Texture transfer. */ struct r600_transfer { /* Base class. */ diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index c765c0c87a1..b0e5cda6010 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -361,7 +361,7 @@ int r600_texture_depth_flush(struct pipe_context *ctx, resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; - resource.flags = 0; + resource.flags = R600_RESOURCE_FLAG_TRANSFER; resource.bind |= PIPE_BIND_DEPTH_STENCIL; @@ -412,7 +412,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, resource.nr_samples = 0; resource.usage = PIPE_USAGE_DYNAMIC; resource.bind = 0; - resource.flags = 0; + resource.flags = R600_RESOURCE_FLAG_TRANSFER; /* For texture reading, the temporary (detiled) texture is used as * a render target when blitting from a tiled texture. */ if (usage & PIPE_TRANSFER_READ) { -- cgit v1.2.3 From cdd14668b67d80729c0bb6480dc2317ac4e1cbb9 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:37:54 +1000 Subject: r600g: add texture tiling alignment support. this sets things up to align stride/height with tile sizes, it also adds support for the 2D/1D array mode cross over point. --- src/gallium/drivers/r600/r600_texture.c | 65 ++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index b0e5cda6010..d7ac74c0f19 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -96,13 +96,46 @@ static unsigned r600_get_pixel_alignment(struct pipe_screen *screen, enum pipe_format format, unsigned array_mode) { - return 64; + struct r600_screen* rscreen = (struct r600_screen *)screen; + unsigned pixsize = util_format_get_blocksize(format); + int p_align; + + switch(array_mode) { + case V_038000_ARRAY_1D_TILED_THIN1: + p_align = MAX2(8, + ((rscreen->tiling_info->group_bytes / 8 / pixsize))); + break; + case V_038000_ARRAY_2D_TILED_THIN1: + p_align = MAX2(rscreen->tiling_info->num_banks, + (((rscreen->tiling_info->group_bytes / 8 / pixsize)) * + rscreen->tiling_info->num_banks)); + break; + case 0: + default: + p_align = 64; + break; + } + return p_align; } static unsigned r600_get_height_alignment(struct pipe_screen *screen, unsigned array_mode) { - return 1; + struct r600_screen* rscreen = (struct r600_screen *)screen; + int h_align; + + switch (array_mode) { + case V_038000_ARRAY_2D_TILED_THIN1: + h_align = rscreen->tiling_info->num_channels * 8; + break; + case V_038000_ARRAY_1D_TILED_THIN1: + h_align = 8; + break; + default: + h_align = 1; + break; + } + return h_align; } static unsigned mip_minify(unsigned size, unsigned level) @@ -135,8 +168,6 @@ static unsigned r600_texture_get_stride(struct pipe_screen *screen, stride = util_format_get_stride(ptex->format, width); if (chipc == EVERGREEN) stride = align(stride, 512); - else - stride = align(stride, 256); return stride; } @@ -168,7 +199,31 @@ static void r600_texture_set_array_mode(struct pipe_screen *screen, struct r600_resource_texture *rtex, unsigned level, unsigned array_mode) { - rtex->array_mode[level] = array_mode; + struct pipe_resource *ptex = &rtex->resource.base.b; + + switch (array_mode) { + case V_0280A0_ARRAY_LINEAR_GENERAL: + case V_0280A0_ARRAY_LINEAR_ALIGNED: + case V_0280A0_ARRAY_1D_TILED_THIN1: + default: + rtex->array_mode[level] = array_mode; + break; + case V_0280A0_ARRAY_2D_TILED_THIN1: + { + unsigned w, h, tile_height, tile_width; + + tile_height = r600_get_height_alignment(screen, array_mode); + tile_width = r600_get_pixel_alignment(screen, ptex->format, array_mode); + + w = mip_minify(ptex->width0, level); + h = mip_minify(ptex->height0, level); + if (w < tile_width || h < tile_height) + rtex->array_mode[level] = V_0280A0_ARRAY_1D_TILED_THIN1; + else + rtex->array_mode[level] = array_mode; + } + break; + } } static void r600_setup_miptree(struct pipe_screen *screen, -- cgit v1.2.3 From 089aa0ba247cee908ae689f8e4f3ffc457ce7627 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 13:40:45 +1000 Subject: r600g: add texture tiling enable under a debug option. At the moment you need kernel patches to have texture tiling work with the kernel CS checker, so once they are upstream and the drm version is bumped we can make this enable flip the other way most likely. --- src/gallium/drivers/r600/r600_texture.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index d7ac74c0f19..4ebd5b754b3 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -307,6 +307,13 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, { unsigned array_mode = 0; + if (debug_get_bool_option("R600_FORCE_TILING", FALSE)) { + if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) && + !(templ->bind & PIPE_BIND_SCANOUT)) { + array_mode = V_038000_ARRAY_2D_TILED_THIN1; + } + } + return (struct pipe_resource *)r600_texture_create_object(screen, templ, array_mode, 0, 0, NULL); -- cgit v1.2.3 From abc5435f225e637c0c936f8562829dfdddd10caf Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 21 Oct 2010 01:44:48 -0700 Subject: llvmpipe: Remove unnecessary header. --- src/gallium/drivers/llvmpipe/lp_jit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 4dbb12c999b..c540f9b3628 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -36,7 +36,6 @@ #include #include "util/u_memory.h" -#include "util/u_cpu_detect.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_debug.h" #include "lp_screen.h" -- cgit v1.2.3 From e68c83a5a01a8a659857310cfcc785c7e028d3f0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 17:34:25 +1000 Subject: r600g: initial translate state support --- src/gallium/drivers/r600/r600_pipe.c | 9 ++ src/gallium/drivers/r600/r600_pipe.h | 19 ++- src/gallium/drivers/r600/r600_shader.c | 2 +- src/gallium/drivers/r600/r600_state.c | 225 ++++++++++++++++++++++++++++++++- 4 files changed, 250 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index dd8fa4fcd77..bea7ef5df84 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -85,6 +85,9 @@ static void r600_destroy_context(struct pipe_context *context) u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); + if (rctx->tran.translate_cache) + translate_cache_destroy(rctx->tran.translate_cache); + FREE(rctx->ps_resource); FREE(rctx->vs_resource); FREE(rctx); @@ -173,6 +176,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void return NULL; } + rctx->tran.translate_cache = translate_cache_create(); + if (rctx->tran.translate_cache == NULL) { + FREE(rctx); + return NULL; + } + rctx->vs_resource = CALLOC(R600_RESOURCE_ARRAY_SIZE, sizeof(struct r600_pipe_state)); if (!rctx->vs_resource) { FREE(rctx); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index e7c4b60d00c..7fb47b608e4 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -30,6 +30,7 @@ #include #include #include +#include "translate/translate_cache.h" #include "r600.h" #include "r600_public.h" #include "r600_shader.h" @@ -83,7 +84,10 @@ struct r600_vertex_element { unsigned count; unsigned refcount; - struct pipe_vertex_element elements[32]; + struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; + enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; + unsigned hw_format_size[PIPE_MAX_ATTRIBS]; + boolean incompatible_layout; }; struct r600_pipe_shader { @@ -103,6 +107,16 @@ struct r600_textures_info { unsigned n_samplers; }; +struct r600_translate_context { + /* Translate cache for incompatible vertex offset/stride/format fallback. */ + struct translate_cache *translate_cache; + + /* The vertex buffer slot containing the translated buffer. */ + unsigned vb_slot; + /* Saved and new vertex element state. */ + void *saved_velems, *new_velems; +}; + #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 @@ -142,6 +156,9 @@ struct r600_pipe_context { unsigned any_user_vbs; struct r600_textures_info ps_samplers; + unsigned vb_max_index; + struct r600_translate_context tran; + }; struct r600_drawl { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f98f05512a1..0dd416c0d83 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -270,7 +270,7 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader } rshader->vertex_elements = *rctx->vertex_elements; for (i = 0; i < rctx->vertex_elements->count; i++) { - resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; + resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; } r600_bo_reference(rctx->radeon, &rshader->bo, NULL); LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index bab3f224d7d..5cc4b3044b4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -39,6 +39,8 @@ #include #include #include +#include "translate/translate_cache.h" +#include "translate/translate.h" #include #include "r600.h" #include "r600d.h" @@ -99,7 +101,7 @@ static void r600_draw_common(struct r600_drawl *draw) vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); word2 = format | S_038008_STRIDE(vertex_buffer->stride); @@ -182,6 +184,171 @@ static void r600_draw_common(struct r600_drawl *draw) r600_context_draw(&rctx->ctx, &rdraw); } +static void r600_begin_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r600_vertex_element *ve = rctx->vertex_elements; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->elements[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->elements[i].vertex_buffer_index; + te->input_format = ve->elements[i].src_format; + te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(rctx->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = rctx->vb_max_index + 1; + + out_buffer = pipe_buffer_create(&rctx->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + { + float *flt = out_map; + fprintf(stderr,"num verts is %d\n", num_verts); + for (i = 0; i < num_verts; i++) { + fprintf(stderr,"%f %f\n", flt[i*2], flt[i*2+1]); + } + } + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + rctx->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + rctx->tran.saved_velems = rctx->vertex_elements; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } + } + + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +static void r600_end_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, + NULL); +} + void r600_translate_index_buffer(struct r600_pipe_context *r600, struct pipe_resource **index_buffer, unsigned *index_size, @@ -210,12 +377,17 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_drawl draw; + boolean translate = FALSE; + + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); rctx->any_user_vbs = FALSE; } - memset(&draw, 0, sizeof(struct r600_drawl)); draw.ctx = ctx; draw.mode = info->mode; @@ -246,6 +418,9 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_draw_common(&draw); + if (translate) + r600_end_vertex_translate(rctx); + pipe_resource_reference(&draw.index_buffer, NULL); } @@ -582,16 +757,45 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return rstate; } +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + static void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements) { struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + int i; + enum pipe_format *format; assert(count < 32); + if (!v) + return NULL; + v->count = count; - v->refcount = 1; memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + + for (i = 0; i < count; i++) { + v->hw_format[i] = v->elements[i].src_format; + format = &v->hw_format[i]; + + switch (*format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + v->incompatible_layout = + v->incompatible_layout || + v->elements[i].src_format != v->hw_format[i] || + v->elements[i].src_offset % 4 != 0; + + v->hw_format_size[i] = + align(util_format_get_blocksize(v->hw_format[i]), 4); + } + + v->refcount = 1; return v; } @@ -1164,18 +1368,33 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_vertex_buffer *vbo; + unsigned max_index = (unsigned)-1, vbo_max_index; for (int i = 0; i < rctx->nvertex_buffer; i++) { pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); } memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + for (int i = 0; i < count; i++) { + vbo = (struct pipe_vertex_buffer*)&buffers[i]; + rctx->vertex_buffer[i].buffer = NULL; if (r600_buffer_is_user_buffer(buffers[i].buffer)) rctx->any_user_vbs = TRUE; pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + + if (vbo->max_index == ~0) { + if (!vbo->stride) + vbo->max_index = 1; + else + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } + + max_index = MIN2(vbo->max_index, max_index); } rctx->nvertex_buffer = count; + rctx->vb_max_index = max_index; } static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, -- cgit v1.2.3 From f39e6c9c816b603a4ed8fd8cda8569b7e13c1f68 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Oct 2010 19:11:23 +1000 Subject: r600g: start splitting out common code from eg/r600. no point duplicating code that doesn't touch hw, also make it easier to spot mistakes --- src/gallium/drivers/r600/Makefile | 4 +- src/gallium/drivers/r600/evergreen_state.c | 66 ++----- src/gallium/drivers/r600/r600_pipe.h | 20 +- src/gallium/drivers/r600/r600_state.c | 282 --------------------------- src/gallium/drivers/r600/r600_state_common.c | 123 ++++++++++++ src/gallium/drivers/r600/r600_translate.c | 211 ++++++++++++++++++++ 6 files changed, 367 insertions(+), 339 deletions(-) create mode 100644 src/gallium/drivers/r600/r600_state_common.c create mode 100644 src/gallium/drivers/r600/r600_translate.c (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile index ede0bb2ec45..a484f38e9f1 100644 --- a/src/gallium/drivers/r600/Makefile +++ b/src/gallium/drivers/r600/Makefile @@ -19,6 +19,8 @@ C_SOURCES = \ r600_texture.c \ r700_asm.c \ evergreen_state.c \ - eg_asm.c + eg_asm.c \ + r600_translate.c \ + r600_state_common.c include ../../Makefile.template diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 0b54c2c7167..f394527edfd 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -377,19 +377,6 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, return rstate; } -static void *evergreen_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - - assert(count < 32); - v->count = count; - v->refcount = 1; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - return v; -} - static void evergreen_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) { @@ -935,40 +922,6 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void evergreen_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void evergreen_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - for (int i = 0; i < count; i++) { - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - } - rctx->nvertex_buffer = count; -} - static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct pipe_resource *buffer) { @@ -1065,7 +1018,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.create_rasterizer_state = evergreen_create_rs_state; rctx->context.create_sampler_state = evergreen_create_sampler_state; rctx->context.create_sampler_view = evergreen_create_sampler_view; - rctx->context.create_vertex_elements_state = evergreen_create_vertex_elements; + rctx->context.create_vertex_elements_state = r600_create_vertex_elements; rctx->context.create_vs_state = evergreen_create_shader_state; rctx->context.bind_blend_state = evergreen_bind_blend_state; rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state; @@ -1091,8 +1044,8 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_sample_mask = evergreen_set_sample_mask; rctx->context.set_scissor_state = evergreen_set_scissor_state; rctx->context.set_stencil_ref = evergreen_set_stencil_ref; - rctx->context.set_vertex_buffers = evergreen_set_vertex_buffers; - rctx->context.set_index_buffer = evergreen_set_index_buffer; + rctx->context.set_vertex_buffers = r600_set_vertex_buffers; + rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; @@ -1378,6 +1331,12 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_draw rdraw; struct r600_pipe_state vgt; struct r600_drawl draw; + boolean translate = FALSE; + + if (rctx->vertex_elements->incompatible_layout) { + r600_begin_vertex_translate(rctx); + translate = TRUE; + } if (rctx->any_user_vbs) { r600_upload_user_buffers(rctx); @@ -1454,11 +1413,11 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); - format = r600_translate_vertex_data_type(rctx->vertex_elements->elements[i].src_format); + format = r600_translate_vertex_data_type(rctx->vertex_elements->hw_format[i]); word2 = format | S_030008_STRIDE(vertex_buffer->stride); - word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->elements[i].src_format); + word3 = r600_translate_vertex_data_swizzle(rctx->vertex_elements->hw_format[i]); r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); @@ -1539,6 +1498,9 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info) } evergreen_context_draw(&rctx->ctx, &rdraw); + if (translate) + r600_end_vertex_translate(rctx); + pipe_resource_reference(&draw.index_buffer, NULL); } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 7fb47b608e4..11410f17c75 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -216,10 +216,6 @@ int r600_find_vs_semantic_index(struct r600_shader *vs, void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); void r600_init_config(struct r600_pipe_context *rctx); -void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); /* r600_helper.h */ int r600_conv_pipe_prim(unsigned pprim, unsigned *prim); @@ -230,6 +226,22 @@ uint32_t r600_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); +/* r600_translate.c */ +void r600_begin_vertex_translate(struct r600_pipe_context *rctx); +void r600_end_vertex_translate(struct r600_pipe_context *rctx); +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count); + +/* r600_state_common.c */ +void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib); +void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers); +void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements); /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 5cc4b3044b4..df2c05ea139 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -37,10 +37,7 @@ #include #include #include -#include #include -#include "translate/translate_cache.h" -#include "translate/translate.h" #include #include "r600.h" #include "r600d.h" @@ -184,195 +181,6 @@ static void r600_draw_common(struct r600_drawl *draw) r600_context_draw(&rctx->ctx, &rdraw); } -static void r600_begin_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - struct translate_key key = {0}; - struct translate_element *te; - unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; - struct translate *tr; - struct r600_vertex_element *ve = rctx->vertex_elements; - boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; - void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; - struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; - struct pipe_resource *out_buffer; - unsigned i, num_verts; - - /* Initialize the translate key, i.e. the recipe how vertices should be - * translated. */ - for (i = 0; i < ve->count; i++) { - struct pipe_vertex_buffer *vb = - &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; - enum pipe_format output_format = ve->hw_format[i]; - unsigned output_format_size = ve->hw_format_size[i]; - - /* Check for support. */ - if (ve->elements[i].src_format == ve->hw_format[i] && - (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && - vb->stride % 4 == 0) { - continue; - } - - /* Workaround for translate: output floats instead of halfs. */ - switch (output_format) { - case PIPE_FORMAT_R16_FLOAT: - output_format = PIPE_FORMAT_R32_FLOAT; - output_format_size = 4; - break; - case PIPE_FORMAT_R16G16_FLOAT: - output_format = PIPE_FORMAT_R32G32_FLOAT; - output_format_size = 8; - break; - case PIPE_FORMAT_R16G16B16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - output_format_size = 12; - break; - case PIPE_FORMAT_R16G16B16A16_FLOAT: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - output_format_size = 16; - break; - default:; - } - - /* Add this vertex element. */ - te = &key.element[key.nr_elements]; - /*te->type; - te->instance_divisor;*/ - te->input_buffer = ve->elements[i].vertex_buffer_index; - te->input_format = ve->elements[i].src_format; - te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; - te->output_format = output_format; - te->output_offset = key.output_stride; - - key.output_stride += output_format_size; - vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; - tr_elem_index[i] = key.nr_elements; - key.nr_elements++; - } - - /* Get a translate object. */ - tr = translate_cache_find(rctx->tran.translate_cache, &key); - - /* Map buffers we want to translate. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - vb_map[i] = pipe_buffer_map(pipe, vb->buffer, - PIPE_TRANSFER_READ, &vb_transfer[i]); - - tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); - } - } - - /* Create and map the output buffer. */ - num_verts = rctx->vb_max_index + 1; - - out_buffer = pipe_buffer_create(&rctx->screen->screen, - PIPE_BIND_VERTEX_BUFFER, - key.output_stride * num_verts); - - out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, - &out_transfer); - - /* Translate. */ - tr->run(tr, 0, num_verts, 0, out_map); - - /* Unmap all buffers. */ - for (i = 0; i < rctx->nvertex_buffer; i++) { - if (vb_translated[i]) { - pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, - vb_transfer[i]); - } - } - - { - float *flt = out_map; - fprintf(stderr,"num verts is %d\n", num_verts); - for (i = 0; i < num_verts; i++) { - fprintf(stderr,"%f %f\n", flt[i*2], flt[i*2+1]); - } - } - pipe_buffer_unmap(pipe, out_buffer, out_transfer); - - /* Setup the new vertex buffer in the first free slot. */ - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; - - if (!vb->buffer) { - pipe_resource_reference(&vb->buffer, out_buffer); - vb->buffer_offset = 0; - vb->max_index = num_verts - 1; - vb->stride = key.output_stride; - rctx->tran.vb_slot = i; - break; - } - } - - /* Save and replace vertex elements. */ - { - struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; - - rctx->tran.saved_velems = rctx->vertex_elements; - - for (i = 0; i < ve->count; i++) { - if (vb_translated[ve->elements[i].vertex_buffer_index]) { - te = &key.element[tr_elem_index[i]]; - new_velems[i].instance_divisor = ve->elements[i].instance_divisor; - new_velems[i].src_format = te->output_format; - new_velems[i].src_offset = te->output_offset; - new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; - } else { - memcpy(&new_velems[i], &ve->elements[i], - sizeof(struct pipe_vertex_element)); - } - } - - rctx->tran.new_velems = - pipe->create_vertex_elements_state(pipe, ve->count, new_velems); - pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); - } - - pipe_resource_reference(&out_buffer, NULL); -} - -static void r600_end_vertex_translate(struct r600_pipe_context *rctx) -{ - struct pipe_context *pipe = &rctx->context; - - /* Restore vertex elements. */ - pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); - pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); - - /* Delete the now-unused VBO. */ - pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, - NULL); -} - -void r600_translate_index_buffer(struct r600_pipe_context *r600, - struct pipe_resource **index_buffer, - unsigned *index_size, - unsigned *start, unsigned count) -{ - switch (*index_size) { - case 1: - util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); - *index_size = 2; - *start = 0; - break; - - case 2: - if (*start % 2 != 0) { - util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); - *start = 0; - } - break; - - case 4: - break; - } -} - void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -757,47 +565,6 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return rstate; } -#define FORMAT_REPLACE(what, withwhat) \ - case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break - -static void *r600_create_vertex_elements(struct pipe_context *ctx, - unsigned count, - const struct pipe_vertex_element *elements) -{ - struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); - int i; - enum pipe_format *format; - - assert(count < 32); - if (!v) - return NULL; - - v->count = count; - memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); - - for (i = 0; i < count; i++) { - v->hw_format[i] = v->elements[i].src_format; - format = &v->hw_format[i]; - - switch (*format) { - FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); - FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); - FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); - FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); - default:; - } - v->incompatible_layout = - v->incompatible_layout || - v->elements[i].src_format != v->hw_format[i] || - v->elements[i].src_offset % 4 != 0; - - v->hw_format_size[i] = - align(util_format_get_blocksize(v->hw_format[i]), 4); - } - - v->refcount = 1; - return v; -} static void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) @@ -1348,55 +1115,6 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_set_index_buffer(struct pipe_context *ctx, - const struct pipe_index_buffer *ib) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (ib) { - pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); - memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); - } else { - pipe_resource_reference(&rctx->index_buffer.buffer, NULL); - memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); - } - - /* TODO make this more like a state */ -} - -static void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct pipe_vertex_buffer *vbo; - unsigned max_index = (unsigned)-1, vbo_max_index; - - for (int i = 0; i < rctx->nvertex_buffer; i++) { - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); - } - memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); - - for (int i = 0; i < count; i++) { - vbo = (struct pipe_vertex_buffer*)&buffers[i]; - - rctx->vertex_buffer[i].buffer = NULL; - if (r600_buffer_is_user_buffer(buffers[i].buffer)) - rctx->any_user_vbs = TRUE; - pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); - - if (vbo->max_index == ~0) { - if (!vbo->stride) - vbo->max_index = 1; - else - vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; - } - - max_index = MIN2(vbo->max_index, max_index); - } - rctx->nvertex_buffer = count; - rctx->vb_max_index = max_index; -} - static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct pipe_resource *buffer) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c new file mode 100644 index 00000000000..722ce32263e --- /dev/null +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -0,0 +1,123 @@ +/* + * Copyright 2010 Red Hat Inc. + * 2010 Jerome Glisse + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Jerome Glisse + */ +#include +#include +#include +#include "r600_pipe.h" + +/* common state between evergreen and r600 */ + +void r600_set_index_buffer(struct pipe_context *ctx, + const struct pipe_index_buffer *ib) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (ib) { + pipe_resource_reference(&rctx->index_buffer.buffer, ib->buffer); + memcpy(&rctx->index_buffer, ib, sizeof(rctx->index_buffer)); + } else { + pipe_resource_reference(&rctx->index_buffer.buffer, NULL); + memset(&rctx->index_buffer, 0, sizeof(rctx->index_buffer)); + } + + /* TODO make this more like a state */ +} + +void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct pipe_vertex_buffer *vbo; + unsigned max_index = (unsigned)-1; + + for (int i = 0; i < rctx->nvertex_buffer; i++) { + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, NULL); + } + memcpy(rctx->vertex_buffer, buffers, sizeof(struct pipe_vertex_buffer) * count); + + for (int i = 0; i < count; i++) { + vbo = (struct pipe_vertex_buffer*)&buffers[i]; + + rctx->vertex_buffer[i].buffer = NULL; + if (r600_buffer_is_user_buffer(buffers[i].buffer)) + rctx->any_user_vbs = TRUE; + pipe_resource_reference(&rctx->vertex_buffer[i].buffer, buffers[i].buffer); + + if (vbo->max_index == ~0) { + if (!vbo->stride) + vbo->max_index = 1; + else + vbo->max_index = (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride; + } + max_index = MIN2(vbo->max_index, max_index); + } + rctx->nvertex_buffer = count; + rctx->vb_max_index = max_index; +} + + +#define FORMAT_REPLACE(what, withwhat) \ + case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break + +void *r600_create_vertex_elements(struct pipe_context *ctx, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element); + int i; + enum pipe_format *format; + + assert(count < 32); + if (!v) + return NULL; + + v->count = count; + memcpy(v->elements, elements, count * sizeof(struct pipe_vertex_element)); + + for (i = 0; i < count; i++) { + v->hw_format[i] = v->elements[i].src_format; + format = &v->hw_format[i]; + + switch (*format) { + FORMAT_REPLACE(R64_FLOAT, R32_FLOAT); + FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT); + FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT); + FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT); + default:; + } + v->incompatible_layout = + v->incompatible_layout || + v->elements[i].src_format != v->hw_format[i] || + v->elements[i].src_offset % 4 != 0; + + v->hw_format_size[i] = + align(util_format_get_blocksize(v->hw_format[i]), 4); + } + + v->refcount = 1; + return v; +} diff --git a/src/gallium/drivers/r600/r600_translate.c b/src/gallium/drivers/r600/r600_translate.c new file mode 100644 index 00000000000..9a07cf2073f --- /dev/null +++ b/src/gallium/drivers/r600/r600_translate.c @@ -0,0 +1,211 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + */ +#include "translate/translate_cache.h" +#include "translate/translate.h" +#include +#include +#include "r600_pipe.h" + +void r600_begin_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + struct translate_key key = {0}; + struct translate_element *te; + unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0}; + struct translate *tr; + struct r600_vertex_element *ve = rctx->vertex_elements; + boolean vb_translated[PIPE_MAX_ATTRIBS] = {0}; + void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map; + struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer; + struct pipe_resource *out_buffer; + unsigned i, num_verts; + + /* Initialize the translate key, i.e. the recipe how vertices should be + * translated. */ + for (i = 0; i < ve->count; i++) { + struct pipe_vertex_buffer *vb = + &rctx->vertex_buffer[ve->elements[i].vertex_buffer_index]; + enum pipe_format output_format = ve->hw_format[i]; + unsigned output_format_size = ve->hw_format_size[i]; + + /* Check for support. */ + if (ve->elements[i].src_format == ve->hw_format[i] && + (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 && + vb->stride % 4 == 0) { + continue; + } + + /* Workaround for translate: output floats instead of halfs. */ + switch (output_format) { + case PIPE_FORMAT_R16_FLOAT: + output_format = PIPE_FORMAT_R32_FLOAT; + output_format_size = 4; + break; + case PIPE_FORMAT_R16G16_FLOAT: + output_format = PIPE_FORMAT_R32G32_FLOAT; + output_format_size = 8; + break; + case PIPE_FORMAT_R16G16B16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + output_format_size = 12; + break; + case PIPE_FORMAT_R16G16B16A16_FLOAT: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + output_format_size = 16; + break; + default:; + } + + /* Add this vertex element. */ + te = &key.element[key.nr_elements]; + /*te->type; + te->instance_divisor;*/ + te->input_buffer = ve->elements[i].vertex_buffer_index; + te->input_format = ve->elements[i].src_format; + te->input_offset = vb->buffer_offset + ve->elements[i].src_offset; + te->output_format = output_format; + te->output_offset = key.output_stride; + + key.output_stride += output_format_size; + vb_translated[ve->elements[i].vertex_buffer_index] = TRUE; + tr_elem_index[i] = key.nr_elements; + key.nr_elements++; + } + + /* Get a translate object. */ + tr = translate_cache_find(rctx->tran.translate_cache, &key); + + /* Map buffers we want to translate. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + vb_map[i] = pipe_buffer_map(pipe, vb->buffer, + PIPE_TRANSFER_READ, &vb_transfer[i]); + + tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index); + } + } + + /* Create and map the output buffer. */ + num_verts = rctx->vb_max_index + 1; + + out_buffer = pipe_buffer_create(&rctx->screen->screen, + PIPE_BIND_VERTEX_BUFFER, + key.output_stride * num_verts); + + out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE, + &out_transfer); + + /* Translate. */ + tr->run(tr, 0, num_verts, 0, out_map); + + /* Unmap all buffers. */ + for (i = 0; i < rctx->nvertex_buffer; i++) { + if (vb_translated[i]) { + pipe_buffer_unmap(pipe, rctx->vertex_buffer[i].buffer, + vb_transfer[i]); + } + } + + pipe_buffer_unmap(pipe, out_buffer, out_transfer); + + /* Setup the new vertex buffer in the first free slot. */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i]; + + if (!vb->buffer) { + pipe_resource_reference(&vb->buffer, out_buffer); + vb->buffer_offset = 0; + vb->max_index = num_verts - 1; + vb->stride = key.output_stride; + rctx->tran.vb_slot = i; + break; + } + } + + /* Save and replace vertex elements. */ + { + struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS]; + + rctx->tran.saved_velems = rctx->vertex_elements; + + for (i = 0; i < ve->count; i++) { + if (vb_translated[ve->elements[i].vertex_buffer_index]) { + te = &key.element[tr_elem_index[i]]; + new_velems[i].instance_divisor = ve->elements[i].instance_divisor; + new_velems[i].src_format = te->output_format; + new_velems[i].src_offset = te->output_offset; + new_velems[i].vertex_buffer_index = rctx->tran.vb_slot; + } else { + memcpy(&new_velems[i], &ve->elements[i], + sizeof(struct pipe_vertex_element)); + } + } + + rctx->tran.new_velems = + pipe->create_vertex_elements_state(pipe, ve->count, new_velems); + pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems); + } + + pipe_resource_reference(&out_buffer, NULL); +} + +void r600_end_vertex_translate(struct r600_pipe_context *rctx) +{ + struct pipe_context *pipe = &rctx->context; + + /* Restore vertex elements. */ + pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems); + pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems); + + /* Delete the now-unused VBO. */ + pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer, + NULL); +} + +void r600_translate_index_buffer(struct r600_pipe_context *r600, + struct pipe_resource **index_buffer, + unsigned *index_size, + unsigned *start, unsigned count) +{ + switch (*index_size) { + case 1: + util_shorten_ubyte_elts(&r600->context, index_buffer, 0, *start, count); + *index_size = 2; + *start = 0; + break; + + case 2: + if (*start % 2 != 0) { + util_rebuild_ushort_elts(&r600->context, index_buffer, 0, *start, count); + *start = 0; + } + break; + + case 4: + break; + } +} -- cgit v1.2.3 From 27d3bab05538a6b4cb41dac8334136f824b0e673 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Oct 2010 19:03:38 -0600 Subject: softpipe: enable primitive restart --- src/gallium/drivers/softpipe/sp_screen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 37557d11940..a2bfa1bd8d5 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -112,6 +112,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) return 1; case PIPE_CAP_STREAM_OUTPUT: return 1; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 0; case PIPE_CAP_SHADER_STENCIL_EXPORT: -- cgit v1.2.3 From 6692ed6f03126625810d56102a11171928e054c7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 21 Oct 2010 19:03:38 -0600 Subject: llvmpipe: enable primitive restart --- src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 96633d93654..ad0ea75b3a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -158,6 +158,8 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 0; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: return 1; case PIPE_CAP_DEPTH_CLAMP: -- cgit v1.2.3 From 67f6a4a8c46fa7cf0b337f3e937b4048a9fceb97 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Oct 2010 08:58:35 -0600 Subject: galahad: silence warnings --- src/gallium/drivers/galahad/glhd_context.c | 4 ++-- src/gallium/drivers/galahad/glhd_screen.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/galahad/glhd_context.c b/src/gallium/drivers/galahad/glhd_context.c index ff6d2aa00ab..50f66079c2a 100644 --- a/src/gallium/drivers/galahad/glhd_context.c +++ b/src/gallium/drivers/galahad/glhd_context.c @@ -641,7 +641,7 @@ galahad_set_index_buffer(struct pipe_context *_pipe, break; default: glhd_warn("index buffer %p has unrecognized index size %d", - _ib->buffer, _ib->index_size); + (void *) _ib->buffer, _ib->index_size); break; } } @@ -1013,7 +1013,7 @@ galahad_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) glhd_pipe->pipe = pipe; - glhd_warn("Created context %p", glhd_pipe); + glhd_warn("Created context %p", (void *) glhd_pipe); return &glhd_pipe->base; } diff --git a/src/gallium/drivers/galahad/glhd_screen.c b/src/gallium/drivers/galahad/glhd_screen.c index 288941b1066..b6cc41d908b 100644 --- a/src/gallium/drivers/galahad/glhd_screen.c +++ b/src/gallium/drivers/galahad/glhd_screen.c @@ -370,7 +370,7 @@ galahad_screen_create(struct pipe_screen *screen) glhd_screen->screen = screen; - glhd_warn("Created screen %p", glhd_screen); + glhd_warn("Created screen %p", (void *) glhd_screen); return &glhd_screen->base; } -- cgit v1.2.3 From 1d96ad67bc7927eb314d497e30cfd4eb3cfbb9ea Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 22 Oct 2010 19:45:05 +0200 Subject: r300g: do not print get_param errors in non-debug build --- src/gallium/drivers/r300/r300_screen.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index b448924f85e..963fa8fcd08 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -154,8 +154,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: return 0; default: - fprintf(stderr, "r300: Implementation error: Bad param %d\n", - param); + debug_printf("r300: Warning: Unknown CAP %d in get_param.\n", + param); return 0; } } @@ -265,8 +265,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0f; default: - fprintf(stderr, "r300: Implementation error: Bad paramf %d\n", - param); + debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n", + param); return 0.0f; } } -- cgit v1.2.3 From 469907d59738ad32c7a3b95072897432f1e81a0f Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 22 Oct 2010 20:33:25 +0200 Subject: r300g: say no to PIPE_CAP_STREAM_OUTPUT and PIPE_CAP_PRIMITIVE_RESTART --- src/gallium/drivers/r300/r300_screen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 963fa8fcd08..d445df408dd 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -125,6 +125,8 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_DEPTH_CLAMP: /* XXX implemented, but breaks Regnum Online */ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: case PIPE_CAP_SHADER_STENCIL_EXPORT: + case PIPE_CAP_STREAM_OUTPUT: + case PIPE_CAP_PRIMITIVE_RESTART: return 0; /* Texturing. */ -- cgit v1.2.3 From 4250882ccf8326ba9074c671110370534489caa6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 23 Oct 2010 09:35:37 -0600 Subject: softpipe: added some texture sample debug code (disabled) --- src/gallium/drivers/softpipe/sp_tex_sample.c | 57 ++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 088e48f81fe..2eac4c7a82b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -44,6 +44,9 @@ #include "sp_tex_tile_cache.h" +/** Set to one to help debug texture sampling */ +#define DEBUG_TEX 0 + /* * Return fractional part of 'f'. Used for computing interpolation weights. @@ -774,6 +777,18 @@ pot_level_size(unsigned base_pot, unsigned level) } +static void +print_sample(const char *function, float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", + function, + rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], + rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], + rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], + rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); +} + + /* Some image-filter fastpaths: */ static INLINE void @@ -832,6 +847,10 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler, tx[2][c], tx[3][c]); } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -872,6 +891,10 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -921,6 +944,10 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -957,6 +984,10 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -997,6 +1028,10 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1045,6 +1080,10 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler, rgba[c][j] = out[c]; } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1357,6 +1396,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler, } } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1402,13 +1445,9 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler, samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); } -#if 0 - printf("RGBA %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n", - rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0], - rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1], - rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2], - rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]); -#endif + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } @@ -1510,6 +1549,10 @@ mip_filter_linear_2d_linear_repeat_POT( } } } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } } -- cgit v1.2.3 From d8740b77ac30298c1742107e2afc4edabca562f0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 23 Oct 2010 10:27:19 -0600 Subject: softpipe: remove >32bpp color restriction The comment was out of date. The tile cache does handle >32-bit colors. --- src/gallium/drivers/softpipe/sp_screen.c | 7 ------- src/gallium/drivers/softpipe/sp_tile_cache.h | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index a2bfa1bd8d5..d90cf56808c 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -209,13 +209,6 @@ softpipe_is_format_supported( struct pipe_screen *screen, if (format_desc->block.width != 1 || format_desc->block.height != 1) return FALSE; - - /* - * TODO: Unfortunately we cannot render into anything more than 32 bits - * because we encode color clear values into a 32bit word. - */ - if (format_desc->block.bits > 32) - return FALSE; } if (bind & PIPE_BIND_DEPTH_STENCIL) { diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 031c7c1ea5c..4151a47c323 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -86,7 +86,7 @@ struct softpipe_tile_cache struct softpipe_cached_tile *entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; float clear_color[4]; /**< for color bufs */ - uint clear_val; /**< for z+stencil, or packed color clear value */ + uint clear_val; /**< for z+stencil */ boolean depth_stencil; /**< Is the surface a depth/stencil format? */ struct softpipe_cached_tile *tile; /**< scratch tile for clears */ -- cgit v1.2.3 From 9f9d24c89a2286f952b123c703e8268d2f1aa719 Mon Sep 17 00:00:00 2001 From: Tilman Sauerbeck Date: Sat, 23 Oct 2010 13:33:15 +0200 Subject: r600g: Fixed r600_vertex_element leak. Signed-off-by: Tilman Sauerbeck Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 13 +++++-------- src/gallium/drivers/r600/r600_pipe.h | 1 - src/gallium/drivers/r600/r600_state.c | 13 +++++-------- src/gallium/drivers/r600/r600_state_common.c | 1 - 4 files changed, 10 insertions(+), 18 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f394527edfd..81241478ab2 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -561,13 +561,12 @@ static void evergreen_delete_state(struct pipe_context *ctx, void *state) static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state) { - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); + FREE(state); + + if (rctx->vertex_elements == state) + rctx->vertex_elements = NULL; } static void evergreen_set_clip_state(struct pipe_context *ctx, @@ -610,10 +609,8 @@ static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; - evergreen_delete_vertex_element(ctx, rctx->vertex_elements); rctx->vertex_elements = v; if (v) { - v->refcount++; // rctx->vs_rebuild = TRUE; } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 11410f17c75..b27bbbac8c1 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -83,7 +83,6 @@ struct r600_pipe_blend { struct r600_vertex_element { unsigned count; - unsigned refcount; struct pipe_vertex_element elements[PIPE_MAX_ATTRIBS]; enum pipe_format hw_format[PIPE_MAX_ATTRIBS]; unsigned hw_format_size[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index df2c05ea139..71005297646 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -755,13 +755,12 @@ static void r600_delete_state(struct pipe_context *ctx, void *state) static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { - struct r600_vertex_element *v = (struct r600_vertex_element*)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - if (v == NULL) - return; - if (--v->refcount) - return; - free(v); + FREE(state); + + if (rctx->vertex_elements == state) + rctx->vertex_elements = NULL; } static void r600_set_clip_state(struct pipe_context *ctx, @@ -804,10 +803,8 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_vertex_element *v = (struct r600_vertex_element*)state; - r600_delete_vertex_element(ctx, rctx->vertex_elements); rctx->vertex_elements = v; if (v) { - v->refcount++; // rctx->vs_rebuild = TRUE; } } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 722ce32263e..e127bac7797 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -118,6 +118,5 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, align(util_format_get_blocksize(v->hw_format[i]), 4); } - v->refcount = 1; return v; } -- cgit v1.2.3 From 9612b482e2c8e994709bcaab79185224b4d76670 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 24 Oct 2010 12:53:50 +1000 Subject: r600g: merge more of the common r600/evergreen state handling --- src/gallium/drivers/r600/evergreen_state.c | 92 +++------------------------- src/gallium/drivers/r600/r600_pipe.h | 9 +++ src/gallium/drivers/r600/r600_state.c | 89 --------------------------- src/gallium/drivers/r600/r600_state_common.c | 88 ++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 172 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 81241478ab2..c7ed422df34 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -137,20 +137,6 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, return rstate; } -static void evergreen_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void *evergreen_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { @@ -307,36 +293,6 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, return rstate; } -static void evergreen_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - rctx->rasterizer = rs; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void evergreen_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->rasterizer == rs) { - rctx->rasterizer = NULL; - } - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - static void *evergreen_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { @@ -377,15 +333,6 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, return rstate; } -static void evergreen_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -510,17 +457,6 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou rctx->ps_samplers.n_views = count; } -static void evergreen_bind_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -559,16 +495,6 @@ static void evergreen_delete_state(struct pipe_context *ctx, void *state) free(rstate); } -static void evergreen_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - FREE(state); - - if (rctx->vertex_elements == state) - rctx->vertex_elements = NULL; -} - static void evergreen_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { @@ -1017,20 +943,20 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.create_sampler_view = evergreen_create_sampler_view; rctx->context.create_vertex_elements_state = r600_create_vertex_elements; rctx->context.create_vs_state = evergreen_create_shader_state; - rctx->context.bind_blend_state = evergreen_bind_blend_state; - rctx->context.bind_depth_stencil_alpha_state = evergreen_bind_state; + rctx->context.bind_blend_state = r600_bind_blend_state; + rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; rctx->context.bind_fs_state = evergreen_bind_ps_shader; - rctx->context.bind_rasterizer_state = evergreen_bind_rs_state; + rctx->context.bind_rasterizer_state = r600_bind_rs_state; rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler; rctx->context.bind_vs_state = evergreen_bind_vs_shader; - rctx->context.delete_blend_state = evergreen_delete_state; - rctx->context.delete_depth_stencil_alpha_state = evergreen_delete_state; + rctx->context.delete_blend_state = r600_delete_state; + rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; rctx->context.delete_fs_state = evergreen_delete_ps_shader; - rctx->context.delete_rasterizer_state = evergreen_delete_rs_state; - rctx->context.delete_sampler_state = evergreen_delete_state; - rctx->context.delete_vertex_elements_state = evergreen_delete_vertex_element; + rctx->context.delete_rasterizer_state = r600_delete_rs_state; + rctx->context.delete_sampler_state = r600_delete_state; + rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; rctx->context.delete_vs_state = evergreen_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; @@ -1045,7 +971,7 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.set_index_buffer = r600_set_index_buffer; rctx->context.set_vertex_sampler_views = evergreen_set_vs_sampler_view; rctx->context.set_viewport_state = evergreen_set_viewport_state; - rctx->context.sampler_view_destroy = evergreen_sampler_view_destroy; + rctx->context.sampler_view_destroy = r600_sampler_view_destroy; } void evergreen_init_config(struct r600_pipe_context *rctx) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index b27bbbac8c1..b0bd2659de9 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -241,6 +241,15 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, void *r600_create_vertex_elements(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_element *elements); +void r600_delete_vertex_element(struct pipe_context *ctx, void *state); +void r600_bind_blend_state(struct pipe_context *ctx, void *state); +void r600_bind_rs_state(struct pipe_context *ctx, void *state); +void r600_delete_rs_state(struct pipe_context *ctx, void *state); +void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state); +void r600_bind_state(struct pipe_context *ctx, void *state); +void r600_delete_state(struct pipe_context *ctx, void *state); + /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 71005297646..f5601a5effa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -324,20 +324,6 @@ static void *r600_create_blend_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_blend_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; - struct r600_pipe_state *rstate; - - if (state == NULL) - return; - rstate = &blend->rstate; - rctx->states[rstate->id] = rstate; - rctx->cb_target_mask = blend->cb_target_mask; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void *r600_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { @@ -499,36 +485,6 @@ static void *r600_create_rs_state(struct pipe_context *ctx, return rstate; } -static void r600_bind_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - if (state == NULL) - return; - - rctx->flatshade = rs->flatshade; - rctx->sprite_coord_enable = rs->sprite_coord_enable; - rctx->rasterizer = rs; - - rctx->states[rs->rstate.id] = &rs->rstate; - r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); -} - -static void r600_delete_rs_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; - - if (rctx->rasterizer == rs) { - rctx->rasterizer = NULL; - } - if (rctx->states[rs->rstate.id] == &rs->rstate) { - rctx->states[rs->rstate.id] = NULL; - } - free(rs); -} - static void *r600_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { @@ -565,16 +521,6 @@ static void *r600_create_sampler_state(struct pipe_context *ctx, return rstate; } - -static void r600_sampler_view_destroy(struct pipe_context *ctx, - struct pipe_sampler_view *state) -{ - struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; - - pipe_resource_reference(&state->texture, NULL); - FREE(resource); -} - static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -705,17 +651,6 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, unsigned count, rctx->ps_samplers.n_views = count; } -static void r600_bind_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (state == NULL) - return; - rctx->states[rstate->id] = rstate; - r600_context_pipe_state_set(&rctx->ctx, rstate); -} - static void r600_bind_ps_sampler(struct pipe_context *ctx, unsigned count, void **states) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -739,30 +674,6 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, unsigned count, void } } -static void r600_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - -static void r600_delete_vertex_element(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - FREE(state); - - if (rctx->vertex_elements == state) - rctx->vertex_elements = NULL; -} - static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index e127bac7797..0ea70868c7d 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -30,6 +30,94 @@ #include "r600_pipe.h" /* common state between evergreen and r600 */ +void r600_bind_blend_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_blend *blend = (struct r600_pipe_blend *)state; + struct r600_pipe_state *rstate; + + if (state == NULL) + return; + rstate = &blend->rstate; + rctx->states[rstate->id] = rstate; + rctx->cb_target_mask = blend->cb_target_mask; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +void r600_bind_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + if (state == NULL) + return; + + rctx->flatshade = rs->flatshade; + rctx->sprite_coord_enable = rs->sprite_coord_enable; + rctx->rasterizer = rs; + + rctx->states[rs->rstate.id] = &rs->rstate; + r600_context_pipe_state_set(&rctx->ctx, &rs->rstate); +} + +void r600_delete_rs_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_rasterizer *rs = (struct r600_pipe_rasterizer *)state; + + if (rctx->rasterizer == rs) { + rctx->rasterizer = NULL; + } + if (rctx->states[rs->rstate.id] == &rs->rstate) { + rctx->states[rs->rstate.id] = NULL; + } + free(rs); +} + +void r600_sampler_view_destroy(struct pipe_context *ctx, + struct pipe_sampler_view *state) +{ + struct r600_pipe_sampler_view *resource = (struct r600_pipe_sampler_view *)state; + + pipe_resource_reference(&state->texture, NULL); + FREE(resource); +} + +void r600_bind_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (state == NULL) + return; + rctx->states[rstate->id] = rstate; + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + +void r600_delete_state(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; + + if (rctx->states[rstate->id] == rstate) { + rctx->states[rstate->id] = NULL; + } + for (int i = 0; i < rstate->nregs; i++) { + r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); + } + free(rstate); +} + +void r600_delete_vertex_element(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + FREE(state); + + if (rctx->vertex_elements == state) + rctx->vertex_elements = NULL; +} + void r600_set_index_buffer(struct pipe_context *ctx, const struct pipe_index_buffer *ib) -- cgit v1.2.3 From ccb9be105602edaaff196046e324c8cb4a12fe0a Mon Sep 17 00:00:00 2001 From: Tilman Sauerbeck Date: Sat, 23 Oct 2010 13:33:18 +0200 Subject: r600g: Added r600_pipe_shader_destroy(). Not yet complete. Signed-off-by: Tilman Sauerbeck Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_pipe.h | 1 + src/gallium/drivers/r600/r600_shader.c | 11 +++++++++++ src/gallium/drivers/r600/r600_state.c | 6 ++++-- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index b0bd2659de9..0bfa7afeadc 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -208,6 +208,7 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600); /* r600_shader.c */ int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens); +void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader); int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 0dd416c0d83..a4e052c23a3 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -338,6 +338,17 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } +void +r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_bc_cf *cf, *next_cf; + + r600_bo_reference(rctx->radeon, &shader->bo, NULL); + + /* FIXME: is there more stuff to free? */ +} + /* * tgsi -> r600 shader */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f5601a5effa..d496051948f 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1095,7 +1095,8 @@ static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) if (rctx->ps_shader == shader) { rctx->ps_shader = NULL; } - /* TODO proper delete */ + + r600_pipe_shader_destroy(ctx, shader); free(shader); } @@ -1107,7 +1108,8 @@ static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) if (rctx->vs_shader == shader) { rctx->vs_shader = NULL; } - /* TODO proper delete */ + + r600_pipe_shader_destroy(ctx, shader); free(shader); } -- cgit v1.2.3 From f4a2c62af56ce10e43688e8283f8defeb05cef1a Mon Sep 17 00:00:00 2001 From: Tilman Sauerbeck Date: Sat, 23 Oct 2010 13:33:22 +0200 Subject: r600g: Also clear bc data when we're destroying a shader. [airlied: remove unused vars] Signed-off-by: Tilman Sauerbeck Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_asm.c | 36 ++++++++++++++++++++++++++++++++++ src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_shader.c | 3 ++- 3 files changed, 39 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index d13da0ef638..a71d95ff1d0 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -871,3 +871,39 @@ int r600_bc_build(struct r600_bc *bc) } return 0; } + +void r600_bc_clear(struct r600_bc *bc) +{ + struct r600_bc_cf *cf, *next_cf; + + free(bc->bytecode); + bc->bytecode = NULL; + + LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { + struct r600_bc_alu *alu, *next_alu; + struct r600_bc_tex *tex, *next_tex; + struct r600_bc_tex *vtx, *next_vtx; + + LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { + free(alu); + } + + LIST_INITHEAD(&cf->alu); + + LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) { + free(tex); + } + + LIST_INITHEAD(&cf->tex); + + LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) { + free(vtx); + } + + LIST_INITHEAD(&cf->vtx); + + free(cf); + } + + LIST_INITHEAD(&cf->list); +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index bebc7c15b00..97d08ee4b54 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -185,6 +185,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf); /* r600_asm.c */ int r600_bc_init(struct r600_bc *bc, enum radeon_family family); +void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a4e052c23a3..4106587398b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -342,10 +342,11 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_bc_cf *cf, *next_cf; r600_bo_reference(rctx->radeon, &shader->bo, NULL); + r600_bc_clear(&shader->shader.bc); + /* FIXME: is there more stuff to free? */ } -- cgit v1.2.3 From a20c2347a0bb9e0e1591b070ee5d0cac81168114 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 24 Oct 2010 13:04:44 +1000 Subject: r600g: drop more common state handling code --- src/gallium/drivers/r600/evergreen_state.c | 92 +++------------------------- src/gallium/drivers/r600/r600_pipe.h | 7 +++ src/gallium/drivers/r600/r600_state.c | 66 -------------------- src/gallium/drivers/r600/r600_state_common.c | 66 ++++++++++++++++++++ 4 files changed, 80 insertions(+), 151 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index c7ed422df34..e3f11da7d8c 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -481,20 +481,6 @@ static void evergreen_bind_vs_sampler(struct pipe_context *ctx, unsigned count, } } -static void evergreen_delete_state(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_state *rstate = (struct r600_pipe_state *)state; - - if (rctx->states[rstate->id] == rstate) { - rctx->states[rstate->id] = NULL; - } - for (int i = 0; i < rstate->nregs; i++) { - r600_bo_reference(rctx->radeon, &rstate->regs[i].bo, NULL); - } - free(rstate); -} - static void evergreen_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { @@ -530,17 +516,6 @@ static void evergreen_set_clip_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void evergreen_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - rctx->vertex_elements = v; - if (v) { -// rctx->vs_rebuild = TRUE; - } -} - static void evergreen_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { @@ -880,84 +855,31 @@ static void evergreen_set_constant_buffer(struct pipe_context *ctx, uint shader, } } -static void *evergreen_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void evergreen_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void evergreen_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void evergreen_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - -static void evergreen_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - /* TODO proper delete */ - free(shader); -} - void evergreen_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = evergreen_create_dsa_state; - rctx->context.create_fs_state = evergreen_create_shader_state; + rctx->context.create_fs_state = r600_create_shader_state; rctx->context.create_rasterizer_state = evergreen_create_rs_state; rctx->context.create_sampler_state = evergreen_create_sampler_state; rctx->context.create_sampler_view = evergreen_create_sampler_view; rctx->context.create_vertex_elements_state = r600_create_vertex_elements; - rctx->context.create_vs_state = evergreen_create_shader_state; + rctx->context.create_vs_state = r600_create_shader_state; rctx->context.bind_blend_state = r600_bind_blend_state; rctx->context.bind_depth_stencil_alpha_state = r600_bind_state; rctx->context.bind_fragment_sampler_states = evergreen_bind_ps_sampler; - rctx->context.bind_fs_state = evergreen_bind_ps_shader; + rctx->context.bind_fs_state = r600_bind_ps_shader; rctx->context.bind_rasterizer_state = r600_bind_rs_state; - rctx->context.bind_vertex_elements_state = evergreen_bind_vertex_elements; + rctx->context.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->context.bind_vertex_sampler_states = evergreen_bind_vs_sampler; - rctx->context.bind_vs_state = evergreen_bind_vs_shader; + rctx->context.bind_vs_state = r600_bind_vs_shader; rctx->context.delete_blend_state = r600_delete_state; rctx->context.delete_depth_stencil_alpha_state = r600_delete_state; - rctx->context.delete_fs_state = evergreen_delete_ps_shader; + rctx->context.delete_fs_state = r600_delete_ps_shader; rctx->context.delete_rasterizer_state = r600_delete_rs_state; rctx->context.delete_sampler_state = r600_delete_state; rctx->context.delete_vertex_elements_state = r600_delete_vertex_element; - rctx->context.delete_vs_state = evergreen_delete_vs_shader; + rctx->context.delete_vs_state = r600_delete_vs_shader; rctx->context.set_blend_color = evergreen_set_blend_color; rctx->context.set_clip_state = evergreen_set_clip_state; rctx->context.set_constant_buffer = evergreen_set_constant_buffer; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 0bfa7afeadc..1c691f6b764 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -250,7 +250,14 @@ void r600_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state); void r600_bind_state(struct pipe_context *ctx, void *state); void r600_delete_state(struct pipe_context *ctx, void *state); +void r600_bind_vertex_elements(struct pipe_context *ctx, void *state); +void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state); +void r600_bind_ps_shader(struct pipe_context *ctx, void *state); +void r600_bind_vs_shader(struct pipe_context *ctx, void *state); +void r600_delete_ps_shader(struct pipe_context *ctx, void *state); +void r600_delete_vs_shader(struct pipe_context *ctx, void *state); /* * common helpers */ diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index d496051948f..1c80075245d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -709,17 +709,6 @@ static void r600_set_clip_state(struct pipe_context *ctx, r600_context_pipe_state_set(&rctx->ctx, rstate); } -static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_vertex_element *v = (struct r600_vertex_element*)state; - - rctx->vertex_elements = v; - if (v) { -// rctx->vs_rebuild = TRUE; - } -} - static void r600_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state) { @@ -1058,61 +1047,6 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint } } -static void *r600_create_shader_state(struct pipe_context *ctx, - const struct pipe_shader_state *state) -{ - struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); - int r; - - r = r600_pipe_shader_create(ctx, shader, state->tokens); - if (r) { - return NULL; - } - return shader; -} - -static void r600_bind_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->ps_shader = (struct r600_pipe_shader *)state; -} - -static void r600_bind_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - - /* TODO delete old shader */ - rctx->vs_shader = (struct r600_pipe_shader *)state; -} - -static void r600_delete_ps_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->ps_shader == shader) { - rctx->ps_shader = NULL; - } - - r600_pipe_shader_destroy(ctx, shader); - free(shader); -} - -static void r600_delete_vs_shader(struct pipe_context *ctx, void *state) -{ - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; - - if (rctx->vs_shader == shader) { - rctx->vs_shader = NULL; - } - - r600_pipe_shader_destroy(ctx, shader); - free(shader); -} - void r600_init_state_functions(struct r600_pipe_context *rctx) { rctx->context.create_blend_state = r600_create_blend_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 0ea70868c7d..210420e823b 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -108,6 +108,17 @@ void r600_delete_state(struct pipe_context *ctx, void *state) free(rstate); } +void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_vertex_element *v = (struct r600_vertex_element*)state; + + rctx->vertex_elements = v; + if (v) { +// rctx->vs_rebuild = TRUE; + } +} + void r600_delete_vertex_element(struct pipe_context *ctx, void *state) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; @@ -208,3 +219,58 @@ void *r600_create_vertex_elements(struct pipe_context *ctx, return v; } + +void *r600_create_shader_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + struct r600_pipe_shader *shader = CALLOC_STRUCT(r600_pipe_shader); + int r; + + r = r600_pipe_shader_create(ctx, shader, state->tokens); + if (r) { + return NULL; + } + return shader; +} + +void r600_bind_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->ps_shader = (struct r600_pipe_shader *)state; +} + +void r600_bind_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + + /* TODO delete old shader */ + rctx->vs_shader = (struct r600_pipe_shader *)state; +} + +void r600_delete_ps_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->ps_shader == shader) { + rctx->ps_shader = NULL; + } + + r600_pipe_shader_destroy(ctx, shader); + free(shader); +} + +void r600_delete_vs_shader(struct pipe_context *ctx, void *state) +{ + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + struct r600_pipe_shader *shader = (struct r600_pipe_shader *)state; + + if (rctx->vs_shader == shader) { + rctx->vs_shader = NULL; + } + + r600_pipe_shader_destroy(ctx, shader); + free(shader); +} -- cgit v1.2.3 From 8449a4772a73f613d9425b691cffba6a261df813 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 5 Oct 2010 02:52:03 +0200 Subject: r300g: fix texture border for 16-bits-per-channel formats This is kinda hacky, but it's hard to come up with a generic solution for all formats when only a few are used in practice (I mostly get B8G8R8*8). --- src/gallium/drivers/r300/r300_state_derived.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 904736ef06d..730ef7a3ee2 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -620,14 +620,19 @@ static uint32_t r300_get_border_color(enum pipe_format format, } break; - default: - /* I think the fat formats (16, 32) are specified - * as the 8-bit ones. I am not sure how compressed formats - * work here. */ + case 8: r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + break; + + case 16: + r = ((float_to_ubyte(border_swizzled[2]) & 0xff) << 0) | + ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | + ((float_to_ubyte(border_swizzled[0]) & 0xff) << 16) | + ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); + break; } return r; -- cgit v1.2.3 From 2d2bafdb30110e83b7e14017326a454d7e7f37f3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 14 Oct 2010 11:15:37 +1000 Subject: r600g: fix magic 0x1 ->flat shade ena --- src/gallium/drivers/r600/evergreen_state.c | 2 +- src/gallium/drivers/r600/r600_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index e3f11da7d8c..72223485067 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -247,7 +247,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_RASTERIZER; if (state->flatshade_first) prov_vtx = 0; - tmp = 0x00000001; + tmp = S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { tmp |= S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(2) | diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 1c80075245d..b3e0d493217 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -434,7 +434,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_RASTERIZER; if (state->flatshade_first) prov_vtx = 0; - tmp = 0x00000001; + tmp = S_0286D4_FLAT_SHADE_ENA(1); if (state->sprite_coord_enable) { tmp |= S_0286D4_PNT_SPRITE_ENA(1) | S_0286D4_PNT_SPRITE_OVRD_X(2) | -- cgit v1.2.3 From d1acb920163ab0f39ff2fc72b85fc7bf16c37262 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 26 Oct 2010 12:08:00 +1000 Subject: r600g: add assembler support for all the kcache fields. --- src/gallium/drivers/r600/eg_asm.c | 7 ++++++- src/gallium/drivers/r600/r600_asm.c | 7 ++++++- src/gallium/drivers/r600/r600_asm.h | 5 +++++ 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 52b7189e9e5..c30f09c394b 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -36,8 +36,13 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index a71d95ff1d0..c22628423bd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -701,9 +701,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | - S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode); + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) | + S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | + S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) | + S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) | S_SQ_CF_ALU_WORD1_BARRIER(1) | S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 97d08ee4b54..25cda16837d 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -132,6 +132,11 @@ struct r600_bc_cf { unsigned pop_count; unsigned cf_addr; /* control flow addr */ unsigned kcache0_mode; + unsigned kcache1_mode; + unsigned kcache0_addr; + unsigned kcache1_addr; + unsigned kcache0_bank; + unsigned kcache1_bank; unsigned r6xx_uses_waterfall; struct list_head alu; struct list_head tex; -- cgit v1.2.3 From 8ad9d83fdfc609303cebba3d811064b91aed6b0b Mon Sep 17 00:00:00 2001 From: Tilman Sauerbeck Date: Sun, 24 Oct 2010 14:52:14 +0200 Subject: r600g: Destroy the blitter. This fix got lost in the state rework merge. Signed-off-by: Tilman Sauerbeck --- src/gallium/drivers/r600/r600_pipe.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index bea7ef5df84..35645398437 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -82,6 +82,8 @@ static void r600_destroy_context(struct pipe_context *context) free(rctx->states[i]); } + util_blitter_destroy(rctx->blitter); + u_upload_destroy(rctx->upload_vb); u_upload_destroy(rctx->upload_ib); -- cgit v1.2.3 From 3c8106402f970aac61981381b1c9988504d3765d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 27 Oct 2010 09:02:00 -0700 Subject: r300g: Silence uninitialized variable warning. Fixes this GCC warning. r300_state_derived.c: In function 'r300_update_derived_state': r300_state_derived.c:593: warning: 'r' may be used uninitialized in this function r300_state_derived.c:593: note: 'r' was declared here --- src/gallium/drivers/r300/r300_state_derived.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 730ef7a3ee2..efec9495104 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -633,6 +633,11 @@ static uint32_t r300_get_border_color(enum pipe_format format, ((float_to_ubyte(border_swizzled[0]) & 0xff) << 16) | ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); break; + + default: + assert(0); + r = 0; + break; } return r; -- cgit v1.2.3 From d674ee2a4dfeda55d3f4780e97a26f73d855c448 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 27 Oct 2010 09:26:27 -0700 Subject: r600g: Silence uninitialized variable warnings. --- src/gallium/drivers/r600/r600_asm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index c22628423bd..8a7f3ce575c 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -879,15 +879,15 @@ int r600_bc_build(struct r600_bc *bc) void r600_bc_clear(struct r600_bc *bc) { - struct r600_bc_cf *cf, *next_cf; + struct r600_bc_cf *cf = NULL, *next_cf; free(bc->bytecode); bc->bytecode = NULL; LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) { - struct r600_bc_alu *alu, *next_alu; - struct r600_bc_tex *tex, *next_tex; - struct r600_bc_tex *vtx, *next_vtx; + struct r600_bc_alu *alu = NULL, *next_alu; + struct r600_bc_tex *tex = NULL, *next_tex; + struct r600_bc_tex *vtx = NULL, *next_vtx; LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { free(alu); -- cgit v1.2.3 From 676c3f08bd13564c088ae85c3538ee0ee3c52599 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 27 Oct 2010 21:13:41 +0200 Subject: r300g: add a default channel ordering of texture border for unhandled formats It should fix the texture border for compressed textures. Broken since 8449a4772a73f613d9425b691cffba6a261df813. --- src/gallium/drivers/r300/r300_state_derived.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index efec9495104..50366e32c2b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -620,13 +620,6 @@ static uint32_t r300_get_border_color(enum pipe_format format, } break; - case 8: - r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | - ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | - ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | - ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); - break; - case 16: r = ((float_to_ubyte(border_swizzled[2]) & 0xff) << 0) | ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | @@ -634,9 +627,12 @@ static uint32_t r300_get_border_color(enum pipe_format format, ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); break; + case 8: default: - assert(0); - r = 0; + r = ((float_to_ubyte(border_swizzled[0]) & 0xff) << 0) | + ((float_to_ubyte(border_swizzled[1]) & 0xff) << 8) | + ((float_to_ubyte(border_swizzled[2]) & 0xff) << 16) | + ((float_to_ubyte(border_swizzled[3]) & 0xff) << 24); break; } -- cgit v1.2.3 From 3acc8265200295265b476222299a013791050b73 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 25 Oct 2010 13:52:58 -0700 Subject: Fix build on systems where "python" is python 3. First, it changes autoconf to use a "python2" binary when available, rather than plain "python" (which is ambiguous). Secondly, it changes the Makefiles to use $(PYTHON) $(PYTHON_FLAGS) rather than calling python directly. Signed-off-by: Xavier Chantry Signed-off-by: Matthew William Cox Signed-off-by: Kenneth Graunke --- configs/autoconf.in | 2 +- configure.ac | 3 +++ src/gallium/auxiliary/Makefile | 10 +++++----- src/gallium/drivers/llvmpipe/Makefile | 2 +- src/glsl/Makefile | 2 +- src/mesa/drivers/dri/common/xmlpool/Makefile | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers') diff --git a/configs/autoconf.in b/configs/autoconf.in index 9bdbefebddd..e2d70c63a12 100644 --- a/configs/autoconf.in +++ b/configs/autoconf.in @@ -53,7 +53,7 @@ MKDEP_OPTIONS = @MKDEP_OPTIONS@ INSTALL = @INSTALL@ # Python and flags (generally only needed by the developers) -PYTHON2 = python +PYTHON2 = @PYTHON2@ PYTHON_FLAGS = -t -O -O # Library names (base name) diff --git a/configure.ac b/configure.ac index 364ee039470..d6f15b5c346 100644 --- a/configure.ac +++ b/configure.ac @@ -30,6 +30,7 @@ AC_PROG_CPP AC_PROG_CC AC_PROG_CXX AC_CHECK_PROGS([MAKE], [gmake make]) +AC_CHECK_PROGS([PYTHON2], [python2 python]) AC_PATH_PROG([MKDEP], [makedepend]) AC_PATH_PROG([SED], [sed]) @@ -1691,6 +1692,8 @@ echo "" echo " CFLAGS: $cflags" echo " CXXFLAGS: $cxxflags" echo " Macros: $defines" +echo "" +echo " PYTHON2: $PYTHON2" echo "" echo " Run '${MAKE-make}' to build Mesa" diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index abd33f6eef1..f37d59e9a3a 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -211,16 +211,16 @@ include ../Makefile.template indices/u_indices_gen.c: indices/u_indices_gen.py - python $< > $@ + $(PYTHON2) $< > $@ indices/u_unfilled_gen.c: indices/u_unfilled_gen.py - python $< > $@ + $(PYTHON2) $< > $@ util/u_format_srgb.c: util/u_format_srgb.py - python $< > $@ + $(PYTHON2) $< > $@ util/u_format_table.c: util/u_format_table.py util/u_format_pack.py util/u_format_parse.py util/u_format.csv - python util/u_format_table.py util/u_format.csv > $@ + $(PYTHON2) util/u_format_table.py util/u_format.csv > $@ util/u_half.c: util/u_half.py - python util/u_half.py > $@ + $(PYTHON2) util/u_half.py > $@ diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 08da2286b05..669e42e3003 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -69,7 +69,7 @@ lp_test_sincos.o : sse_mathfun.h PROGS_DEPS := ../../auxiliary/libgallium.a lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv - python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ + $(PYTHON2) $(PYTHON_FLAGS) lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@ LDFLAGS += $(LLVM_LDFLAGS) LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS) diff --git a/src/glsl/Makefile b/src/glsl/Makefile index 83869b1d846..4cfecb130f2 100644 --- a/src/glsl/Makefile +++ b/src/glsl/Makefile @@ -178,7 +178,7 @@ builtins: builtin_function.cpp builtins/profiles/* builtins/ir/* builtins/tools/ cp builtins/tools/builtin_function.cpp . make glsl_compiler @echo Regenerating builtin_function.cpp... - ./builtins/tools/generate_builtins.py > builtin_function.cpp + $(PYTHON2) $(PYTHON_FLAGS) builtins/tools/generate_builtins.py > builtin_function.cpp @echo Rebuilding the real compiler... make glsl_compiler diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile b/src/mesa/drivers/dri/common/xmlpool/Makefile index 62ec919ea68..b71629e9f16 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile +++ b/src/mesa/drivers/dri/common/xmlpool/Makefile @@ -62,7 +62,7 @@ clean: # Default target options.h options.h: t_options.h mo - python gen_xmlpool.py $(LANGS) > options.h + $(PYTHON2) $(PYTHON_FLAGS) gen_xmlpool.py $(LANGS) > options.h # Update .mo files from the corresponding .po files. mo: -- cgit v1.2.3 From 5479fa34d9acebd55f68c23a278cf382d0e84248 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 28 Oct 2010 17:38:18 +0100 Subject: gallium: Avoid using __doc__ in python scripts. --- src/gallium/auxiliary/util/u_format_srgb.py | 4 ++-- src/gallium/auxiliary/util/u_format_table.py | 4 ++-- src/gallium/drivers/llvmpipe/lp_tile_soa.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers') diff --git a/src/gallium/auxiliary/util/u_format_srgb.py b/src/gallium/auxiliary/util/u_format_srgb.py index 3e8000f3687..cd63ae78919 100644 --- a/src/gallium/auxiliary/util/u_format_srgb.py +++ b/src/gallium/auxiliary/util/u_format_srgb.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -89,7 +89,7 @@ def main(): print '/* This file is autogenerated by u_format_srgb.py. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format_srgb.h"' print diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py index f0b407b8b8e..8cc22a56371 100755 --- a/src/gallium/auxiliary/util/u_format_table.py +++ b/src/gallium/auxiliary/util/u_format_table.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2010 VMware, Inc. @@ -83,7 +83,7 @@ def write_format_table(formats): print '/* This file is autogenerated by u_format_table.py from u_format.csv. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "u_format.h"' print '#include "u_format_s3tc.h"' diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index e49f9c62fe7..8df7b236fe0 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -''' +CopyRight = ''' /************************************************************************** * * Copyright 2009 VMware, Inc. @@ -510,7 +510,7 @@ def main(): print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */' print # This will print the copyright message on the top of this file - print __doc__.strip() + print CopyRight.strip() print print '#include "pipe/p_compiler.h"' print '#include "util/u_format.h"' -- cgit v1.2.3