diff options
author | Dave Airlie <[email protected]> | 2009-04-22 21:41:57 +1000 |
---|---|---|
committer | Dave Airlie <[email protected]> | 2009-04-22 21:41:57 +1000 |
commit | 466c78c93538f2853449124c06274d538830cd5a (patch) | |
tree | a6bd88060ba328d8bed1ff2ea1a37ee741a90a17 /src/mesa/drivers/dri | |
parent | 65fe0c86ffcff99f9f09606d462bf3731ea0c308 (diff) | |
parent | f057f6543da469f231d551cb5728d98df8add4fa (diff) |
Merge remote branch 'origin/master' into radeon-rewrite
Diffstat (limited to 'src/mesa/drivers/dri')
47 files changed, 1304 insertions, 715 deletions
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 2fa36bab3f9..5c01d233c13 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -72,10 +72,11 @@ $(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ - $(ASM_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) > /dev/null 2>/dev/null # Emacs tags diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 0ec4adc2325..ae0e61e515b 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -62,7 +62,7 @@ __driUtilMessage(const char *f, ...) va_list args; if (getenv("LIBGL_DEBUG")) { - fprintf(stderr, "libGL error: \n"); + fprintf(stderr, "libGL: "); va_start(args, f); vfprintf(stderr, f, args); va_end(args); @@ -314,12 +314,12 @@ static void driSwapBuffers(__DRIdrawable *dPriv) __DRIscreen *psp = dPriv->driScreenPriv; drm_clip_rect_t *rects; int i; - - if (!dPriv->numClipRects) - return; psp->DriverAPI.SwapBuffers(dPriv); + if (!dPriv->numClipRects) + return; + rects = _mesa_malloc(sizeof(*rects) * dPriv->numClipRects); if (!rects) diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index f2868cb58a6..89c815722f6 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -82,6 +82,71 @@ rgba[3] = 0xff; \ } while (0) +#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4_REV) + +/** + ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV + **/ + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +#define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_4444(color[3], color[0], color[1], color[2]) + +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + PUT_VALUE(_x, _y, PACK_COLOR_4444(a, r, g, b)) \ + +#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = ((p >> 8) & 0xf) * 0x11; \ + rgba[1] = ((p >> 4) & 0xf) * 0x11; \ + rgba[2] = ((p >> 0) & 0xf) * 0x11; \ + rgba[3] = ((p >> 12) & 0xf) * 0x11; \ + } while (0) + + +#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5_REV) + +/** + ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV + **/ + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +#define INIT_MONO_PIXEL(p, color) \ + p = PACK_COLOR_1555(color[3], color[0], color[1], color[2]) + +#define WRITE_RGBA( _x, _y, r, g, b, a ) \ + PUT_VALUE(_x, _y, PACK_COLOR_1555(a, r, g, b)) \ + +#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8; \ + rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8; \ + rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8; \ + rgba[3] = ((p >> 15) & 0x1) * 0xff; \ + } while (0) + #elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV) /** diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 954a7e2af1f..9f4bd1699f9 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -11,7 +11,6 @@ DRIVER_SOURCES = \ i830_metaops.c \ i830_state.c \ i830_texblend.c \ - i830_tex.c \ i830_texstate.c \ i830_vtbl.c \ intel_render.c \ @@ -36,7 +35,6 @@ DRIVER_SOURCES = \ intel_buffers.c \ intel_blit.c \ intel_swapbuffers.c \ - i915_tex.c \ i915_tex_layout.c \ i915_texstate.c \ i915_context.c \ diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c index 9c540cb2bb7..10b9bf371c3 100644 --- a/src/mesa/drivers/dri/i915/i830_context.c +++ b/src/mesa/drivers/dri/i915/i830_context.c @@ -47,7 +47,6 @@ i830InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i830InitStateFuncs(functions); - i830InitTextureFuncs(functions); } extern const struct tnl_pipeline_stage *intel_pipeline[]; diff --git a/src/mesa/drivers/dri/i915/i830_tex.c b/src/mesa/drivers/dri/i915/i830_tex.c deleted file mode 100644 index 34ac42a78e0..00000000000 --- a/src/mesa/drivers/dri/i915/i830_tex.c +++ /dev/null @@ -1,100 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" - -#include "texmem.h" - -#include "i830_context.h" -#include "i830_reg.h" - - - -static void -i830TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - - switch (pname) { - case GL_TEXTURE_ENV_COLOR: - case GL_TEXTURE_ENV_MODE: - case GL_COMBINE_RGB: - case GL_COMBINE_ALPHA: - case GL_SOURCE0_RGB: - case GL_SOURCE1_RGB: - case GL_SOURCE2_RGB: - case GL_SOURCE0_ALPHA: - case GL_SOURCE1_ALPHA: - case GL_SOURCE2_ALPHA: - case GL_OPERAND0_RGB: - case GL_OPERAND1_RGB: - case GL_OPERAND2_RGB: - case GL_OPERAND0_ALPHA: - case GL_OPERAND1_ALPHA: - case GL_OPERAND2_ALPHA: - case GL_RGB_SCALE: - case GL_ALPHA_SCALE: - break; - - case GL_TEXTURE_LOD_BIAS:{ - struct i830_context *i830 = i830_context(ctx); - GLuint unit = ctx->Texture.CurrentUnit; - int b = (int) ((*param) * 16.0); - if (b > 63) - b = 63; - if (b < -64) - b = -64; - I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit)); - i830->lodbias_tm0s3[unit] = - ((b << TM0S3_LOD_BIAS_SHIFT) & TM0S3_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - - - -void -i830InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i830TexEnv; -*/ -} diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 1a949210789..3bf02de61f8 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -26,12 +26,14 @@ **************************************************************************/ #include "glapi/glapi.h" +#include "main/texformat.h" #include "i830_context.h" #include "i830_reg.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -614,6 +616,8 @@ i830_state_draw_region(struct intel_context *intel, { struct i830_context *i830 = i830_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i830->state || state == &i830->meta); @@ -651,13 +655,27 @@ i830_state_draw_region(struct intel_context *intel, */ value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ - - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= DV_PF_565; + + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 7549029a1be..fdd2cf61096 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -83,7 +83,6 @@ i915InitDriverFunctions(struct dd_function_table *functions) { intelInitDriverFunctions(functions); i915InitStateFunctions(functions); - i915InitTextureFuncs(functions); i915InitFragProgFuncs(functions); functions->UpdateState = i915InvalidateState; } diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 52f09a4b1b2..2db10c60e99 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -162,12 +162,12 @@ src_vector(struct i915_fragment_program *p, GET_SWZ(source->Swizzle, 1), GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); - if (source->NegateBase) + if (source->Negate) src = negate(src, - GET_BIT(source->NegateBase, 0), - GET_BIT(source->NegateBase, 1), - GET_BIT(source->NegateBase, 2), - GET_BIT(source->NegateBase, 3)); + GET_BIT(source->Negate, 0), + GET_BIT(source->Negate, 1), + GET_BIT(source->Negate, 2), + GET_BIT(source->Negate, 3)); return src; } @@ -323,7 +323,8 @@ upload_program(struct i915_fragment_program *p) p->ctx->FragmentProgram._Current; const struct prog_instruction *inst = program->Base.Instructions; -/* _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */ + if (INTEL_DEBUG & DEBUG_WM) + _mesa_print_program(&program->Base); /* Is this a parse-failed program? Ensure a valid program is * loaded, as the flagging of an error isn't sufficient to stop @@ -1049,9 +1050,6 @@ i915ProgramStringNotify(GLcontext * ctx, _mesa_append_fog_code(ctx, &p->FragProg); p->FragProg.FogOption = GL_NONE; } - - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_print_program(prog); } _tnl_program_string(ctx, target, prog); diff --git a/src/mesa/drivers/dri/i915/i915_tex.c b/src/mesa/drivers/dri/i915/i915_tex.c deleted file mode 100644 index e38d8fe79d1..00000000000 --- a/src/mesa/drivers/dri/i915/i915_tex.c +++ /dev/null @@ -1,78 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/mm.h" -#include "main/texstore.h" -#include "main/texformat.h" -#include "swrast/swrast.h" - -#include "texmem.h" - -#include "i915_context.h" -#include "i915_reg.h" - - - -static void -i915TexEnv(GLcontext * ctx, GLenum target, - GLenum pname, const GLfloat * param) -{ - struct i915_context *i915 = I915_CONTEXT(ctx); - - switch (pname) { - case GL_TEXTURE_LOD_BIAS:{ - GLuint unit = ctx->Texture.CurrentUnit; - GLint b = (int) ((*param) * 16.0); - if (b > 255) - b = 255; - if (b < -256) - b = -256; - I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit)); - i915->lodbias_ss2[unit] = - ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); - break; - } - - default: - break; - } -} - - -void -i915InitTextureFuncs(struct dd_function_table *functions) -{ -/* - functions->TexEnv = i915TexEnv; -*/ -} diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 3f6d282d342..115004616ff 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -32,6 +32,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/colormac.h" +#include "main/texformat.h" #include "tnl/t_context.h" #include "tnl/t_vertex.h" @@ -40,6 +41,7 @@ #include "intel_tex.h" #include "intel_regions.h" #include "intel_tris.h" +#include "intel_fbo.h" #include "i915_reg.h" #include "i915_context.h" @@ -542,6 +544,8 @@ i915_state_draw_region(struct intel_context *intel, { struct i915_context *i915 = i915_context(&intel->ctx); GLcontext *ctx = &intel->ctx; + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); GLuint value; ASSERT(state == &i915->state || state == &i915->meta); @@ -580,12 +584,26 @@ i915_state_draw_region(struct intel_context *intel, value = (DSTORG_HORT_BIAS(0x8) | /* .5 */ DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); - if (color_region && color_region->cpp == 4) { - value |= DV_PF_8888; - } - else { - value |= (DITHER_FULL_ALWAYS | DV_PF_565); + if (irb != NULL) { + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + value |= DV_PF_8888; + break; + case MESA_FORMAT_RGB565: + value |= DV_PF_565 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB1555: + value |= DV_PF_1555 | DITHER_FULL_ALWAYS; + break; + case MESA_FORMAT_ARGB4444: + value |= DV_PF_4444 | DITHER_FULL_ALWAYS; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } } + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 01e07c967fa..a0b3b06309f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -141,7 +141,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -159,6 +160,7 @@ struct brw_state_flags { struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ }; @@ -168,8 +170,7 @@ struct brw_fragment_program { GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - /** Program constant buffer/surface */ - dri_bo *const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ }; @@ -186,7 +187,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -225,6 +226,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -245,12 +247,31 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 16 /** - * Size of our surface binding table. + * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture - * objects and shader constant buffer (+1). + * objects and shader constant buffers (+2). */ #define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -557,6 +578,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index a6bfb7507e7..18b187ed1dc 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -45,17 +45,21 @@ #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -248,7 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); @@ -333,37 +337,69 @@ static void prepare_constant_buffer(struct brw_context *brw) /** - * Vertex/fragment shader constants are stored in a pseudo 1D texture. - * This function updates the constants in that buffer. + * Copy Mesa program parameters into given constant buffer. */ static void -update_texture_constant_buffer(struct brw_context *brw) +update_constant_buffer(struct brw_context *brw, + const struct gl_program_parameter_list *params, + dri_bo *const_buffer) { - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = fp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); - assert(fp->const_buffer); - assert(fp->const_buffer->size >= size); - - /* copy constants into the buffer */ - if (size > 0) { + /* copy Mesa program constants into the buffer */ + if (const_buffer && size > 0) { GLubyte *map; - dri_bo_map(fp->const_buffer, GL_TRUE); - map = fp->const_buffer->virtual; + + assert(const_buffer); + assert(const_buffer->size >= size); + + dri_bo_map(const_buffer, GL_TRUE); + map = const_buffer->virtual; memcpy(map, params->ParameterValues, size); - dri_bo_unmap(fp->const_buffer); + dri_bo_unmap(const_buffer); + + if (0) { + int i; + for (i = 0; i < params->NumParameters; i++) { + float *p = params->ParameterValues[i]; + printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); + } + } } } +/** Copy current vertex program's parameters into the constant buffer */ +static void +update_vertex_constant_buffer(struct brw_context *brw) +{ + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + if (0) { + printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("program %u\n", vp->program.Base.Id); + } + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); +} + + +/** Copy current fragment program's parameters into the constant buffer */ +static void +update_fragment_constant_buffer(struct brw_context *brw) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_texture_constant_buffer(brw); + update_vertex_constant_buffer(brw); + update_fragment_constant_buffer(brw); BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index d05f2e6c410..62c98bd8bb3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -862,9 +862,17 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ); +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 21ce8369db4..60ea44f7a96 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p, /** * Read a float[4] vector from the data port Data Cache (const buffer). - * Scratch offset should be a multiple of 16. + * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. * If relAddr is true, we'll do an indirect fetch using the address register. */ @@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ) { { @@ -969,18 +969,20 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_mask_control(p, BRW_MASK_DISABLE); /* set message header global offset field (reg 0, element 2) */ + /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(scratch_offset)); + brw_imm_d(location)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.predicate_control = 0; /* XXX */ + insn->header.predicate_control = BRW_PREDICATE_NONE; insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); @@ -989,7 +991,7 @@ void brw_dp_READ_4( struct brw_compile *p, brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(insn, - bind_table_index, /* binding table index (255=stateless) */ + bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ @@ -1000,6 +1002,78 @@ void brw_dp_READ_4( struct brw_compile *p, } +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5c94a49f60a..9bc5c35139c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 457bc2fc7f4..bac69187c19 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -95,6 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -111,7 +117,6 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; @@ -128,24 +133,6 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); - - /* alloc constant buffer/surface */ - { - const struct gl_program_parameter_list *params = prog->Parameters; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* free old const buffer if too small */ - if (newFP->const_buffer && newFP->const_buffer->size < size) { - dri_bo_unreference(newFP->const_buffer); - newFP->const_buffer = NULL; - } - - if (!newFP->const_buffer) { - newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, - "fp_const_buffer", - size, 64); - } - } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 5d332d010c2..a7132622696 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 99d0e937226..d20cf78b8af 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,6 +75,13 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** using a real constant buffer? */ + GLboolean use_const_buffer; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index d29eb17f8cf..2637344b482 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -96,7 +96,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 0d6c6ab9a8a..524f1211cee 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -38,14 +38,44 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} -/* Do things as simply as possible. Allocate and populate all regs +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + +#if 0 + if (c->vp->program.Base.Parameters->NumParameters >= 6) + c->use_const_buffer = 1; + else +#endif + c->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -66,13 +96,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; + if (c->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ @@ -133,6 +172,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { if (c->output_regs[i].used_in_src) { c->output_regs[i].reg = brw_vec8_grf(reg, 0); @@ -165,28 +212,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - /** * If an instruction uses a temp reg both as a src and the dest, we * sometimes need to allocate an intermediate temporary. @@ -633,6 +658,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -673,13 +700,83 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (src->RelAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + src->RelAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (src->RelAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -708,13 +805,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -735,10 +836,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -750,30 +908,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -784,16 +943,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -803,14 +984,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -844,10 +1027,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -862,8 +1042,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -1043,6 +1223,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ if (inst->Opcode != OPCODE_SWZ) @@ -1053,7 +1238,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1181,7 +1366,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1a63766ea1f..3d295388437 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 960bbb311e3..ba03afd6c13 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d65b1332c61..72fc21d2eba 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -742,7 +742,7 @@ static void emit_tex( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, (inst->tex_shadow ? @@ -791,7 +791,7 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index a7f5f1b9a28..1798d842c79 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -80,9 +80,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -569,7 +568,7 @@ static void precalc_dst( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -604,7 +603,7 @@ static void precalc_lit( struct brw_wm_compile *c, src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } if (dst.WriteMask & WRITEMASK_YZ) { @@ -651,7 +650,7 @@ static void precalc_tex( struct brw_wm_compile *c, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ @@ -1050,14 +1049,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 575cd45d572..22e17622c6d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -56,7 +56,7 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, * Examine instruction's write mask to find index of first component * enabled for writing. */ -static int get_scalar_dst_index(struct prog_instruction *inst) +static int get_scalar_dst_index(const struct prog_instruction *inst) { int i; for (i = 0; i < 4; i++) @@ -254,14 +254,15 @@ static void prealloc_reg(struct brw_wm_compile *c) * XXX alloc these on demand! */ if (c->use_const_buffer) { - c->current_const[0].reg = alloc_tmp(c); - c->current_const[1].reg = alloc_tmp(c); - c->current_const[2].reg = alloc_tmp(c); + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = alloc_tmp(c); + } } - /* +#if 0 printf("USE CONST BUFFER? %d\n", c->use_const_buffer); printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); - */ +#endif } @@ -283,14 +284,12 @@ static void fetch_constants(struct brw_wm_compile *c, src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { if (c->current_const[i].index != src->Index) { - c->current_const[i].index = src->Index; - /*c->current_const[i].reg = alloc_tmp(c);*/ - /* +#if 0 printf(" fetch const[%d] for arg %d into reg %d\n", src->Index, i, c->current_const[i].reg.nr); - */ +#endif /* need to fetch the constant now */ brw_dp_READ_4(p, @@ -298,28 +297,8 @@ static void fetch_constants(struct brw_wm_compile *c, 1, /* msg_reg */ src->RelAddr, /* relative indexing? */ 16 * src->Index, /* byte offset */ - BRW_WM_MAX_SURF - 1 /* binding table index */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ ); - -#if 0 - /* dependency stall */ - { - int response_length = 1; - int mark = mark_tmps( c ); - struct brw_reg src = c->current_const[i].reg; - struct brw_reg tmp = alloc_tmp(c); - - /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } - */ - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, tmp, src); - brw_MOV(p, src, tmp); - brw_pop_insn_state(p); - - release_tmps( c, mark ); - } -#endif } } } @@ -361,18 +340,18 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->NegateBase) + if (src->Negate & (1 << component)) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); - /* - printf(" form const[%d] for arg %d, comp %d, reg %d\n", +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", c->current_const[srcRegIndex].index, - srcRegIndex, component, + srcRegIndex, const_reg.nr); - */ +#endif return const_reg; } @@ -398,7 +377,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, else { /* other type of source register */ return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + src->Negate, src->Abs); } } @@ -423,11 +402,13 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->NegateBase) + if (src->Negate & (1 << channel)) value = -value; if (src->Abs) value = FABSF(value); - /*printf(" form imm reg %f\n", value);*/ +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif return brw_imm_f(value); } else { @@ -501,7 +482,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -518,7 +499,7 @@ static void emit_abs( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -536,7 +517,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -546,7 +527,9 @@ static void emit_mov( struct brw_wm_compile *c, if (mask & (1<<i)) { struct brw_reg src, dst; dst = get_dst_reg(c, inst, i); - src = get_src_reg_imm(c, inst, 0, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -554,7 +537,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -584,7 +567,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c, } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -644,7 +627,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -713,7 +696,7 @@ static void emit_fb_write(struct brw_wm_compile *c, } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -743,7 +726,7 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -772,7 +755,7 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -798,7 +781,7 @@ static void emit_cinterp(struct brw_wm_compile *c, } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -832,7 +815,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ static void emit_frontfacing(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -861,7 +844,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -886,7 +869,7 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; @@ -905,7 +888,7 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -924,7 +907,7 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; @@ -948,7 +931,7 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst, tmp; @@ -985,43 +968,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1040,7 +1023,7 @@ static void emit_add(struct brw_wm_compile *c, } static void emit_arl(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, addr_reg; @@ -1053,7 +1036,7 @@ static void emit_arl(struct brw_wm_compile *c, } static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1072,7 +1055,7 @@ static void emit_sub(struct brw_wm_compile *c, } static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1091,7 +1074,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1110,7 +1093,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1177,7 +1160,7 @@ static void emit_min_max(struct brw_wm_compile *c, } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; @@ -1199,7 +1182,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1252,7 +1235,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1275,7 +1258,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1299,43 +1282,43 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1362,7 +1345,7 @@ static void emit_ddx(struct brw_wm_compile *c, } static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1505,7 +1488,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1675,7 +1658,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1978,7 +1961,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -2401,7 +2384,7 @@ static void noise4_sub( struct brw_wm_compile *c ) } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2443,7 +2426,7 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -2479,7 +2462,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2517,7 +2500,7 @@ static void emit_txb(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ @@ -2528,7 +2511,7 @@ static void emit_txb(struct brw_wm_compile *c, static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; @@ -2581,7 +2564,7 @@ static void emit_tex(struct brw_wm_compile *c, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ @@ -2618,22 +2601,22 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; + const struct prog_instruction *inst = &c->prog_instructions[i]; - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - /* +#if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); - */ +#endif /* fetch any constants that this instruction needs */ if (c->use_const_buffer) fetch_constants(c, inst); + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2819,6 +2802,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 2debd0678a5..92142764f5d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -322,7 +322,7 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; if (src.Abs) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 58fa6aaf8f9..67b41173fb2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e7d55d5dbd1..71840d1e4e8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -38,7 +38,7 @@ #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } -struct brw_wm_surface_key { + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { GLenum target, depthmode; dri_bo *bo; GLint format, internal_format; @@ -187,6 +191,7 @@ struct brw_wm_surface_key { GLuint offset; }; + static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -287,8 +292,8 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; - const GLuint j = MAX_DRAW_BUFFERS + unit; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -315,25 +320,25 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } } /** - * Create the constant buffer surface. Fragment shader constanst will be + * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ static dri_bo * brw_create_constant_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { const GLint w = key->width - 1; struct brw_surface_state surf; @@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - /* This is ok for all textures with channel width 8bit or less: - */ assert(key->bo); if (key->bo) surf.ss1.base_addr = key->bo->offset; /* reloc */ @@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, key, sizeof(*key), @@ -379,26 +382,100 @@ brw_create_constant_surface( struct brw_context *brw, /** - * Update the constant buffer surface. + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. */ -static void -brw_update_constant_surface( GLcontext *ctx, - const struct brw_fragment_program *fp ) +static dri_bo * +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) { struct brw_context *brw = brw_context(ctx); - struct brw_wm_surface_key key; - const GLuint j = BRW_WM_MAX_SURF - 1; - const GLuint numParams = fp->program.Base.Parameters->NumParameters; + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = fp->const_buffer; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(surf == 0); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; key.depthmode = GL_NONE; - key.pitch = numParams; - key.width = numParams; + key.pitch = params->NumParameters; + key.width = params->NumParameters; key.height = 1; key.depth = 1; key.cpp = 16; @@ -409,14 +486,16 @@ brw_update_constant_surface( GLcontext *ctx, key.width, key.height, key.depth, key.cpp, key.pitch); */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[j] == NULL) { - brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + dri_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); } + + return const_buffer; } @@ -426,15 +505,18 @@ brw_update_constant_surface( GLcontext *ctx, * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit, GLboolean cached) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; @@ -446,13 +528,27 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ + key.width = region->width; key.height = region->height; + key.pitch = region->pitch; key.cpp = region->cpp; } else { key.surface_type = BRW_SURFACE_NULL; @@ -488,7 +584,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -499,7 +595,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -528,6 +624,8 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, @@ -574,69 +672,159 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; + /* _NEW_BUFFERS */ /* Update surfaces for drawing buffers */ - if (brw->state.nr_color_regions > 1) { - for (i = 0; i < brw->state.nr_color_regions; i++) { - brw_update_region_surface(brw, brw->state.color_regions[i], i, - GL_FALSE); + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i, + GL_FALSE); } } else { - brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + /* Update surface / buffer for fragment shader constant buffer */ + { + const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + fp->const_buffer = + brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, + fp->program.Base.Parameters); + + brw->wm.nr_surfaces = surf + 1; + } + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint j = MAX_DRAW_BUFFERS + i; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[j]); - brw->wm.nr_surfaces = j + 1; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = j + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[j]); - brw->wm.surf_bo[j] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } } - /* Update surface for fragment shader constant buffer */ - { - const GLuint j = BRW_WM_MAX_SURF - 1; - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); + + if (brw->wm.nr_surfaces != old_nr_surfaces) + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->vs.nr_surfaces; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + data, data_size, + NULL, NULL); - brw_update_constant_surface(ctx, fp); - brw->wm.nr_surfaces = j + 1; + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->vs.surf_bo[i]); + } + } + + free(data); } + return bind_bo; +} - dri_bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); - if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; +/** + * Vertex shader surfaces. Just constant buffer for now. Could add vertex + * shader textures in the future. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* Update surface / buffer for vertex shader constant buffer */ + { + const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + vp->const_buffer = + brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, + vp->program.Base.Parameters); + + brw->vs.nr_surfaces = 1; + } + + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + + if (1) + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; +} + + +static void +prepare_surfaces(struct brw_context *brw) +{ + prepare_wm_surfaces(brw); + prepare_vs_surfaces(brw); } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_wm_surfaces, + .prepare = prepare_surfaces, }; diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4ae9b118a3d..49198281316 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -32,6 +32,8 @@ #include "main/mtypes.h" #include "main/context.h" #include "main/enums.h" +#include "main/texformat.h" +#include "main/colormac.h" #include "intel_blit.h" #include "intel_buffers.h" @@ -484,10 +486,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) const GLbitfield bufBit = 1 << buf; if ((clearMask & bufBit) && !(bufBit & skipBuffers)) { /* OK, clear this renderbuffer */ - struct intel_region *irb_region = - intel_get_rb_region(fb, buf); + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, buf); dri_bo *write_buffer = - intel_region_buffer(intel, irb_region, + intel_region_buffer(intel, irb->region, all ? INTEL_WRITE_FULL : INTEL_WRITE_PART); @@ -495,15 +496,13 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) GLint pitch, cpp; GLuint BR13, CMD; - ASSERT(irb_region); - - pitch = irb_region->pitch; - cpp = irb_region->cpp; + pitch = irb->region->pitch; + cpp = irb->region->cpp; DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, - irb_region->buffer, (pitch * cpp), - irb_region->draw_offset, + irb->region->buffer, (pitch * cpp), + irb->region->draw_offset, b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1); BR13 = 0xf0 << 16; @@ -529,7 +528,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) } #ifndef I915 - if (irb_region->tiling != I915_TILING_NONE) { + if (irb->region->tiling != I915_TILING_NONE) { CMD |= XY_DST_TILED; pitch /= 4; } @@ -540,9 +539,36 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) clearVal = clear_depth; } else { - clearVal = (cpp == 4) - ? intel->ClearColor8888 : intel->ClearColor565; - } + uint8_t clear[4]; + GLclampf *color = ctx->Color.ClearColor; + + CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); + CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); + CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); + CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); + + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + clearVal = intel->ClearColor8888; + break; + case MESA_FORMAT_RGB565: + clearVal = intel->ClearColor565; + break; + case MESA_FORMAT_ARGB4444: + clearVal = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clearVal = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + default: + _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", + irb->texformat->MesaFormat); + clearVal = 0; + } + } + /* _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n", buf, irb->Base.Name); @@ -558,7 +584,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH((b.y2 << 16) | b.x2); OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - irb_region->draw_offset); + irb->region->draw_offset); OUT_BATCH(clearVal); ADVANCE_BATCH(); clearMask &= ~bufBit; /* turn off bit, for faster loop exit */ diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index 0929a2c223c..90964df3553 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -202,6 +202,8 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) intel_batchbuffer_flush(intel->batch); intel->front_cliprects = GL_TRUE; colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT); + + intel->front_buffer_dirty = GL_TRUE; } else { if (!intel->constant_cliprect && intel->front_cliprects) @@ -319,6 +321,12 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) static void intelDrawBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + + intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT); + } + intel_draw_buffer(ctx, ctx->DrawBuffer); } diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 28281b38615..aed95c7c561 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -38,6 +38,7 @@ #include "main/enable.h" #include "main/macros.h" #include "main/matrix.h" +#include "main/polygon.h" #include "main/texstate.h" #include "main/shaders.h" #include "main/stencil.h" @@ -93,6 +94,7 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) GL_CURRENT_BIT | GL_DEPTH_BUFFER_BIT | GL_ENABLE_BIT | + GL_POLYGON_BIT | GL_STENCIL_BUFFER_BIT | GL_TRANSFORM_BIT | GL_CURRENT_BIT); @@ -114,6 +116,7 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) _mesa_Disable(GL_CLIP_PLANE3); _mesa_Disable(GL_CLIP_PLANE4); _mesa_Disable(GL_CLIP_PLANE5); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { saved_fp_enable = GL_TRUE; _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); @@ -146,6 +149,11 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) } } +#if FEATURE_ARB_vertex_buffer_object + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); +#endif + intel_meta_set_passthrough_transform(intel); for (i = 0; i < 4; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index a664e749360..3436b8ecd30 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -235,6 +235,11 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) region_name = "dri2 front buffer"; break; + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = intel_fb->color_rb[0]; + region_name = "dri2 fake front buffer"; + break; + case __DRI_BUFFER_BACK_LEFT: rb = intel_fb->color_rb[1]; region_name = "dri2 back buffer"; @@ -391,6 +396,27 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); + + if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) { + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if (screen->dri2.loader && + (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL)) { + (*screen->dri2.loader->flushFrontBuffer)(intel->driDrawable, + intel->driDrawable->loaderPrivate); + + /* Only clear the dirty bit if front-buffer rendering is no longer + * enabled. This is done so that the dirty bit can only be set in + * glDrawBuffer. Otherwise the dirty bit would have to be set at + * each of N places that do rendering. This has worse performances, + * but it is much easier to get correct. + */ + if (intel->is_front_buffer_rendering) { + intel->front_buffer_dirty = GL_FALSE; + } + } + } } void diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index d635f3f50dc..d798225ddd9 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -48,6 +48,8 @@ #define DV_PF_555 (1<<8) #define DV_PF_565 (2<<8) #define DV_PF_8888 (3<<8) +#define DV_PF_4444 (8<<8) +#define DV_PF_1555 (9<<8) struct intel_region; struct intel_context; @@ -262,11 +264,29 @@ struct intel_context * flush time while the lock is held. */ GLboolean constant_cliprect; + /** * In !constant_cliprect mode, set to true if the front cliprects should be * used instead of back. */ GLboolean front_cliprects; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + GLboolean front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + GLboolean is_front_buffer_rendering; + drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ int perf_boxes; @@ -319,6 +339,7 @@ extern char *__progname; #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define IS_POWER_OF_TWO(val) (((val) & (val - 1)) == 0) #define INTEL_FIREVERTICES(intel) \ do { \ diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index f04638206d5..a9dfe281cbd 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -800,6 +800,7 @@ static int decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) { unsigned int len, i, c, opcode, word, map, sampler, instr; + char *format; struct { uint32_t opcode; @@ -1001,6 +1002,35 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i (*failures)++; } return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + if (count < 2) + BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n", + format, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; } for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index a401f730ba2..52647ddf8b2 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -119,6 +119,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->RedBits = 5; rb->GreenBits = 6; rb->BlueBits = 5; + irb->texformat = &_mesa_texformat_rgb565; cpp = 2; break; case GL_RGB: @@ -132,6 +133,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 0; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ cpp = 4; break; case GL_RGBA: @@ -148,6 +150,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->GreenBits = 8; rb->BlueBits = 8; rb->AlphaBits = 8; + irb->texformat = &_mesa_texformat_argb8888; cpp = 4; break; case GL_STENCIL_INDEX: @@ -160,12 +163,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: rb->_ActualFormat = GL_DEPTH_COMPONENT16; rb->DataType = GL_UNSIGNED_SHORT; rb->DepthBits = 16; cpp = 2; + irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT24: @@ -174,6 +179,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DataType = GL_UNSIGNED_INT_24_8_EXT; rb->DepthBits = 24; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: @@ -182,6 +188,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->DepthBits = 24; rb->StencilBits = 8; cpp = 4; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(ctx, @@ -322,6 +329,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.GreenBits = 6; irb->Base.BlueBits = 5; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_rgb565; break; case GL_RGB8: irb->Base._ActualFormat = GL_RGB8; @@ -331,6 +339,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 8; irb->Base.AlphaBits = 0; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; /* XXX: Need xrgb8888 */ break; case GL_RGBA8: irb->Base._ActualFormat = GL_RGBA8; @@ -340,24 +349,28 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.BlueBits = 8; irb->Base.AlphaBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_argb8888; break; case GL_STENCIL_INDEX8_EXT: irb->Base._ActualFormat = GL_STENCIL_INDEX8_EXT; irb->Base._BaseFormat = GL_STENCIL_INDEX; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_BYTE; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH_COMPONENT16: irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 16; irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->texformat = &_mesa_texformat_z16; break; case GL_DEPTH_COMPONENT24: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; irb->Base.DepthBits = 24; irb->Base.DataType = GL_UNSIGNED_INT; + irb->texformat = &_mesa_texformat_s8_z24; break; case GL_DEPTH24_STENCIL8_EXT: irb->Base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; @@ -365,6 +378,7 @@ intel_create_renderbuffer(GLenum intFormat) irb->Base.DepthBits = 24; irb->Base.StencilBits = 8; irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; + irb->texformat = &_mesa_texformat_s8_z24; break; default: _mesa_problem(NULL, @@ -449,6 +463,8 @@ static GLboolean intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { + irb->texformat = texImage->TexFormat; + if (texImage->TexFormat == &_mesa_texformat_argb8888) { irb->Base._ActualFormat = GL_RGBA8; irb->Base._BaseFormat = GL_RGBA; @@ -458,9 +474,21 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, else if (texImage->TexFormat == &_mesa_texformat_rgb565) { irb->Base._ActualFormat = GL_RGB5; irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_SHORT; + irb->Base.DataType = GL_UNSIGNED_BYTE; DBG("Render to RGB5 texture OK\n"); } + else if (texImage->TexFormat == &_mesa_texformat_argb1555) { + irb->Base._ActualFormat = GL_RGB5_A1; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB1555 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_argb4444) { + irb->Base._ActualFormat = GL_RGBA4; + irb->Base._BaseFormat = GL_RGBA; + irb->Base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to ARGB4444 texture OK\n"); + } else if (texImage->TexFormat == &_mesa_texformat_z16) { irb->Base._ActualFormat = GL_DEPTH_COMPONENT16; irb->Base._BaseFormat = GL_DEPTH_COMPONENT; @@ -631,11 +659,11 @@ intel_finish_render_texture(GLcontext * ctx, static void intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) { - struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = intel_get_renderbuffer(fb, BUFFER_STENCIL); + int i; if (stencilRb && stencilRb != depthRb) { /* we only support combined depth/stencil buffers, not separate @@ -644,32 +672,21 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } - /* check that texture color buffers are a format we can render into */ - { - const struct gl_texture_format *supportedFormat; - GLuint i; + for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); - /* The texture format we can render into seems to depend on the - * screen depth. There currently seems to be a problem when - * rendering into a rgb565 texture when the screen is abgr8888. - */ + if (rb == NULL) + continue; - if (intel->ctx.Visual.rgbBits >= 24) - supportedFormat = &_mesa_texformat_argb8888; - else - supportedFormat = &_mesa_texformat_rgb565; - - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - const struct gl_texture_object *texObj = - fb->Attachment[BUFFER_COLOR0 + i].Texture; - if (texObj) { - const struct gl_texture_image *texImg = - texObj->Image[0][texObj->BaseLevel]; - if (texImg && texImg->TexFormat != supportedFormat) { - fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; - break; - } - } + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + break; + default: + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } } } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 7226ee026f6..f0665af482e 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -61,6 +61,8 @@ struct intel_renderbuffer struct gl_renderbuffer Base; struct intel_region *region; + const struct gl_texture_format *texformat; + GLuint vbl_pending; /**< vblank sequence number of pending flip */ uint8_t *span_cache; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c3a873f1abd..34b78ebc1ab 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -29,6 +29,7 @@ #include "main/macros.h" #include "main/mtypes.h" #include "main/colormac.h" +#include "main/texformat.h" #include "intel_buffers.h" #include "intel_fbo.h" @@ -313,6 +314,22 @@ static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb, #define INTEL_TAG(x) x##_RGB565 #include "intel_spantmp.h" +/* a4r4g4b4 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB4444 +#include "intel_spantmp.h" + +/* a1r5g5b5 color span and pixel functions */ +#define INTEL_PIXEL_FMT GL_BGRA +#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV +#define INTEL_READ_VALUE(offset) pread_16(irb, offset) +#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v) +#define INTEL_TAG(x) x##_ARGB1555 +#include "intel_spantmp.h" + /* a8r8g8b8 color span and pixel functions */ #define INTEL_PIXEL_FMT GL_BGRA #define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV @@ -561,8 +578,8 @@ intel_set_span_functions(struct intel_context *intel, else tiling = I915_TILING_NONE; - if (rb->_ActualFormat == GL_RGB5) { - /* 565 RGB */ + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_RGB565: switch (tiling) { case I915_TILING_NONE: default: @@ -575,38 +592,67 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitPointers_RGB565(rb); break; } - } - else if (rb->_ActualFormat == GL_RGB8) { - /* 8888 RGBx */ + break; + case MESA_FORMAT_ARGB4444: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_xRGB8888(rb); + intelInitPointers_ARGB4444(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_xRGB8888(rb); + intel_XTile_InitPointers_ARGB4444(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_xRGB8888(rb); + intel_YTile_InitPointers_ARGB4444(rb); break; } - } - else if (rb->_ActualFormat == GL_RGBA8) { - /* 8888 RGBA */ + break; + case MESA_FORMAT_ARGB1555: switch (tiling) { case I915_TILING_NONE: default: - intelInitPointers_ARGB8888(rb); + intelInitPointers_ARGB1555(rb); break; case I915_TILING_X: - intel_XTile_InitPointers_ARGB8888(rb); + intel_XTile_InitPointers_ARGB1555(rb); break; case I915_TILING_Y: - intel_YTile_InitPointers_ARGB8888(rb); + intel_YTile_InitPointers_ARGB1555(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) { + break; + case MESA_FORMAT_ARGB8888: + if (rb->AlphaBits == 0) { /* XXX: Need xRGB8888 Mesa format */ + /* 8888 RGBx */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_xRGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_xRGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_xRGB8888(rb); + break; + } + } else { + /* 8888 RGBA */ + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitPointers_ARGB8888(rb); + break; + case I915_TILING_X: + intel_XTile_InitPointers_ARGB8888(rb); + break; + case I915_TILING_Y: + intel_YTile_InitPointers_ARGB8888(rb); + break; + } + } + break; + case MESA_FORMAT_Z16: switch (tiling) { case I915_TILING_NONE: default: @@ -619,51 +665,57 @@ intel_set_span_functions(struct intel_context *intel, intel_YTile_InitDepthPointers_z16(rb); break; } - } - else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24(rb); - break; - } - } - else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitDepthPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitDepthPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitDepthPointers_z24_s8(rb); - break; - } - } - else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { - switch (tiling) { - case I915_TILING_NONE: - default: - intelInitStencilPointers_z24_s8(rb); - break; - case I915_TILING_X: - intel_XTile_InitStencilPointers_z24_s8(rb); - break; - case I915_TILING_Y: - intel_YTile_InitStencilPointers_z24_s8(rb); - break; + break; + case MESA_FORMAT_S8_Z24: + /* There are a few different ways SW asks us to access the S8Z24 data: + * Z24 depth-only depth reads + * S8Z24 depth reads + * S8Z24 stencil reads. + */ + if (rb->_ActualFormat == GL_DEPTH_COMPONENT24) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24(rb); + break; + } + } else if (rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitDepthPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitDepthPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitDepthPointers_z24_s8(rb); + break; + } + } else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) { + switch (tiling) { + case I915_TILING_NONE: + default: + intelInitStencilPointers_z24_s8(rb); + break; + case I915_TILING_X: + intel_XTile_InitStencilPointers_z24_s8(rb); + break; + case I915_TILING_Y: + intel_YTile_InitStencilPointers_z24_s8(rb); + break; + } } - } - else { + break; + default: _mesa_problem(NULL, - "Unexpected _ActualFormat in intelSetSpanFunctions"); + "Unexpected MesaFormat in intelSetSpanFunctions"); + break; } } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 71561cf85cd..1f192dafbe1 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -315,8 +315,8 @@ intelTexImage(GLcontext * ctx, GLint postConvWidth = width; GLint postConvHeight = height; GLint texelBytes, sizeInBytes; - GLuint dstRowStride, srcRowStride = texImage->RowStride; - + GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; + GLboolean needs_map; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -482,8 +482,15 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); + /* Two cases where we need a mapping of the miptree: when the user supplied + * data is mapped as well (non-PBO, memcpy upload) or when we're going to do + * (software) mipmap generation. + */ + needs_map = (pixels != NULL) || (level == texObj->BaseLevel && + texObj->GenerateMipmap); + if (intelImage->mt) { - if (pixels) + if (needs_map) texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, @@ -509,8 +516,9 @@ intelTexImage(GLcontext * ctx, } DBG("Upload image %dx%dx%d row_len %d " - "pitch %d\n", - width, height, depth, width * texelBytes, dstRowStride); + "pitch %d pixels %d compressed %d\n", + width, height, depth, width * texelBytes, dstRowStride, + pixels ? 1 : 0, compressed); /* Copy data. Would like to know when it's ok for us to eg. use * the blitter to copy. Or, use the hardware to do the format @@ -523,7 +531,7 @@ intelTexImage(GLcontext * ctx, _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch, 0, 0, intelImage->mt->level[level].width, - intelImage->mt->level[level].height/4, + (intelImage->mt->level[level].height+3)/4, pixels, srcRowStride, 0, 0); @@ -549,7 +557,7 @@ intelTexImage(GLcontext * ctx, _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - if (pixels) + if (needs_map) intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index aadd1443ad9..620f29b5c6e 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -290,7 +290,7 @@ static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_regis t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src) @@ -302,7 +302,7 @@ static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_sr t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } static unsigned long t_opcode(enum prog_opcode opcode) @@ -700,7 +700,7 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = UNUSED_SRC_0; o_inst->src2 = UNUSED_SRC_0; } @@ -712,12 +712,12 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte t_swizzle(GET_SWZ(src[0].Swizzle, 0)), SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_ZERO, SWIZZLE_ZERO, t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -766,11 +766,11 @@ if ((o_inst - vp->instr) == 31) { o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); } else { o_inst->src1 = t_src(vp, &src[1]); @@ -792,7 +792,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 0)), @@ -800,7 +800,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -815,7 +815,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = t_src(vp, &src[1]); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -831,7 +831,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -846,7 +846,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 2)), t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; goto next; @@ -874,7 +874,7 @@ else { VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, /* Not 100% sure about this */ - (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); + (!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/); o_inst->src2 = UNUSED_SRC_0; u_temp_i--; @@ -899,7 +899,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z @@ -907,7 +907,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - src[1].NegateBase) | (src[1].RelAddr << 4); + src[1].Negate) | (src[1].RelAddr << 4); o_inst->src2 = UNUSED_SRC_1; o_inst++; @@ -922,7 +922,7 @@ else { t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w t_src_class(src[1].File), - (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); + (!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z @@ -930,7 +930,7 @@ else { t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w t_src_class(src[0].File), - src[0].NegateBase) | (src[0].RelAddr << 4); + src[0].Negate) | (src[0].RelAddr << 4); o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1, VSF_IN_COMPONENT_X, diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 921ca33c750..ea530fd00e7 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -194,9 +194,9 @@ GLboolean r300_transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c index a86d2bd4712..fc9d855bce6 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_swizzle.c @@ -92,7 +92,7 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { if (reg.Abs) - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; if (opcode == OPCODE_KIL || opcode == OPCODE_TEX || @@ -100,7 +100,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) opcode == OPCODE_TXP) { int j; - if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs)) + if (reg.Abs || reg.Negate) return GL_FALSE; for(j = 0; j < 4; ++j) { @@ -121,7 +121,7 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) relevant |= 1 << j; - if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; if (!lookup_native_swizzle(reg.Swizzle)) @@ -137,13 +137,12 @@ GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { if (src.Abs) - src.NegateBase = 0; + src.Negate = NEGATE_NONE; while(dst.WriteMask) { const struct swizzle_data *best_swizzle = 0; GLuint best_matchcount = 0; GLuint best_matchmask = 0; - GLboolean rgbnegate; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { @@ -157,6 +156,11 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, if (swz == SWIZZLE_NIL) continue; if (swz == GET_SWZ(sd->hash, comp)) { + /* check if the negate bit of current component + * is the same for already matched components */ + if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp)))) + continue; + matchcount++; matchmask |= 1 << comp; } @@ -170,13 +174,6 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, } } - if ((src.NegateBase & best_matchmask) != 0) { - best_matchmask &= src.NegateBase; - rgbnegate = !src.NegateAbs; - } else { - rgbnegate = src.NegateAbs; - } - struct prog_instruction *inst; _mesa_insert_instructions(s->Program, s->IP, 1); @@ -185,6 +182,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, inst->DstReg = dst; inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE; /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ dst.WriteMask &= ~inst->DstReg.WriteMask; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 50806575ced..146daa367cd 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -245,7 +245,7 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -254,13 +254,13 @@ static unsigned long t_src(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 2)), t_swizzle(GET_SWZ(src->Swizzle, 3)), t_src_class(src->File), - src->NegateBase) | (src->RelAddr << 4); + src->Negate) | (src->RelAddr << 4); } static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the NEGATE_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -269,8 +269,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src-> - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4); } @@ -307,7 +306,7 @@ static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 3)), t_src_class(src[0].File), (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = 0; @@ -369,8 +368,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), @@ -378,8 +376,7 @@ static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 1)), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -422,8 +419,7 @@ static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 2)), PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = t_src(vp, &src[1]); inst[3] = __CONST(1, SWIZZLE_ZERO); @@ -519,7 +515,7 @@ static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, /* Not 100% sure about this */ (!src[0]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE /*VSF_FLAG_ALL */ ); inst[3] = __CONST(0, SWIZZLE_ZERO); (*u_temp_i)--; @@ -564,8 +560,7 @@ static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = __CONST(0, SWIZZLE_ZERO); inst[3] = __CONST(0, SWIZZLE_ZERO); @@ -592,24 +587,21 @@ static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); return inst; @@ -837,7 +829,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = 0; #else @@ -857,7 +849,7 @@ static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); #endif @@ -905,16 +897,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[3] = __CONST(1, SWIZZLE_ZERO); inst += 4; @@ -931,15 +921,14 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W t_src_class(src[1].File), (!src[1]. - NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index e9c0d89dd43..8f0b70ad3a3 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -152,9 +152,9 @@ GLboolean r500_transform_TEX( * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[2].Negate = tgt[0].SrcReg[2].Negate ^ NEGATE_XYZW; else - tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + tgt[1].SrcReg[0].Negate = tgt[0].SrcReg[0].Negate ^ NEGATE_XYZW; tgt[2].Opcode = OPCODE_CMP; tgt[2].DstReg = orig_inst->DstReg; @@ -196,8 +196,8 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (reg.Abs) return GL_FALSE; - if (reg.NegateAbs) - reg.NegateBase ^= 15; + if (reg.Negate) + reg.Negate ^= NEGATE_XYZW; if (opcode == OPCODE_KIL) { if (reg.Swizzle != SWIZZLE_NOOP) @@ -206,7 +206,7 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(reg.Swizzle, i); if (swz == SWIZZLE_NIL) { - reg.NegateBase &= ~(1 << i); + reg.Negate &= ~(1 << i); continue; } if (swz >= 4) @@ -214,15 +214,14 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) } } - if (reg.NegateBase) + if (reg.Negate) return GL_FALSE; return GL_TRUE; } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs - && !reg.NegateBase && !reg.NegateAbs) + if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) return GL_TRUE; return GL_FALSE; @@ -237,7 +236,7 @@ GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) relevant |= 1 << i; } - if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant)) + if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) return GL_FALSE; return GL_TRUE; @@ -260,7 +259,7 @@ void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, GLuint swz = GET_SWZ(src.Swizzle, i); if (swz == SWIZZLE_NIL) continue; - negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i; + negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0)); diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c index a083c3d2436..4a2e1cba402 100644 --- a/src/mesa/drivers/dri/r300/radeon_nqssadce.c +++ b/src/mesa/drivers/dri/r300/radeon_nqssadce.c @@ -61,12 +61,12 @@ static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_reg struct prog_src_register tmp = srcreg; int i; tmp.Swizzle = 0; - tmp.NegateBase = 0; + tmp.Negate = NEGATE_NONE; for(i = 0; i < 4; ++i) { GLuint swz = GET_SWZ(swizzle, i); if (swz < 4) { tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; } else { tmp.Swizzle |= swz << (i*3); } @@ -103,9 +103,8 @@ static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, inst->SrcReg[src].File = PROGRAM_TEMPORARY; inst->SrcReg[src].Index = dstreg.Index; inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].NegateBase = 0; + inst->SrcReg[src].Negate = NEGATE_NONE; inst->SrcReg[src].Abs = 0; - inst->SrcReg[src].NegateAbs = 0; for(i = 0; i < 4; ++i) { if (GET_BIT(sourced, i)) inst->SrcReg[src].Swizzle |= i << (3*i); diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 1ef71e74dcf..8283723bad7 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -81,18 +81,6 @@ static struct prog_instruction *emit3(struct gl_program* p, return fpi; } -static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) -{ - SrcReg->Swizzle &= ~(7 << (3*coordinate)); - SrcReg->Swizzle |= swz << (3*coordinate); -} - -static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) -{ - SrcReg->NegateBase &= ~(1 << coordinate); - SrcReg->NegateBase |= (negate << coordinate); -} - static struct prog_dst_register dstreg(int file, int index) { struct prog_dst_register dst; @@ -156,15 +144,14 @@ static struct prog_src_register absolute(struct prog_src_register reg) { struct prog_src_register newreg = reg; newreg.Abs = 1; - newreg.NegateBase = 0; - newreg.NegateAbs = 0; + newreg.Negate = NEGATE_NONE; return newreg; } static struct prog_src_register negate(struct prog_src_register reg) { struct prog_src_register newreg = reg; - newreg.NegateAbs = !newreg.NegateAbs; + newreg.Negate = newreg.Negate ^ NEGATE_XYZW; return newreg; } @@ -189,8 +176,7 @@ static void transform_ABS(struct radeon_transform_context* t, { struct prog_src_register src = inst->SrcReg[0]; src.Abs = 1; - src.NegateBase = 0; - src.NegateAbs = 0; + src.Negate = NEGATE_NONE; emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src); } @@ -198,18 +184,9 @@ static void transform_DPH(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src0 = inst->SrcReg[0]; - if (src0.NegateAbs) { - if (src0.Abs) { - int tempreg = radeonFindFreeTemporary(t); - emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0); - src0 = srcreg(src0.File, src0.Index); - } else { - src0.NegateAbs = 0; - src0.NegateBase ^= NEGATE_XYZW; - } - } - set_swizzle(&src0, 3, SWIZZLE_ONE); - set_negate_base(&src0, 3, 0); + src0.Negate &= ~NEGATE_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= SWIZZLE_ONE << (3 * 3); emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]); } @@ -649,7 +626,7 @@ GLboolean radeonTransformDeriv(struct radeon_transform_context* t, B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - B.NegateBase = NEGATE_XYZW; + B.Negate = NEGATE_XYZW; emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], B); diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 49aa90dd94a..5c6594bc2e1 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -255,8 +255,7 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) inst->SrcReg[2] = inst->SrcReg[1]; inst->SrcReg[1].File = PROGRAM_BUILTIN; inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].NegateBase = 0; - inst->SrcReg[1].NegateAbs = 0; + inst->SrcReg[1].Negate = NEGATE_NONE; inst->Opcode = OPCODE_MAD; break; case OPCODE_CMP: @@ -722,7 +721,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ if (pairinst->NeedRGB && !pairinst->IsTranscendent) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = 0; int j; for(j = 0; j < 3; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); @@ -730,8 +728,6 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; - if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) - negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) @@ -739,12 +735,11 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; - GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; @@ -756,7 +751,7 @@ static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_ pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); } } |