summary refs log tree commit diff stats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/common/driverfuncs.c3
-rw-r--r--src/mesa/drivers/common/meta.c28
-rw-r--r--src/mesa/drivers/dri/i915/i830_texstate.c26
-rw-r--r--src/mesa/drivers/dri/i915/i915_texstate.c40
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c39
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c15
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c157
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h55
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c36
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h9
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp591
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h19
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp56
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp47
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_state.c127
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c51
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf_state.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.c49
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h28
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_cache.c165
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_dump.c129
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c35
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_state.c130
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c36
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h15
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c17
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c53
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c295
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c303
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c14
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c252
-rw-r--r--src/mesa/drivers/dri/i965/gen6_depthstencil.c154
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sampler_state.c14
-rw-r--r--src/mesa/drivers/dri/i965/gen6_scissor_state.c70
-rw-r--r--src/mesa/drivers/dri/i965/gen6_viewport_state.c57
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c93
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c46
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c5
-rw-r--r--src/mesa/drivers/dri/intel/intel_clear.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c22
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h1
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_copy.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c33
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.h1
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_validate.c11
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile4
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c27
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c353
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h40
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c270
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h24
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_list.c90
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_list.h46
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c28
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c62
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c778
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_variable.c484
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_variable.h84
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c5
-rw-r--r--src/mesa/drivers/windows/fx/fx.rc39
-rw-r--r--src/mesa/drivers/windows/fx/fxopengl.def953
-rw-r--r--src/mesa/drivers/windows/fx/fxwgl.c1307
-rw-r--r--src/mesa/drivers/windows/gdi/InitCritSections.cpp33
-rw-r--r--src/mesa/drivers/windows/gdi/wgl.c2
-rw-r--r--src/mesa/drivers/windows/gdi/wmesa.c263
-rw-r--r--src/mesa/drivers/windows/gdi/wmesadef.h4
-rw-r--r--src/mesa/drivers/x11/xm_dd.c10
92 files changed, 3838 insertions, 4579 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 854dea94504..0dbc7c3e853 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -41,6 +41,7 @@
#include "main/bufferobj.h"
#include "main/fbobject.h"
#include "main/texrender.h"
+#include "main/samplerobj.h"
#include "main/syncobj.h"
#include "main/texturebarrier.h"
#include "main/transformfeedback.h"
@@ -200,6 +201,8 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
_mesa_init_transform_feedback_functions(driver);
+ _mesa_init_sampler_object_functions(driver);
+
/* T&L stuff */
driver->NeedValidate = GL_FALSE;
driver->ValidateTnlModule = NULL;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 6c35fa10d8a..08b6024639f 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -40,6 +40,7 @@
#include "main/bufferobj.h"
#include "main/buffers.h"
#include "main/colortab.h"
+#include "main/condrender.h"
#include "main/depth.h"
#include "main/enable.h"
#include "main/fbobject.h"
@@ -94,6 +95,7 @@
#define META_VIEWPORT 0x4000
#define META_CLAMP_FRAGMENT_COLOR 0x8000
#define META_CLAMP_VERTEX_COLOR 0x10000
+#define META_CONDITIONAL_RENDER 0x20000
/*@}*/
@@ -188,6 +190,10 @@ struct save_state
/** META_CLAMP_VERTEX_COLOR */
GLenum ClampVertexColor;
+ /** META_CONDITIONAL_RENDER */
+ struct gl_query_object *CondRenderQuery;
+ GLenum CondRenderMode;
+
/** Miscellaneous (always disabled) */
GLboolean Lighting;
};
@@ -597,6 +603,14 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
_mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, GL_FALSE);
}
+ if (state & META_CONDITIONAL_RENDER) {
+ save->CondRenderQuery = ctx->Query.CondRenderQuery;
+ save->CondRenderMode = ctx->Query.CondRenderMode;
+
+ if (ctx->Query.CondRenderQuery)
+ _mesa_EndConditionalRender();
+ }
+
/* misc */
{
save->Lighting = ctx->Light.Enabled;
@@ -869,6 +883,12 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_ClampColorARB(GL_CLAMP_VERTEX_COLOR, save->ClampVertexColor);
}
+ if (state & META_CONDITIONAL_RENDER) {
+ if (save->CondRenderQuery)
+ _mesa_BeginConditionalRender(save->CondRenderQuery->Id,
+ save->CondRenderMode);
+ }
+
/* misc */
if (save->Lighting) {
_mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE);
@@ -1442,7 +1462,10 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
};
struct vertex verts[4];
/* save all state but scissor, pixel pack/unpack */
- GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE;
+ GLbitfield metaSave = (META_ALL -
+ META_SCISSOR -
+ META_PIXEL_STORE -
+ META_CONDITIONAL_RENDER);
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
if (buffers & BUFFER_BITS_COLOR) {
@@ -1848,7 +1871,8 @@ _mesa_meta_DrawPixels(struct gl_context *ctx,
* just going for the matching set of channels, in floating
* point.
*/
- if (ctx->Color.ClampFragmentColor != GL_TRUE)
+ if (ctx->Color.ClampFragmentColor != GL_TRUE &&
+ ctx->Extensions.ARB_texture_float)
texIntFormat = GL_RGBA32F;
}
else if (_mesa_is_stencil_format(format)) {
diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
index 7554bd5e7b9..3298dbb69f5 100644
--- a/src/mesa/drivers/dri/i915/i830_texstate.c
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -29,6 +29,7 @@
#include "main/enums.h"
#include "main/colormac.h"
#include "main/macros.h"
+#include "main/samplerobj.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -120,6 +121,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
struct gl_texture_object *tObj = tUnit->_Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage;
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint *state = i830->state.Tex[unit], format, pitch;
GLint lodbias;
GLubyte border[4];
@@ -193,7 +195,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
float maxlod;
uint32_t minlod_fixed, maxlod_fixed;
- switch (tObj->Sampler.MinFilter) {
+ switch (sampler->MinFilter) {
case GL_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NONE;
@@ -222,12 +224,12 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
return GL_FALSE;
}
- if (tObj->Sampler.MaxAnisotropy > 1.0) {
+ if (sampler->MaxAnisotropy > 1.0) {
minFilt = FILTER_ANISOTROPIC;
magFilt = FILTER_ANISOTROPIC;
}
else {
- switch (tObj->Sampler.MagFilter) {
+ switch (sampler->MagFilter) {
case GL_NEAREST:
magFilt = FILTER_NEAREST;
break;
@@ -239,7 +241,7 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
}
- lodbias = (int) ((tUnit->LodBias + tObj->Sampler.LodBias) * 16.0);
+ lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
if (lodbias < -64)
lodbias = -64;
if (lodbias > 63)
@@ -259,8 +261,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
* addressable (smallest resolution) LOD. Use it to cover both
* MAX_LEVEL and MAX_LOD.
*/
- minlod_fixed = U_FIXED(CLAMP(tObj->Sampler.MinLod, 0.0, 11), 4);
- maxlod = MIN2(tObj->Sampler.MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
+ minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4);
+ maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM ||
intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2);
@@ -279,8 +281,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
{
- GLenum ws = tObj->Sampler.WrapS;
- GLenum wt = tObj->Sampler.WrapT;
+ GLenum ws = sampler->WrapS;
+ GLenum wt = sampler->WrapT;
/* 3D textures not available on i830
@@ -300,10 +302,10 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
/* convert border color from float to ubyte */
- CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->Sampler.BorderColor.f[0]);
- CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->Sampler.BorderColor.f[1]);
- CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->Sampler.BorderColor.f[2]);
- CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->Sampler.BorderColor.f[3]);
+ CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]);
+ CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]);
+ CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]);
+ CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]);
state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
border[0],
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
index 742bb994adb..5aa2ea18048 100644
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -29,6 +29,7 @@
#include "main/enums.h"
#include "main/macros.h"
#include "main/colormac.h"
+#include "main/samplerobj.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -136,6 +137,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
struct gl_texture_object *tObj = tUnit->_Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage;
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint *state = i915->state.Tex[unit], format, pitch;
GLint lodbias, aniso = 0;
GLubyte border[4];
@@ -164,7 +166,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
format = translate_texture_format(firstImage->TexFormat,
firstImage->InternalFormat,
- tObj->Sampler.DepthMode);
+ sampler->DepthMode);
pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;
state[I915_TEXREG_MS3] =
@@ -181,7 +183,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
* (lowest resolution) LOD. Use it to cover both MAX_LEVEL and
* MAX_LOD.
*/
- maxlod = MIN2(tObj->Sampler.MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
+ maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_MS4] =
((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
MS4_CUBE_FACE_ENA_MASK |
@@ -192,7 +194,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
GLuint minFilt, mipFilt, magFilt;
- switch (tObj->Sampler.MinFilter) {
+ switch (sampler->MinFilter) {
case GL_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NONE;
@@ -221,16 +223,16 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
return GL_FALSE;
}
- if (tObj->Sampler.MaxAnisotropy > 1.0) {
+ if (sampler->MaxAnisotropy > 1.0) {
minFilt = FILTER_ANISOTROPIC;
magFilt = FILTER_ANISOTROPIC;
- if (tObj->Sampler.MaxAnisotropy > 2.0)
+ if (sampler->MaxAnisotropy > 2.0)
aniso = SS2_MAX_ANISO_4;
else
aniso = SS2_MAX_ANISO_2;
}
else {
- switch (tObj->Sampler.MagFilter) {
+ switch (sampler->MagFilter) {
case GL_NEAREST:
magFilt = FILTER_NEAREST;
break;
@@ -242,7 +244,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
}
- lodbias = (int) ((tUnit->LodBias + tObj->Sampler.LodBias) * 16.0);
+ lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
if (lodbias < -256)
lodbias = -256;
if (lodbias > 255)
@@ -258,14 +260,14 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
/* Shadow:
*/
- if (tObj->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
+ if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
tObj->Target != GL_TEXTURE_3D) {
if (tObj->Target == GL_TEXTURE_1D)
return GL_FALSE;
state[I915_TEXREG_SS2] |=
(SS2_SHADOW_ENABLE |
- intel_translate_shadow_compare_func(tObj->Sampler.CompareFunc));
+ intel_translate_shadow_compare_func(sampler->CompareFunc));
minFilt = FILTER_4X4_FLAT;
magFilt = FILTER_4X4_FLAT;
@@ -278,9 +280,9 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
{
- GLenum ws = tObj->Sampler.WrapS;
- GLenum wt = tObj->Sampler.WrapT;
- GLenum wr = tObj->Sampler.WrapR;
+ GLenum ws = sampler->WrapS;
+ GLenum wt = sampler->WrapT;
+ GLenum wr = sampler->WrapR;
float minlod;
/* We program 1D textures as 2D textures, so the 2D texcoord could
@@ -298,8 +300,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
* clamp_to_border.
*/
if (tObj->Target == GL_TEXTURE_3D &&
- (tObj->Sampler.MinFilter != GL_NEAREST ||
- tObj->Sampler.MagFilter != GL_NEAREST) &&
+ (sampler->MinFilter != GL_NEAREST ||
+ sampler->MagFilter != GL_NEAREST) &&
(ws == GL_CLAMP ||
wt == GL_CLAMP ||
wr == GL_CLAMP ||
@@ -322,7 +324,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
(translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
(translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
- minlod = MIN2(tObj->Sampler.MinLod, tObj->_MaxLevel - tObj->BaseLevel);
+ minlod = MIN2(sampler->MinLod, tObj->_MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) <<
SS3_MIN_LOD_SHIFT);
@@ -330,10 +332,10 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
}
/* convert border color from float to ubyte */
- CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->Sampler.BorderColor.f[0]);
- CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->Sampler.BorderColor.f[1]);
- CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->Sampler.BorderColor.f[2]);
- CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->Sampler.BorderColor.f[3]);
+ CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]);
+ CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]);
+ CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]);
+ CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]);
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
/* GL specs that border color for depth textures is taken from the
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index b05ba35d65f..849018b74ae 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -61,7 +61,6 @@ DRIVER_SOURCES = \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
- brw_state.c \
brw_state_batch.c \
brw_state_cache.c \
brw_state_dump.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index 74a66af31a5..94b8c20b019 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -37,28 +37,36 @@
#include "main/macros.h"
#include "intel_batchbuffer.h"
-void
-brw_update_cc_vp(struct brw_context *brw)
+static void
+prepare_cc_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
- struct brw_cc_viewport ccv;
+ struct brw_cc_viewport *ccv;
- memset(&ccv, 0, sizeof(ccv));
+ ccv = brw_state_batch(brw, sizeof(*ccv), 32, &brw->cc.vp_offset);
/* _NEW_TRANSOFORM */
if (ctx->Transform.DepthClamp) {
/* _NEW_VIEWPORT */
- ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
- ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ ccv->min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ ccv->max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
} else {
- ccv.min_depth = 0.0;
- ccv.max_depth = 1.0;
+ ccv->min_depth = 0.0;
+ ccv->max_depth = 1.0;
}
- drm_intel_bo_unreference(brw->cc.vp_bo);
- brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv));
+ brw->state.dirty.cache |= CACHE_NEW_CC_VP;
}
+const struct brw_tracked_state brw_cc_vp = {
+ .dirty = {
+ .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
+ .brw = BRW_NEW_BATCH,
+ .cache = 0
+ },
+ .prepare = prepare_cc_vp
+};
+
/**
* Modify blend function to force destination alpha to 1.0
*
@@ -81,11 +89,6 @@ fix_xRGB_alpha(GLenum function)
return function;
}
-static void prepare_cc_unit(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->cc.vp_bo);
-}
-
/**
* Creates the state cache entry for the given CC unit key.
*/
@@ -209,7 +212,8 @@ static void upload_cc_unit(struct brw_context *brw)
cc->cc5.statistics_enable = 1;
/* CACHE_NEW_CC_VP */
- cc->cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+ cc->cc4.cc_viewport_state_offset = (intel->batch.bo->offset +
+ brw->cc.vp_offset) >> 5; /* reloc */
brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
@@ -217,7 +221,7 @@ static void upload_cc_unit(struct brw_context *brw)
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
(brw->cc.state_offset +
offsetof(struct brw_cc_unit_state, cc4)),
- brw->cc.vp_bo, 0,
+ intel->batch.bo, brw->cc.vp_offset,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
@@ -227,7 +231,6 @@ const struct brw_tracked_state brw_cc_unit = {
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_CC_VP
},
- .prepare = prepare_cc_unit,
.emit = upload_cc_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 1be165cc9a1..3c175515408 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -144,14 +144,12 @@ static void compile_clip_prog( struct brw_context *brw,
/* Upload
*/
drm_intel_bo_unreference(brw->clip.prog_bo);
- brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
- BRW_CLIP_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- sizeof(c.prog_data),
- &brw->clip.prog_data);
+ brw->clip.prog_bo = brw_upload_cache(&brw->cache,
+ BRW_CLIP_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->clip.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
@@ -270,7 +268,6 @@ static void upload_clip_prog(struct brw_context *brw)
drm_intel_bo_unreference(brw->clip.prog_bo);
brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
&key, sizeof(key),
- NULL, 0,
&brw->clip.prog_data);
if (brw->clip.prog_bo == NULL)
compile_clip_prog( brw, &key );
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 60fd5fa7d9e..6015c8cbe9f 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -33,148 +33,101 @@
#include "brw_state.h"
#include "brw_defines.h"
-struct brw_clip_unit_key {
- unsigned int total_grf;
- unsigned int urb_entry_read_length;
- unsigned int curb_entry_read_length;
- unsigned int clip_mode;
-
- unsigned int curbe_offset;
-
- unsigned int nr_urb_entries, urb_size;
-
- GLboolean depth_clamp;
-};
-
static void
-clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_CLIP_PROG */
- key->total_grf = brw->clip.prog_data->total_grf;
- key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
- key->clip_mode = brw->clip.prog_data->clip_mode;
-
- /* BRW_NEW_CURBE_OFFSETS */
- key->curbe_offset = brw->curbe.clip_start;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_clip_entries;
- key->urb_size = brw->urb.vsize;
-
- /* _NEW_TRANSOFORM */
- key->depth_clamp = ctx->Transform.DepthClamp;
-}
-
-static drm_intel_bo *
-clip_unit_create_from_key(struct brw_context *brw,
- struct brw_clip_unit_key *key)
+brw_prepare_clip_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_clip_unit_state clip;
- drm_intel_bo *bo;
+ struct gl_context *ctx = &intel->ctx;
+ struct brw_clip_unit_state *clip;
- memset(&clip, 0, sizeof(clip));
+ clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset);
+ memset(clip, 0, sizeof(*clip));
- clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+ /* CACHE_NEW_CLIP_PROG */
+ clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
+ 16 - 1);
/* reloc */
- clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+ clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
- clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- clip.thread1.single_program_flow = 1;
+ clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ clip->thread1.single_program_flow = 1;
- clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
- clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- clip.thread3.dispatch_grf_start_reg = 1;
- clip.thread3.urb_entry_read_offset = 0;
+ clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+ clip->thread3.const_urb_entry_read_length =
+ brw->clip.prog_data->curb_read_length;
+
+ /* BRW_NEW_CURBE_OFFSETS */
+ clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ clip->thread3.dispatch_grf_start_reg = 1;
+ clip->thread3.urb_entry_read_offset = 0;
- clip.thread4.nr_urb_entries = key->nr_urb_entries;
- clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ /* BRW_NEW_URB_FENCE */
+ clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
+ clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
/* If we have enough clip URB entries to run two threads, do so.
*/
- if (key->nr_urb_entries >= 10) {
+ if (brw->urb.nr_clip_entries >= 10) {
/* Half of the URB entries go to each thread, and it has to be an
* even number.
*/
- assert(key->nr_urb_entries % 2 == 0);
+ assert(brw->urb.nr_clip_entries % 2 == 0);
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
* only 2 threads can output VUEs at a time.
*/
if (intel->gen == 5)
- clip.thread4.max_threads = 16 - 1;
+ clip->thread4.max_threads = 16 - 1;
else
- clip.thread4.max_threads = 2 - 1;
+ clip->thread4.max_threads = 2 - 1;
} else {
- assert(key->nr_urb_entries >= 5);
- clip.thread4.max_threads = 1 - 1;
+ assert(brw->urb.nr_clip_entries >= 5);
+ clip->thread4.max_threads = 1 - 1;
}
if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
- clip.thread4.max_threads = 0;
+ clip->thread4.max_threads = 0;
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- clip.thread4.stats_enable = 1;
-
- clip.clip5.userclip_enable_flags = 0x7f;
- clip.clip5.userclip_must_clip = 1;
- clip.clip5.guard_band_enable = 0;
- if (!key->depth_clamp)
- clip.clip5.viewport_z_clip_enable = 1;
- clip.clip5.viewport_xy_clip_enable = 1;
- clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
- clip.clip5.api_mode = BRW_CLIP_API_OGL;
- clip.clip5.clip_mode = key->clip_mode;
+ clip->thread4.stats_enable = 1;
- if (intel->is_g4x)
- clip.clip5.negative_w_clip_test = 1;
+ clip->clip5.userclip_enable_flags = 0x7f;
+ clip->clip5.userclip_must_clip = 1;
+ clip->clip5.guard_band_enable = 0;
+ /* _NEW_TRANSFORM */
+ if (!ctx->Transform.DepthClamp)
+ clip->clip5.viewport_z_clip_enable = 1;
+ clip->clip5.viewport_xy_clip_enable = 1;
+ clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+ clip->clip5.api_mode = BRW_CLIP_API_OGL;
+ clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
- clip.clip6.clipper_viewport_state_ptr = 0;
- clip.viewport_xmin = -1;
- clip.viewport_xmax = 1;
- clip.viewport_ymin = -1;
- clip.viewport_ymax = 1;
+ if (intel->is_g4x)
+ clip->clip5.negative_w_clip_test = 1;
- bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
- key, sizeof(*key),
- &brw->clip.prog_bo, 1,
- &clip, sizeof(clip));
+ clip->clip6.clipper_viewport_state_ptr = 0;
+ clip->viewport_xmin = -1;
+ clip->viewport_xmax = 1;
+ clip->viewport_ymin = -1;
+ clip->viewport_ymax = 1;
/* Emit clip program relocation */
assert(brw->clip.prog_bo);
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
- brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ (brw->clip.state_offset +
+ offsetof(struct brw_clip_unit_state, thread0)),
+ brw->clip.prog_bo, clip->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
- return bo;
-}
-
-static void upload_clip_unit( struct brw_context *brw )
-{
- struct brw_clip_unit_key key;
-
- clip_unit_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->clip.state_bo);
- brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
- &key, sizeof(key),
- &brw->clip.prog_bo, 1,
- NULL);
- if (brw->clip.state_bo == NULL) {
- brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
- }
+ brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
}
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
},
- .prepare = upload_clip_unit,
+ .prepare = brw_prepare_clip_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 230d326fa12..db6466ff1ae 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -51,9 +51,6 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
brwInitFragProgFuncs( functions );
brw_init_queryobj_functions(functions);
-
- functions->Enable = brw_enable;
- functions->DepthRange = brw_depth_range;
}
GLboolean brwCreateContext( int api,
@@ -232,11 +229,6 @@ GLboolean brwCreateContext( int api,
brw_draw_init( brw );
- /* Now that most driver functions are hooked up, initialize some of the
- * immediate state.
- */
- brw_update_cc_vp(brw);
-
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1daa49abfb3..26cd8209c65 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -204,13 +204,16 @@ struct brw_wm_prog_data {
GLuint urb_read_length;
GLuint first_curbe_grf;
+ GLuint first_curbe_grf_16;
GLuint total_grf;
+ GLuint total_grf_16;
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
GLboolean error;
int dispatch_width;
+ uint32_t prog_offset_16;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
@@ -308,7 +311,6 @@ enum brw_cache_id {
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
- BRW_SAMPLER_DEFAULT_COLOR,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
@@ -336,8 +338,6 @@ struct brw_cache_item {
GLuint hash;
GLuint key_size; /* for variable-sized keys */
const void *key;
- drm_intel_bo **reloc_bufs;
- GLuint nr_reloc_bufs;
drm_intel_bo *bo;
@@ -381,7 +381,6 @@ struct brw_tracked_state {
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
-#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
@@ -630,29 +629,38 @@ struct brw_context
int8_t *constant_map; /* variable array following prog_data */
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
drm_intel_bo *const_bo;
+ uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
+
+ uint32_t push_const_offset; /* Offset in the batchbuffer */
+ int push_const_size; /* in 256-bit register increments */
} vs;
struct {
struct brw_gs_prog_data *prog_data;
GLboolean prog_active;
+ uint32_t state_offset;
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
} gs;
struct {
struct brw_clip_prog_data *prog_data;
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
- drm_intel_bo *vp_bo;
+
+ /* Offset in the batch to the CLIP state on pre-gen6. */
+ uint32_t state_offset;
+
+ /* As of gen6, this is the offset in the batch to the CLIP VP,
+ * instead of vp_bo.
+ */
+ uint32_t vp_offset;
} clip;
@@ -660,9 +668,7 @@ struct brw_context
struct brw_sf_prog_data *prog_data;
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
uint32_t state_offset;
- drm_intel_bo *vp_bo;
uint32_t vp_offset;
} sf;
@@ -675,8 +681,9 @@ struct brw_context
*/
GLbitfield input_size_masks[4];
- /** Array of surface default colors (texture border color) */
- drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+ /** offsets in the batch to sampler default colors (texture border color)
+ */
+ uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
GLuint render_surf;
GLuint nr_surfaces;
@@ -685,35 +692,32 @@ struct brw_context
drm_intel_bo *scratch_bo;
GLuint sampler_count;
- drm_intel_bo *sampler_bo;
+ uint32_t sampler_offset;
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_WM_MAX_SURF];
+ uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
drm_intel_bo *prog_bo;
- drm_intel_bo *state_bo;
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
- * This is the push constant BO on gen6.
+ * This is the offset in the batch to the push constants on gen6.
*
* Pre-gen6, push constants live in the CURBE.
*/
- drm_intel_bo *push_const_bo;
+ uint32_t push_const_offset;
} wm;
struct {
/* gen4 */
drm_intel_bo *prog_bo;
- drm_intel_bo *vp_bo;
-
- /* gen6 */
- drm_intel_bo *blend_state_bo;
- drm_intel_bo *depth_stencil_state_bo;
- drm_intel_bo *color_calc_state_bo;
uint32_t state_offset;
+ uint32_t blend_state_offset;
+ uint32_t depth_stencil_state_offset;
+ uint32_t vp_offset;
} cc;
struct {
@@ -783,9 +787,6 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
*/
void brw_upload_urb_fence(struct brw_context *brw);
-/* brw_cc.c */
-void brw_update_cc_vp(struct brw_context *brw);
-
/* brw_curbe.c
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
@@ -793,10 +794,6 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
-/* brw_state.c */
-void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state);
-void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval);
-
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 2db70c543ea..9ab533179b8 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -28,6 +28,8 @@
#include "main/glheader.h"
#include "main/context.h"
+#include "main/condrender.h"
+#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "tnl/tnl.h"
@@ -278,22 +280,25 @@ static GLboolean check_fallbacks( struct brw_context *brw,
int u;
for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+
if (texUnit->Enabled) {
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
+
if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->Sampler.WrapS == GL_CLAMP) {
+ if (sampler->WrapS == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapT == GL_CLAMP) {
+ if (sampler->WrapS == GL_CLAMP ||
+ sampler->WrapT == GL_CLAMP) {
return GL_TRUE;
}
}
if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapR == GL_CLAMP) {
+ if (sampler->WrapS == GL_CLAMP ||
+ sampler->WrapT == GL_CLAMP ||
+ sampler->WrapR == GL_CLAMP) {
return GL_TRUE;
}
}
@@ -359,15 +364,21 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
for (i = 0; i < nr_prims; i++) {
uint32_t hw_prim;
+ int estimated_max_prim_size;
+
+ estimated_max_prim_size = 512; /* batchbuffer commands */
+ estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
+ (sizeof(struct brw_sampler_state) +
+ sizeof(struct gen5_sampler_default_color)));
+ estimated_max_prim_size += 1024; /* gen6 VS push constants */
+ estimated_max_prim_size += 1024; /* gen6 WM push constants */
+ estimated_max_prim_size += 512; /* misc. pad */
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
+ * primitives.
*/
- intel_batchbuffer_require_space(intel, 1024, false);
+ intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
hw_prim = brw_set_prim(brw, &prim[i]);
if (brw->state.dirty.brw) {
@@ -438,6 +449,9 @@ void brw_draw_prims( struct gl_context *ctx,
{
GLboolean retval;
+ if (!_mesa_check_conditional_render(ctx))
+ return;
+
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 718b3800423..4eb67d57a5a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -293,6 +293,14 @@ static INLINE struct brw_reg retype( struct brw_reg reg,
return reg;
}
+static inline struct brw_reg
+sechalf(struct brw_reg reg)
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
static INLINE struct brw_reg suboffset( struct brw_reg reg,
GLuint delta )
{
@@ -856,7 +864,6 @@ void brw_ff_sync(struct brw_compile *p,
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
- struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 71485cd1f71..859068ec4eb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1871,7 +1871,6 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
- struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
@@ -1883,6 +1882,12 @@ void brw_fb_WRITE(struct brw_compile *p,
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_control, msg_type;
+ struct brw_reg dest;
+
+ if (dispatch_width == 16)
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
if (intel->gen >= 6 && binding_table_index == 0) {
insn = next_insn(p, BRW_OPCODE_SENDC);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5426925e372..21eb9e4e5e1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -194,6 +194,32 @@ fs_visitor::fail(const char *format, ...)
}
}
+void
+fs_visitor::push_force_uncompressed()
+{
+ force_uncompressed_stack++;
+}
+
+void
+fs_visitor::pop_force_uncompressed()
+{
+ force_uncompressed_stack--;
+ assert(force_uncompressed_stack >= 0);
+}
+
+void
+fs_visitor::push_force_sechalf()
+{
+ force_sechalf_stack++;
+}
+
+void
+fs_visitor::pop_force_sechalf()
+{
+ force_sechalf_stack--;
+ assert(force_sechalf_stack >= 0);
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
@@ -214,9 +240,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_LOG2:
case FS_OPCODE_SIN:
case FS_OPCODE_COS:
- return 1;
+ return 1 * c->dispatch_width / 8;
case FS_OPCODE_POW:
- return 2;
+ return 2 * c->dispatch_width / 8;
case FS_OPCODE_TEX:
case FS_OPCODE_TXB:
case FS_OPCODE_TXD:
@@ -313,6 +339,31 @@ fs_visitor::variable_storage(ir_variable *var)
return (fs_reg *)hash_table_find(this->variable_ht, var);
}
+void
+import_uniforms_callback(const void *key,
+ void *data,
+ void *closure)
+{
+ struct hash_table *dst_ht = (struct hash_table *)closure;
+ const fs_reg *reg = (const fs_reg *)data;
+
+ if (reg->file != UNIFORM)
+ return;
+
+ hash_table_insert(dst_ht, data, key);
+}
+
+/* For 16-wide, we need to follow on from the uniform setup of 8-wide dispatch.
+ * This brings in those uniform definitions.
+ */
+void
+fs_visitor::import_uniforms(struct hash_table *src_variable_ht)
+{
+ hash_table_call_foreach(src_variable_ht,
+ import_uniforms_callback,
+ variable_ht);
+}
+
/* Our support for uniforms is piggy-backed on the struct
* gl_fragment_program, because that's where the values actually
* get stored, rather than in some global gl_shader_program uniform
@@ -614,7 +665,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
if (intel->gen < 6) {
inst->base_mrf = 2;
- inst->mlen = 1;
+ inst->mlen = c->dispatch_width / 8;
}
return inst;
@@ -652,7 +703,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
inst = emit(opcode, dst, src0, reg_null_f);
inst->base_mrf = base_mrf;
- inst->mlen = 2;
+ inst->mlen = 2 * c->dispatch_width / 8;
}
return inst;
}
@@ -689,6 +740,13 @@ fs_visitor::visit(ir_variable *ir)
if (ir->mode == ir_var_uniform) {
int param_index = c->prog_data.nr_params;
+ if (c->dispatch_width == 16) {
+ if (!variable_storage(ir)) {
+ fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
+ }
+ return;
+ }
+
if (!strncmp(ir->name, "gl_", 3)) {
setup_builtin_uniform_values(ir);
} else {
@@ -1233,32 +1291,34 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
return inst;
}
+/* gen5's sampler has slots for u, v, r, array index, then optional
+ * parameters like shadow comparitor or LOD bias. If optional
+ * parameters aren't present, those base slots are optional and don't
+ * need to be included in the message.
+ *
+ * We don't fill in the unnecessary slots regardless, which may look
+ * surprising in the disassembly.
+ */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
- /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
- * optional parameters like shadow comparitor or LOD bias. If
- * optional parameters aren't present, those base slots are
- * optional and don't need to be included in the message.
- *
- * We don't fill in the unnecessary slots regardless, which may
- * look surprising in the disassembly.
- */
int mlen = 1; /* g0 header always present. */
int base_mrf = 1;
+ int reg_width = c->dispatch_width / 8;
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
+ coordinate);
coordinate.reg_offset++;
}
- mlen += ir->coordinate->type->vector_elements;
+ mlen += ir->coordinate->type->vector_elements * reg_width;
if (ir->shadow_comparitor) {
- mlen = MAX2(mlen, 5);
+ mlen = MAX2(mlen, 1 + 4 * reg_width);
ir->shadow_comparitor->accept(this);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
+ mlen += reg_width;
}
fs_inst *inst = NULL;
@@ -1268,17 +1328,18 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
break;
case ir_txb:
ir->lod_info.bias->accept(this);
- mlen = MAX2(mlen, 5);
+ mlen = MAX2(mlen, 1 + 4 * reg_width);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
+ mlen += reg_width;
inst = emit(FS_OPCODE_TXB, dst);
+
break;
case ir_txl:
ir->lod_info.lod->accept(this);
- mlen = MAX2(mlen, 5);
+ mlen = MAX2(mlen, 1 + 4 * reg_width);
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
- mlen++;
+ mlen += reg_width;
inst = emit(FS_OPCODE_TXL, dst);
break;
@@ -1290,6 +1351,10 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
inst->base_mrf = base_mrf;
inst->mlen = mlen;
+ if (mlen > 11) {
+ fail("Message length >11 disallowed by hardware\n");
+ }
+
return inst;
}
@@ -1355,6 +1420,12 @@ fs_visitor::visit(ir_texture *ir)
0
};
+ if (c->dispatch_width == 16) {
+ fail("rectangle scale uniform setup not supported on 16-wide\n");
+ this->result = fs_reg(this, ir->type);
+ return;
+ }
+
c->prog_data.param_convert[c->prog_data.nr_params] =
PARAM_NO_CONVERT;
c->prog_data.param_convert[c->prog_data.nr_params + 1] =
@@ -1731,6 +1802,10 @@ fs_visitor::visit(ir_if *ir)
{
fs_inst *inst;
+ if (c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
/* Don't point the annotation at the if statement, because then it plus
* the then and else blocks get printed.
*/
@@ -1771,6 +1846,10 @@ fs_visitor::visit(ir_loop *ir)
{
fs_reg counter = reg_undef;
+ if (c->dispatch_width == 16) {
+ fail("Can't support (non-uniform) control flow on 16-wide\n");
+ }
+
if (ir->counter) {
this->base_ir = ir->counter;
ir->counter->accept(this);
@@ -1874,6 +1953,11 @@ fs_visitor::emit(fs_inst inst)
fs_inst *list_inst = new(mem_ctx) fs_inst;
*list_inst = inst;
+ if (force_uncompressed_stack > 0)
+ list_inst->force_uncompressed = true;
+ else if (force_sechalf_stack > 0)
+ list_inst->force_sechalf = true;
+
list_inst->annotation = this->current_annotation;
list_inst->ir = this->base_ir;
@@ -1916,21 +2000,14 @@ fs_visitor::interp_reg(int location, int channel)
void
fs_visitor::emit_interpolation_setup_gen4()
{
- struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
-
this->current_annotation = "compute pixel centers";
this->pixel_x = fs_reg(this, glsl_type::uint_type);
this->pixel_y = fs_reg(this, glsl_type::uint_type);
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
- emit(BRW_OPCODE_ADD,
- this->pixel_x,
- fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
- fs_reg(brw_imm_v(0x10101010)));
- emit(BRW_OPCODE_ADD,
- this->pixel_y,
- fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
- fs_reg(brw_imm_v(0x11001100)));
+
+ emit(FS_OPCODE_PIXEL_X, this->pixel_x);
+ emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
this->current_annotation = "compute pixel deltas from v0";
if (brw->has_pln) {
@@ -2001,11 +2078,69 @@ fs_visitor::emit_interpolation_setup_gen6()
}
void
+fs_visitor::emit_color_write(int index, int first_color_mrf, fs_reg color)
+{
+ int reg_width = c->dispatch_width / 8;
+
+ if (c->dispatch_width == 8 || intel->gen == 6) {
+ /* SIMD8 write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ *
+ * gen6 SIMD16 DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index * reg_width),
+ color);
+ } else {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ */
+ if (brw->has_compr4) {
+ /* By setting the high bit of the MRF register number, we
+ * indicate that we want COMPR4 mode - instead of doing the
+ * usual destination + 1 for the second half we get
+ * destination + 4.
+ */
+ emit(BRW_OPCODE_MOV,
+ fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index), color);
+ } else {
+ push_force_uncompressed();
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index), color);
+ pop_force_uncompressed();
+
+ push_force_sechalf();
+ color.sechalf = true;
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, first_color_mrf + index + 4), color);
+ pop_force_sechalf();
+ color.sechalf = false;
+ }
+ }
+}
+
+void
fs_visitor::emit_fb_writes()
{
this->current_annotation = "FB write header";
GLboolean header_present = GL_TRUE;
int nr = 0;
+ int reg_width = c->dispatch_width / 8;
if (intel->gen >= 6 &&
!this->kill_emitted &&
@@ -2019,31 +2154,44 @@ fs_visitor::emit_fb_writes()
}
if (c->aa_dest_stencil_reg) {
+ push_force_uncompressed();
emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)));
+ pop_force_uncompressed();
}
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
- nr += 4;
+ nr += 4 * reg_width;
if (c->source_depth_to_render_target) {
+ if (intel->gen == 6 && c->dispatch_width == 16) {
+ /* For outputting oDepth on gen6, SIMD8 writes have to be
+ * used. This would require 8-wide moves of each half to
+ * message regs, kind of like pre-gen5 SIMD16 FB writes.
+ * Just bail on doing so for now.
+ */
+ fail("Missing support for simd16 depth writes on gen6\n");
+ }
+
if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth);
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr), depth);
} else {
/* Pass through the payload depth. */
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
}
+ nr += reg_width;
}
if (c->dest_depth_reg) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)));
+ nr += reg_width;
}
fs_reg color = reg_undef;
@@ -2060,7 +2208,7 @@ fs_visitor::emit_fb_writes()
target);
if (this->frag_color || this->frag_data) {
for (int i = 0; i < 4; i++) {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + i), color);
+ emit_color_write(i, color_mrf, color);
color.reg_offset++;
}
}
@@ -2084,7 +2232,7 @@ fs_visitor::emit_fb_writes()
* renderbuffer.
*/
color.reg_offset += 3;
- emit(BRW_OPCODE_MOV, fs_reg(MRF, color_mrf + 3), color);
+ emit_color_write(3, color_mrf, color);
}
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
@@ -2144,8 +2292,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
brw_pop_insn_state(p);
brw_fb_WRITE(p,
- 8, /* dispatch_width */
- retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ c->dispatch_width,
inst->base_mrf,
implied_header,
inst->target,
@@ -2155,6 +2302,40 @@ fs_visitor::generate_fb_write(fs_inst *inst)
inst->header_present);
}
+/* Computes the integer pixel x,y values from the origin.
+ *
+ * This is the basis of gl_FragCoord computation, but is also used
+ * pre-gen6 for computing the deltas from v0 for computing
+ * interpolation.
+ */
+void
+fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
+{
+ struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+ struct brw_reg src;
+ struct brw_reg deltas;
+
+ if (is_x) {
+ src = stride(suboffset(g1_uw, 4), 2, 4, 0);
+ deltas = brw_imm_v(0x10101010);
+ } else {
+ src = stride(suboffset(g1_uw, 5), 2, 4, 0);
+ deltas = brw_imm_v(0x11001100);
+ }
+
+ if (c->dispatch_width == 16) {
+ dst = vec16(dst);
+ }
+
+ /* We do this 8 or 16-wide, but since the destination is UW we
+ * don't do compression in the 16-wide case.
+ */
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_ADD(p, dst, src, deltas);
+ brw_pop_insn_state(p);
+}
+
void
fs_visitor::generate_linterp(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
@@ -2214,8 +2395,16 @@ fs_visitor::generate_math(fs_inst *inst,
assert(inst->mlen == 0);
if (inst->opcode == FS_OPCODE_POW) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src[0], src[1]);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
} else {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
@@ -2223,10 +2412,23 @@ fs_visitor::generate_math(fs_inst *inst,
0, src[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
}
- } else {
+ } else /* gen <= 5 */{
assert(inst->mlen >= 1);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
@@ -2234,6 +2436,19 @@ fs_visitor::generate_math(fs_inst *inst,
inst->base_mrf, src[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
}
}
@@ -2244,6 +2459,12 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
int rlen = 4;
uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ if (c->dispatch_width == 16) {
+ rlen = 8;
+ dst = vec16(dst);
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ }
+
if (intel->gen >= 5) {
switch (inst->opcode) {
case FS_OPCODE_TEX:
@@ -2311,11 +2532,6 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
}
assert(msg_type != -1);
- if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
- rlen = 8;
- dst = vec16(dst);
- }
-
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
@@ -2408,6 +2624,7 @@ fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
} else {
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
brw_pop_insn_state(p);
}
@@ -2432,6 +2649,7 @@ fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_AND(p, g1, f0, g1);
brw_pop_insn_state(p);
} else {
@@ -2441,6 +2659,7 @@ fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_AND(p, g0, mask, g0);
brw_pop_insn_state(p);
}
@@ -2527,6 +2746,9 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
void
fs_visitor::setup_paramvalues_refs()
{
+ if (c->dispatch_width != 8)
+ return;
+
/* Set up the pointers to ParamValues now that that array is finalized. */
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
c->prog_data.param[i] =
@@ -2538,8 +2760,12 @@ fs_visitor::setup_paramvalues_refs()
void
fs_visitor::assign_curb_setup()
{
- c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
+ if (c->dispatch_width == 8) {
+ c->prog_data.first_curbe_grf = c->nr_payload_regs;
+ } else {
+ c->prog_data.first_curbe_grf_16 = c->nr_payload_regs;
+ }
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_iter(exec_list_iterator, iter, this->instructions) {
@@ -2548,7 +2774,7 @@ fs_visitor::assign_curb_setup()
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
- struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
+ struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
@@ -2600,7 +2826,7 @@ fs_visitor::calculate_urb_setup()
void
fs_visitor::assign_urb_setup()
{
- int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
+ int urb_start = c->nr_payload_regs + c->prog_data.curb_read_length;
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
@@ -2725,6 +2951,11 @@ fs_visitor::setup_pull_constants()
if (c->prog_data.nr_params <= max_uniform_components)
return;
+ if (c->dispatch_width == 16) {
+ fail("Pull constants not supported in 16-wide\n");
+ return;
+ }
+
/* Just demote the end of the list. We could probably do better
* here, demoting things that are rarely used in the program first.
*/
@@ -2884,7 +3115,9 @@ fs_visitor::propagate_constants()
if (inst->opcode != BRW_OPCODE_MOV ||
inst->predicated ||
inst->dst.file != GRF || inst->src[0].file != IMM ||
- inst->dst.type != inst->src[0].type)
+ inst->dst.type != inst->src[0].type ||
+ (c->dispatch_width == 16 &&
+ (inst->force_uncompressed || inst->force_sechalf)))
continue;
/* Don't bother with cases where we should have had the
@@ -3152,6 +3385,20 @@ fs_visitor::compute_to_mrf()
inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
continue;
+ /* Work out which hardware MRF registers are written by this
+ * instruction.
+ */
+ int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int mrf_high;
+ if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ mrf_high = mrf_low + 4;
+ } else if (c->dispatch_width == 16 &&
+ (!inst->force_uncompressed && !inst->force_sechalf)) {
+ mrf_high = mrf_low + 1;
+ } else {
+ mrf_high = mrf_low;
+ }
+
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
@@ -3179,11 +3426,21 @@ fs_visitor::compute_to_mrf()
}
/* If it's predicated, it (probably) didn't populate all
- * the channels.
+ * the channels. We might be able to rewrite everything
+ * that writes that reg, but it would require smarter
+ * tracking to delay the rewriting until complete success.
*/
if (scan_inst->predicated)
break;
+ /* If it's half of a register setup and not the same half as
+ * the MOV we're trying to remove, bail for now.
+ */
+ if (scan_inst->force_uncompressed != inst->force_uncompressed ||
+ scan_inst->force_sechalf != inst->force_sechalf) {
+ break;
+ }
+
/* SEND instructions can't have MRF as a destination. */
if (scan_inst->mlen)
break;
@@ -3233,12 +3490,29 @@ fs_visitor::compute_to_mrf()
if (interfered)
break;
- if (scan_inst->dst.file == MRF &&
- scan_inst->dst.hw_reg == inst->dst.hw_reg) {
- /* Somebody else wrote our MRF here, so we can't can't
+ if (scan_inst->dst.file == MRF) {
+ /* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- break;
+ int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_high;
+
+ if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ scan_mrf_high = scan_mrf_low + 4;
+ } else if (c->dispatch_width == 16 &&
+ (!scan_inst->force_uncompressed &&
+ !scan_inst->force_sechalf)) {
+ scan_mrf_high = scan_mrf_low + 1;
+ } else {
+ scan_mrf_high = scan_mrf_low;
+ }
+
+ if (mrf_low == scan_mrf_low ||
+ mrf_low == scan_mrf_high ||
+ mrf_high == scan_mrf_low ||
+ mrf_high == scan_mrf_high) {
+ break;
+ }
}
if (scan_inst->mlen > 0) {
@@ -3247,8 +3521,12 @@ fs_visitor::compute_to_mrf()
* scan_inst->mlen - 1. Don't go pushing our MRF write up
* above it.
*/
- if (inst->dst.hw_reg >= scan_inst->base_mrf &&
- inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) {
+ if (mrf_low >= scan_inst->base_mrf &&
+ mrf_low < scan_inst->base_mrf + scan_inst->mlen) {
+ break;
+ }
+ if (mrf_high >= scan_inst->base_mrf &&
+ mrf_high < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
}
@@ -3268,6 +3546,10 @@ fs_visitor::remove_duplicate_mrf_writes()
fs_inst *last_mrf_move[16];
bool progress = false;
+ /* Need to update the MRF tracking for compressed instructions. */
+ if (c->dispatch_width == 16)
+ return false;
+
memset(last_mrf_move, 0, sizeof(last_mrf_move));
foreach_iter(exec_list_iterator, iter, this->instructions) {
@@ -3347,6 +3629,29 @@ fs_visitor::virtual_grf_interferes(int a, int b)
(this->virtual_grf_use[b] != -1 ||
this->virtual_grf_def[b] == MAX_INSTRUCTION));
+ /* If the register is used to store 16 values of less than float
+ * size (only the case for pixel_[xy]), then we can't allocate
+ * another dword-sized thing to that register that would be used in
+ * the same instruction. This is because when the GPU decodes (for
+ * example):
+ *
+ * (declare (in ) vec4 gl_FragCoord@0x97766a0)
+ * add(16) g6<1>F g6<8,8,1>UW 0.5F { align1 compr };
+ *
+ * it's actually processed as:
+ * add(8) g6<1>F g6<8,8,1>UW 0.5F { align1 };
+ * add(8) g7<1>F g6.8<8,8,1>UW 0.5F { align1 sechalf };
+ *
+ * so our second half values in g6 got overwritten in the first
+ * half.
+ */
+ if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||
+ this->pixel_x.reg == b ||
+ this->pixel_y.reg == a ||
+ this->pixel_y.reg == b)) {
+ return start <= end;
+ }
+
return start < end;
}
@@ -3366,6 +3671,8 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
reg->hw_reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
+ if (reg->sechalf)
+ brw_reg = sechalf(brw_reg);
break;
case IMM:
switch (reg->type) {
@@ -3411,7 +3718,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
void
fs_visitor::generate_code()
{
- int last_native_inst = 0;
+ int last_native_inst = p->nr_insn;
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
@@ -3427,8 +3734,8 @@ fs_visitor::generate_code()
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("Native code for fragment shader %d:\n",
- ctx->Shader.CurrentFragmentProgram->Name);
+ printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+ ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width);
}
foreach_iter(exec_list_iterator, iter, this->instructions) {
@@ -3461,6 +3768,14 @@ fs_visitor::generate_code()
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
+ if (inst->force_uncompressed || c->dispatch_width == 8) {
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ } else if (inst->force_sechalf) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ } else {
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ }
+
switch (inst->opcode) {
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
@@ -3602,6 +3917,12 @@ fs_visitor::generate_code()
case FS_OPCODE_COS:
generate_math(inst, dst, src);
break;
+ case FS_OPCODE_PIXEL_X:
+ generate_pixel_xy(dst, true);
+ break;
+ case FS_OPCODE_PIXEL_Y:
+ generate_pixel_xy(dst, false);
+ break;
case FS_OPCODE_CINTERP:
brw_MOV(p, dst, src[0]);
break;
@@ -3668,6 +3989,10 @@ fs_visitor::generate_code()
last_native_inst = p->nr_insn;
}
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ printf("\n");
+ }
+
ralloc_free(if_stack);
ralloc_free(loop_stack);
ralloc_free(if_depth_in_loop);
@@ -3693,108 +4018,146 @@ fs_visitor::generate_code()
}
}
-GLboolean
-brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+bool
+fs_visitor::run()
{
- struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
- struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
+ uint32_t prog_offset_16 = 0;
+ uint32_t orig_nr_params = c->prog_data.nr_params;
- if (!prog)
- return GL_FALSE;
+ brw_wm_payload_setup(brw, c);
- struct brw_shader *shader =
- (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (!shader)
- return GL_FALSE;
+ if (c->dispatch_width == 16) {
+ /* align to 64 byte boundary. */
+ while ((c->func.nr_insn * sizeof(struct brw_instruction)) % 64) {
+ brw_NOP(p);
+ }
- /* We always use 8-wide mode, at least for now. For one, flow
- * control only works in 8-wide. Also, when we're fragment shader
- * bound, we're almost always under register pressure as well, so
- * 8-wide would save us from the performance cliff of spilling
- * regs.
- */
- c->dispatch_width = 8;
+ /* Save off the start of this 16-wide program in case we succeed. */
+ prog_offset_16 = c->func.nr_insn * sizeof(struct brw_instruction);
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("GLSL IR for native fragment shader %d:\n", prog->Name);
- _mesa_print_ir(shader->ir, NULL);
- printf("\n");
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
- /* Now the main event: Visit the shader IR and generate our FS IR for it.
- */
- fs_visitor v(c, shader);
-
if (0) {
- v.emit_dummy_fs();
+ emit_dummy_fs();
} else {
- v.calculate_urb_setup();
+ calculate_urb_setup();
if (intel->gen < 6)
- v.emit_interpolation_setup_gen4();
+ emit_interpolation_setup_gen4();
else
- v.emit_interpolation_setup_gen6();
+ emit_interpolation_setup_gen6();
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
foreach_iter(exec_list_iterator, iter, *shader->ir) {
ir_instruction *ir = (ir_instruction *)iter.get();
- v.base_ir = ir;
- ir->accept(&v);
+ base_ir = ir;
+ ir->accept(this);
}
- v.emit_fb_writes();
+ emit_fb_writes();
- v.split_virtual_grfs();
+ split_virtual_grfs();
- v.setup_paramvalues_refs();
- v.setup_pull_constants();
+ setup_paramvalues_refs();
+ setup_pull_constants();
bool progress;
do {
progress = false;
- progress = v.remove_duplicate_mrf_writes() || progress;
+ progress = remove_duplicate_mrf_writes() || progress;
- progress = v.propagate_constants() || progress;
- progress = v.register_coalesce() || progress;
- progress = v.compute_to_mrf() || progress;
- progress = v.dead_code_eliminate() || progress;
+ progress = propagate_constants() || progress;
+ progress = register_coalesce() || progress;
+ progress = compute_to_mrf() || progress;
+ progress = dead_code_eliminate() || progress;
} while (progress);
- v.schedule_instructions();
+ schedule_instructions();
- v.assign_curb_setup();
- v.assign_urb_setup();
+ assign_curb_setup();
+ assign_urb_setup();
if (0) {
/* Debug of register spilling: Go spill everything. */
- int virtual_grf_count = v.virtual_grf_next;
+ int virtual_grf_count = virtual_grf_next;
for (int i = 1; i < virtual_grf_count; i++) {
- v.spill_reg(i);
+ spill_reg(i);
}
}
if (0)
- v.assign_regs_trivial();
+ assign_regs_trivial();
else {
- while (!v.assign_regs()) {
- if (v.failed)
+ while (!assign_regs()) {
+ if (failed)
break;
}
}
}
+ assert(force_uncompressed_stack == 0);
+ assert(force_sechalf_stack == 0);
- if (!v.failed)
- v.generate_code();
+ if (failed)
+ return false;
- assert(!v.failed); /* FINISHME: Cleanly fail, tested at link time, etc. */
+ generate_code();
- if (v.failed)
- return GL_FALSE;
+ if (c->dispatch_width == 8) {
+ c->prog_data.total_grf = grf_used;
+ } else {
+ c->prog_data.total_grf_16 = grf_used;
+ c->prog_data.prog_offset_16 = prog_offset_16;
- c->prog_data.total_grf = v.grf_used;
+ /* Make sure we didn't try to sneak in an extra uniform */
+ assert(orig_nr_params == c->prog_data.nr_params);
+ }
- return GL_TRUE;
+ return !failed;
+}
+
+bool
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
+
+ if (!prog)
+ return false;
+
+ struct brw_shader *shader =
+ (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ if (!shader)
+ return false;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n\n");
+ }
+
+ /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ */
+ c->dispatch_width = 8;
+
+ fs_visitor v(c, shader);
+ if (!v.run()) {
+ /* FINISHME: Cleanly fail, test at link time, etc. */
+ assert(!"not reached");
+ return false;
+ }
+
+ if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
+ c->dispatch_width = 16;
+ fs_visitor v2(c, shader);
+ v2.import_uniforms(v.variable_ht);
+ v2.run();
+ }
+
+ c->prog_data.dispatch_width = 8;
+
+ return true;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f792906cfe7..518d09180c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -67,6 +67,8 @@ enum fs_opcodes {
FS_OPCODE_COS,
FS_OPCODE_DDX,
FS_OPCODE_DDY,
+ FS_OPCODE_PIXEL_X,
+ FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
@@ -176,6 +178,7 @@ public:
int type;
bool negate;
bool abs;
+ bool sechalf;
struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
@@ -341,6 +344,8 @@ public:
bool eot;
bool header_present;
bool shadow_compare;
+ bool force_uncompressed;
+ bool force_sechalf;
uint32_t offset; /* spill/unspill offset */
/** @{
@@ -403,6 +408,8 @@ public:
this->live_intervals_valid = false;
this->kill_emitted = false;
+ this->force_uncompressed_stack = 0;
+ this->force_sechalf_stack = 0;
}
~fs_visitor()
@@ -413,6 +420,7 @@ public:
fs_reg *variable_storage(ir_variable *var);
int virtual_grf_alloc(int size);
+ void import_uniforms(struct hash_table *src_variable_ht);
void visit(ir_variable *ir);
void visit(ir_assignment *ir);
@@ -459,6 +467,7 @@ public:
return emit(fs_inst(opcode, dst, src0, src1, src2));
}
+ bool run();
void setup_paramvalues_refs();
void assign_curb_setup();
void calculate_urb_setup();
@@ -479,8 +488,14 @@ public:
void schedule_instructions();
void fail(const char *msg, ...);
+ void push_force_uncompressed();
+ void pop_force_uncompressed();
+ void push_force_sechalf();
+ void pop_force_sechalf();
+
void generate_code();
void generate_fb_write(fs_inst *inst);
+ void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
@@ -508,6 +523,7 @@ public:
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_color_write(int index, int first_color_mrf, fs_reg color);
void emit_fb_writes();
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
@@ -565,6 +581,9 @@ public:
fs_reg reg_null_cmp;
int grf_used;
+
+ int force_uncompressed_stack;
+ int force_sechalf_stack;
};
GLboolean brw_do_channel_expressions(struct exec_list *instructions);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 67f29ce1816..1e2cf917116 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -48,11 +48,11 @@ extern "C" {
#include "../glsl/ir_print_visitor.h"
static void
-assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
if (reg->file == GRF && reg->reg != 0) {
assert(reg->reg_offset >= 0);
- reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+ reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
reg->reg = 0;
}
}
@@ -63,32 +63,48 @@ fs_visitor::assign_regs_trivial()
int last_grf = 0;
int hw_reg_mapping[this->virtual_grf_next];
int i;
+ int reg_width = c->dispatch_width / 8;
hw_reg_mapping[0] = 0;
- hw_reg_mapping[1] = this->first_non_payload_grf;
+ /* Note that compressed instructions require alignment to 2 registers. */
+ hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
for (i = 2; i < this->virtual_grf_next; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
- this->virtual_grf_sizes[i - 1]);
+ this->virtual_grf_sizes[i - 1] * reg_width);
}
- last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
+ last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
+ reg_width);
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- assign_reg(hw_reg_mapping, &inst->dst);
- assign_reg(hw_reg_mapping, &inst->src[0]);
- assign_reg(hw_reg_mapping, &inst->src[1]);
+ assign_reg(hw_reg_mapping, &inst->dst, reg_width);
+ assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
+ assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
- this->grf_used = last_grf + 1;
+ if (last_grf >= BRW_MAX_GRF) {
+ fail("Ran out of regs on trivial allocator (%d/%d)\n",
+ last_grf, BRW_MAX_GRF);
+ }
+
+ this->grf_used = last_grf + reg_width;
}
bool
fs_visitor::assign_regs()
{
+ /* Most of this allocation was written for a reg_width of 1
+ * (dispatch_width == 8). In extending to 16-wide, the code was
+ * left in place and converted so that each hardware register it
+ * allocates is a contiguous physical pair of regs when
+ * reg_width == 2.
+ */
+ int reg_width = c->dispatch_width / 8;
int last_grf = 0;
int hw_reg_mapping[this->virtual_grf_next + 1];
- int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
+ int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+ int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
int class_sizes[base_reg_count];
int class_count = 0;
int aligned_pair_class = -1;
@@ -157,8 +173,8 @@ fs_visitor::assign_regs()
if (0) {
printf("%d/%d conflicts %d/%d\n",
- class_sizes[i], this->first_non_payload_grf + i_r,
- class_sizes[c], this->first_non_payload_grf + c_r);
+ class_sizes[i], first_assigned_grf + i_r,
+ class_sizes[c], first_assigned_grf + c_r);
}
ra_add_reg_conflict(regs,
@@ -172,7 +188,7 @@ fs_visitor::assign_regs()
/* Add a special class for aligned pairs, which we'll put delta_x/y
* in on gen5 so that we can do PLN.
*/
- if (brw->has_pln && intel->gen < 6) {
+ if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
int reg_count = (base_reg_count - 1) / 2;
int unaligned_pair_class = 1;
assert(class_sizes[unaligned_pair_class] == 2);
@@ -182,7 +198,7 @@ fs_visitor::assign_regs()
class_sizes[aligned_pair_class] = 2;
class_base_reg[aligned_pair_class] = 0;
class_reg_count[aligned_pair_class] = 0;
- int start = (this->first_non_payload_grf & 1) ? 1 : 0;
+ int start = (first_assigned_grf & 1) ? 1 : 0;
for (int i = 0; i < reg_count; i++) {
ra_class_add_reg(regs, classes[aligned_pair_class],
@@ -228,6 +244,8 @@ fs_visitor::assign_regs()
if (reg == -1) {
fail("no register to spill\n");
+ } else if (c->dispatch_width == 16) {
+ fail("no spilling support on 16-wide yet\n");
} else {
spill_reg(reg);
}
@@ -257,7 +275,7 @@ fs_visitor::assign_regs()
}
assert(hw_reg >= 0);
- hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
+ hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
last_grf = MAX2(last_grf,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
}
@@ -265,12 +283,12 @@ fs_visitor::assign_regs()
foreach_iter(exec_list_iterator, iter, this->instructions) {
fs_inst *inst = (fs_inst *)iter.get();
- assign_reg(hw_reg_mapping, &inst->dst);
- assign_reg(hw_reg_mapping, &inst->src[0]);
- assign_reg(hw_reg_mapping, &inst->src[1]);
+ assign_reg(hw_reg_mapping, &inst->dst, reg_width);
+ assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
+ assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
}
- this->grf_used = last_grf + 1;
+ this->grf_used = last_grf + reg_width;
ralloc_free(g);
ralloc_free(regs);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index bff8f82f3f7..fb1192c810a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -145,6 +145,8 @@ public:
void calculate_deps();
void schedule_instructions(fs_inst *next_block_header);
+ bool is_compressed(fs_inst *inst);
+
void *mem_ctx;
int instructions_to_schedule;
@@ -234,6 +236,17 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
}
}
+/* Instruction scheduling needs to be aware of when an MRF write
+ * actually writes 2 MRFs.
+ */
+bool
+instruction_scheduler::is_compressed(fs_inst *inst)
+{
+ return (v->c->dispatch_width == 16 &&
+ !inst->force_uncompressed &&
+ !inst->force_sechalf);
+}
+
void
instruction_scheduler::calculate_deps()
{
@@ -297,11 +310,24 @@ instruction_scheduler::calculate_deps()
}
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- if (last_mrf_write[inst->dst.hw_reg]) {
- add_dep(last_mrf_write[inst->dst.hw_reg], n,
- last_mrf_write[inst->dst.hw_reg]->latency);
+ int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+
+ if (last_mrf_write[reg]) {
+ add_dep(last_mrf_write[reg], n,
+ last_mrf_write[reg]->latency);
+ }
+ last_mrf_write[reg] = n;
+ if (is_compressed(inst)) {
+ if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ reg += 4;
+ else
+ reg++;
+ if (last_mrf_write[reg]) {
+ add_dep(last_mrf_write[reg], n,
+ last_mrf_write[reg]->latency);
+ }
+ last_mrf_write[reg] = n;
}
- last_mrf_write[inst->dst.hw_reg] = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -369,7 +395,18 @@ instruction_scheduler::calculate_deps()
if (inst->dst.file == GRF) {
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
- last_mrf_write[inst->dst.hw_reg] = n;
+ int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+
+ last_mrf_write[reg] = n;
+
+ if (is_compressed(inst)) {
+ if (inst->dst.hw_reg & BRW_MRF_COMPR4)
+ reg += 4;
+ else
+ reg++;
+
+ last_mrf_write[reg] = n;
+ }
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 14ee6767cd5..f213ae20acd 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -121,13 +121,11 @@ static void compile_gs_prog( struct brw_context *brw,
/* Upload
*/
drm_intel_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- sizeof(c.prog_data),
- &brw->gs.prog_data);
+ brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->gs.prog_data);
}
static const GLenum gs_prim[GL_POLYGON+1] = {
@@ -193,7 +191,6 @@ static void prepare_gs_prog(struct brw_context *brw)
if (brw->gs.prog_active) {
brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
- NULL, 0,
&brw->gs.prog_data);
if (brw->gs.prog_bo == NULL)
compile_gs_prog( brw, &key );
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index 69a5f7a6667..542874b7706 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -35,112 +35,65 @@
#include "brw_state.h"
#include "brw_defines.h"
-struct brw_gs_unit_key {
- unsigned int total_grf;
- unsigned int urb_entry_read_length;
-
- unsigned int curbe_offset;
-
- unsigned int nr_urb_entries, urb_size;
- GLboolean prog_active;
-};
-
static void
-gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
-{
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_GS_PROG */
- key->prog_active = brw->gs.prog_active;
- if (key->prog_active) {
- key->total_grf = brw->gs.prog_data->total_grf;
- key->urb_entry_read_length = brw->gs.prog_data->urb_read_length;
- } else {
- key->total_grf = 1;
- key->urb_entry_read_length = 1;
- }
-
- /* BRW_NEW_CURBE_OFFSETS */
- key->curbe_offset = brw->curbe.clip_start;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_gs_entries;
- key->urb_size = brw->urb.vsize;
-}
-
-static drm_intel_bo *
-gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+brw_prepare_gs_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_gs_unit_state gs;
- drm_intel_bo *bo;
-
- memset(&gs, 0, sizeof(gs));
-
- gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- if (key->prog_active) /* reloc */
- gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+ struct brw_gs_unit_state *gs;
- gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- gs.thread1.single_program_flow = 1;
+ gs = brw_state_batch(brw, sizeof(*gs), 32, &brw->gs.state_offset);
- gs.thread3.dispatch_grf_start_reg = 1;
- gs.thread3.const_urb_entry_read_offset = 0;
- gs.thread3.const_urb_entry_read_length = 0;
- gs.thread3.urb_entry_read_offset = 0;
- gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+ memset(gs, 0, sizeof(*gs));
- gs.thread4.nr_urb_entries = key->nr_urb_entries;
- gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
-
- if (key->nr_urb_entries >= 8)
- gs.thread4.max_threads = 1;
- else
- gs.thread4.max_threads = 0;
-
- if (intel->gen == 5)
- gs.thread4.rendering_enable = 1;
-
- if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- gs.thread4.stats_enable = 1;
-
- bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
- key, sizeof(*key),
- &brw->gs.prog_bo, 1,
- &gs, sizeof(gs));
+ /* CACHE_NEW_GS_PROG */
+ if (brw->gs.prog_active) {
+ gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) /
+ 16 - 1);
+ /* reloc */
+ gs->thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+ gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ gs->thread1.single_program_flow = 1;
+
+ gs->thread3.dispatch_grf_start_reg = 1;
+ gs->thread3.const_urb_entry_read_offset = 0;
+ gs->thread3.const_urb_entry_read_length = 0;
+ gs->thread3.urb_entry_read_offset = 0;
+ gs->thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
+
+ /* BRW_NEW_URB_FENCE */
+ gs->thread4.nr_urb_entries = brw->urb.nr_gs_entries;
+ gs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
+
+ if (brw->urb.nr_gs_entries >= 8)
+ gs->thread4.max_threads = 1;
+ else
+ gs->thread4.max_threads = 0;
- if (key->prog_active) {
/* Emit GS program relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_gs_unit_state, thread0),
- brw->gs.prog_bo, gs.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ (brw->gs.state_offset +
+ offsetof(struct brw_gs_unit_state, thread0)),
+ brw->gs.prog_bo, gs->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
- return bo;
-}
-
-static void prepare_gs_unit(struct brw_context *brw)
-{
- struct brw_gs_unit_key key;
+ if (intel->gen == 5)
+ gs->thread4.rendering_enable = 1;
- gs_unit_populate_key(brw, &key);
+ if (unlikely(INTEL_DEBUG & DEBUG_STATS))
+ gs->thread4.stats_enable = 1;
- drm_intel_bo_unreference(brw->gs.state_bo);
- brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
- &key, sizeof(key),
- &brw->gs.prog_bo, 1,
- NULL);
- if (brw->gs.state_bo == NULL) {
- brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
- }
+ brw->state.dirty.cache |= CACHE_NEW_GS_UNIT;
}
const struct brw_tracked_state brw_gs_unit = {
.dirty = {
.mesa = 0,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_GS_PROG
},
- .prepare = prepare_gs_unit,
+ .prepare = brw_prepare_gs_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 19eea07ebc6..7119786de42 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -143,15 +143,19 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
- OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->vs.state_offset);
if (brw->gs.prog_active)
- OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->gs.state_offset | 1);
else
OUT_BATCH(0);
- OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->clip.state_offset | 1);
OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->sf.state_offset);
- OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->wm.state_offset);
OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset);
ADVANCE_BATCH();
@@ -159,16 +163,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_PSP;
}
-
-static void prepare_psp_urb_cbs(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->vs.state_bo);
- brw_add_validated_bo(brw, brw->gs.state_bo);
- brw_add_validated_bo(brw, brw->clip.state_bo);
- brw_add_validated_bo(brw, brw->sf.state_bo);
- brw_add_validated_bo(brw, brw->wm.state_bo);
-}
-
static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
@@ -188,7 +182,6 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
- .prepare = prepare_psp_urb_cbs,
.emit = upload_psp_urb_cbs,
};
@@ -551,12 +544,28 @@ static void upload_state_base_address( struct brw_context *brw )
if (intel->gen >= 6) {
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
- OUT_BATCH(1); /* General state base address */
- OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
- 1); /* Surface state base address */
- OUT_BATCH(1); /* Dynamic state base address */
- OUT_BATCH(1); /* Indirect object base address */
- OUT_BATCH(1); /* Instruction base address */
+ /* General state base address: stateless DP read/write requests */
+ OUT_BATCH(1);
+ /* Surface state base address:
+ * BINDING_TABLE_STATE
+ * SURFACE_STATE
+ */
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+ /* Dynamic state base address:
+ * SAMPLER_STATE
+ * SAMPLER_BORDER_COLOR_STATE
+ * CLIP, SF, WM/CC viewport state
+ * COLOR_CALC_STATE
+ * DEPTH_STENCIL_STATE
+ * BLEND_STATE
+ * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
+ * Disable is clear, which we rely on)
+ */
+ OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+ I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+
+ OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
+ OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Dynamic state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 6da155b1a9b..5a03851b8e6 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -119,13 +119,11 @@ static void compile_sf_prog( struct brw_context *brw,
/* Upload
*/
drm_intel_bo_unreference(brw->sf.prog_bo);
- brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- sizeof(c.prog_data),
- &brw->sf.prog_data);
+ brw->sf.prog_bo = brw_upload_cache(&brw->cache, BRW_SF_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->sf.prog_data);
}
/* Calculate interpolants for triangle and line rasterization.
@@ -194,7 +192,6 @@ static void upload_sf_prog(struct brw_context *brw)
drm_intel_bo_unreference(brw->sf.prog_bo);
brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
&key, sizeof(key),
- NULL, 0,
&brw->sf.prog_data);
if (brw->sf.prog_bo == NULL)
compile_sf_prog( brw, &key );
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 66d91a0bde7..78b22c4df3d 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -39,7 +39,7 @@
static void upload_sf_vp(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &brw->intel.ctx;
+ struct gl_context *ctx = &intel->ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
@@ -106,11 +106,6 @@ static void upload_sf_vp(struct brw_context *brw)
sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
- /* Keep a pointer to it for brw_state_dump.c */
- drm_intel_bo_unreference(brw->sf.vp_bo);
- drm_intel_bo_reference(intel->batch.bo);
- brw->sf.vp_bo = intel->batch.bo;
-
brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
@@ -177,7 +172,7 @@ static void upload_sf_unit( struct brw_context *brw )
sf->thread4.stats_enable = 1;
/* CACHE_NEW_SF_VP */
- sf->sf5.sf_viewport_state_offset = (brw->sf.vp_bo->offset +
+ sf->sf5.sf_viewport_state_offset = (intel->batch.bo->offset +
brw->sf.vp_offset) >> 5; /* reloc */
sf->sf5.viewport_transform = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c
deleted file mode 100644
index 13b231d5cf5..00000000000
--- a/src/mesa/drivers/dri/i965/brw_state.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <[email protected]>
- *
- */
-
-#include "brw_context.h"
-
-void
-brw_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
-{
- struct brw_context *brw = brw_context(ctx);
-
- switch (cap) {
- case GL_DEPTH_CLAMP:
- brw_update_cc_vp(brw);
- break;
- }
-}
-
-void
-brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval)
-{
- struct brw_context *brw = brw_context(ctx);
-
- if (ctx->Transform.DepthClamp)
- brw_update_cc_vp(brw);
-}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 86b0caa4a4e..8b9e3a4ec5d 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -47,6 +47,7 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
};
extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_vp;
extern const struct brw_tracked_state brw_cc_unit;
extern const struct brw_tracked_state brw_check_fallback;
extern const struct brw_tracked_state brw_clip_prog;
@@ -102,11 +103,11 @@ extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
-extern const struct brw_tracked_state gen6_scissor_state_pointers;
extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_constants;
extern const struct brw_tracked_state gen6_vs_state;
extern const struct brw_tracked_state gen6_wm_constants;
extern const struct brw_tracked_state gen6_wm_state;
@@ -123,38 +124,21 @@ void brw_clear_validated_bos(struct brw_context *brw);
/***********************************************************************
* brw_state_cache.c
*/
-drm_intel_bo *brw_cache_data(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint size);
drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_sz,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
const void *data,
- GLuint data_sz);
-
-drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_sz,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_sz,
- const void *aux,
- GLuint aux_sz,
- void *aux_return);
+ GLuint data_sz,
+ const void *aux,
+ GLuint aux_sz,
+ void *aux_return);
drm_intel_bo *brw_search_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index 01eeb19a684..f13a41fa7cc 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -31,29 +31,17 @@
/** @file brw_state_cache.c
*
- * This file implements a simple static state cache for 965. The consumers
- * can query the hash table of state using a cache_id, opaque key data,
- * and list of buffers that will be used in relocations, and receive the
- * corresponding state buffer object of state (plus associated auxiliary
- * data) in return.
+ * This file implements a simple static state cache for 965. The
+ * consumers can query the hash table of state using a cache_id and
+ * opaque key data, and receive the corresponding state buffer object
+ * (plus associated auxiliary data) in return. Objects in the cache
+ * may not have relocations (pointers to other BOs) in them.
*
- * The inner workings are a simple hash table based on a CRC of the key data.
- * The cache_id and relocation target buffers associated with the state
- * buffer are included as auxiliary key data, but are not part of the hash
- * value (this should be fixed, but will likely be fixed instead by making
- * consumers use structured keys).
+ * The inner workings are a simple hash table based on a CRC of the
+ * key data.
*
- * Replacement is not implemented. Instead, when the cache gets too big, at
- * a safe point (unlock) we throw out all of the cache data and let it
- * regenerate for the next rendering operation.
- *
- * The reloc_buf pointers need to be included as key data, otherwise the
- * non-unique values stuffed in the offset in key data through
- * brw_cache_data() may result in successful probe for state buffers
- * even when the buffer being referenced doesn't match. The result would be
- * that the same state cache entry is used twice for different buffers,
- * only one of the two buffers referenced gets put into the offset, and the
- * incorrect program is run for the other instance.
+ * Replacement is not implemented. Instead, when the cache gets too
+ * big we throw out all of the cache data and let it get regenerated.
*/
#include "main/imports.h"
@@ -76,13 +64,6 @@ hash_key(struct brw_cache_item *item)
hash = (hash << 5) | (hash >> 27);
}
- /* Include the BO pointers as key data as well */
- ikey = (GLuint *)item->reloc_bufs;
- for (i = 0; i < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; i++) {
- hash ^= ikey[i];
- hash = (hash << 5) | (hash >> 27);
- }
-
return hash;
}
@@ -110,10 +91,7 @@ brw_cache_item_equals(const struct brw_cache_item *a,
return a->cache_id == b->cache_id &&
a->hash == b->hash &&
a->key_size == b->key_size &&
- (memcmp(a->key, b->key, a->key_size) == 0) &&
- a->nr_reloc_bufs == b->nr_reloc_bufs &&
- (memcmp(a->reloc_bufs, b->reloc_bufs,
- a->nr_reloc_bufs * sizeof(drm_intel_bo *)) == 0);
+ (memcmp(a->key, b->key, a->key_size) == 0);
}
static struct brw_cache_item *
@@ -170,9 +148,7 @@ rehash(struct brw_cache *cache)
drm_intel_bo *
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- drm_intel_bo **reloc_bufs, GLuint nr_reloc_bufs,
+ const void *key, GLuint key_size,
void *aux_return)
{
struct brw_cache_item *item;
@@ -182,8 +158,6 @@ brw_search_cache(struct brw_cache *cache,
lookup.cache_id = cache_id;
lookup.key = key;
lookup.key_size = key_size;
- lookup.reloc_bufs = reloc_bufs;
- lookup.nr_reloc_bufs = nr_reloc_bufs;
hash = hash_key(&lookup);
lookup.hash = hash;
@@ -203,30 +177,24 @@ brw_search_cache(struct brw_cache *cache,
drm_intel_bo *
-brw_upload_cache_with_auxdata(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_size,
- const void *aux,
- GLuint aux_size,
- void *aux_return)
+brw_upload_cache(struct brw_cache *cache,
+ enum brw_cache_id cache_id,
+ const void *key,
+ GLuint key_size,
+ const void *data,
+ GLuint data_size,
+ const void *aux,
+ GLuint aux_size,
+ void *aux_return)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
GLuint hash;
- GLuint relocs_size = nr_reloc_bufs * sizeof(drm_intel_bo *);
void *tmp;
drm_intel_bo *bo;
- int i;
item->cache_id = cache_id;
item->key = key;
item->key_size = key_size;
- item->reloc_bufs = reloc_bufs;
- item->nr_reloc_bufs = nr_reloc_bufs;
hash = hash_key(item);
item->hash = hash;
@@ -235,19 +203,13 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache,
cache->name[cache_id], data_size, 1 << 6);
- /* Set up the memory containing the key, aux_data, and reloc_bufs */
- tmp = malloc(key_size + aux_size + relocs_size);
+ /* Set up the memory containing the key and aux_data */
+ tmp = malloc(key_size + aux_size);
memcpy(tmp, key, key_size);
memcpy(tmp + key_size, aux, aux_size);
- memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
- for (i = 0; i < nr_reloc_bufs; i++) {
- if (reloc_bufs[i] != NULL)
- drm_intel_bo_reference(reloc_bufs[i]);
- }
item->key = tmp;
- item->reloc_bufs = tmp + key_size + aux_size;
item->bo = bo;
drm_intel_bo_reference(bo);
@@ -276,73 +238,6 @@ brw_upload_cache_with_auxdata(struct brw_cache *cache,
return bo;
}
-drm_intel_bo *
-brw_upload_cache(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *key,
- GLuint key_size,
- drm_intel_bo **reloc_bufs,
- GLuint nr_reloc_bufs,
- const void *data,
- GLuint data_size)
-{
- return brw_upload_cache_with_auxdata(cache, cache_id,
- key, key_size,
- reloc_bufs, nr_reloc_bufs,
- data, data_size,
- NULL, 0,
- NULL);
-}
-
-/**
- * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
- *
- * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
- * better to use, as the potentially changing offsets in the data-used-as-key
- * will result in excessive cache misses.
- *
- * If aux data is involved, use search/upload instead.
-
- */
-drm_intel_bo *
-brw_cache_data(struct brw_cache *cache,
- enum brw_cache_id cache_id,
- const void *data,
- GLuint data_size)
-{
- drm_intel_bo *bo;
- struct brw_cache_item *item, lookup;
- GLuint hash;
-
- lookup.cache_id = cache_id;
- lookup.key = data;
- lookup.key_size = data_size;
- lookup.reloc_bufs = NULL;
- lookup.nr_reloc_bufs = 0;
- hash = hash_key(&lookup);
- lookup.hash = hash;
-
- item = search_cache(cache, hash, &lookup);
- if (item) {
- update_cache_last(cache, cache_id, item->bo);
- drm_intel_bo_reference(item->bo);
- return item->bo;
- }
-
- bo = brw_upload_cache(cache, cache_id,
- data, data_size,
- NULL, 0,
- data, data_size);
-
- return bo;
-}
-
-enum pool_type {
- DW_SURFACE_STATE,
- DW_GENERAL_STATE
-};
-
-
static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
@@ -352,8 +247,8 @@ brw_init_cache_id(struct brw_cache *cache,
}
-static void
-brw_init_non_surface_cache(struct brw_context *brw)
+void
+brw_init_caches(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
@@ -367,7 +262,6 @@ brw_init_non_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
- brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR);
brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
@@ -392,13 +286,6 @@ brw_init_non_surface_cache(struct brw_context *brw)
brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE);
}
-void
-brw_init_caches(struct brw_context *brw)
-{
- brw_init_non_surface_cache(brw);
-}
-
-
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
@@ -409,11 +296,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
- int j;
-
next = c->next;
- for (j = 0; j < c->nr_reloc_bufs; j++)
- drm_intel_bo_unreference(c->reloc_bufs[j]);
drm_intel_bo_unreference(c->bo);
free((void *)c->key);
free(c);
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
index b393259c915..3a3aa8c0346 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -140,19 +140,15 @@ static void dump_wm_surface_state(struct brw_context *brw)
static void dump_wm_sampler_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
int i;
- if (!brw->wm.sampler_bo) {
- fprintf(stderr, "WM_SAMPLER: NULL\n");
- return;
- }
-
- drm_intel_bo_map(brw->wm.sampler_bo, GL_FALSE);
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
unsigned int offset;
+ uint32_t sdc_offset;
struct brw_sampler_state *samp;
- struct brw_sampler_default_color *sdc;
char name[20];
if (!ctx->Texture.Unit[i]._ReallyEnabled) {
@@ -160,9 +156,11 @@ static void dump_wm_sampler_state(struct brw_context *brw)
continue;
}
- offset = brw->wm.sampler_bo->offset +
- i * sizeof(struct brw_sampler_state);
- samp = (struct brw_sampler_state *)(brw->wm.sampler_bo->virtual +
+ offset = (intel->batch.bo->offset +
+ brw->wm.sampler_offset +
+ i * sizeof(struct brw_sampler_state));
+ samp = (struct brw_sampler_state *)(intel->batch.bo->virtual +
+ brw->wm.sampler_offset +
i * sizeof(struct brw_sampler_state));
sprintf(name, "WM SAMP%d", i);
@@ -173,30 +171,45 @@ static void dump_wm_sampler_state(struct brw_context *brw)
sprintf(name, " WM SDC%d", i);
- drm_intel_bo_map(brw->wm.sdc_bo[i], GL_FALSE);
- sdc = (struct brw_sampler_default_color *)(brw->wm.sdc_bo[i]->virtual);
- state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 0, "r\n");
- state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 1, "g\n");
- state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 2, "b\n");
- state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 3, "a\n");
- drm_intel_bo_unmap(brw->wm.sdc_bo[i]);
+ sdc_offset = intel->batch.bo->offset + brw->wm.sdc_offset[i];
+ if (intel->gen >= 5) { /* NOTE(review): assumes f[] is r,g,b,a like color[] below */
+ struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual +
+ brw->wm.sdc_offset[i]);
+ state_out(name, sdc, sdc_offset, 0, "unorm rgba\n");
+ state_out(name, sdc, sdc_offset, 1, "r %f\n", sdc->f[0]);
+ state_out(name, sdc, sdc_offset, 2, "g %f\n", sdc->f[1]);
+ state_out(name, sdc, sdc_offset, 3, "b %f\n", sdc->f[2]);
+ state_out(name, sdc, sdc_offset, 4, "a %f\n", sdc->f[3]);
+ state_out(name, sdc, sdc_offset, 5, "half float rg\n");
+ state_out(name, sdc, sdc_offset, 6, "half float ba\n");
+ state_out(name, sdc, sdc_offset, 7, "u16 rg\n");
+ state_out(name, sdc, sdc_offset, 8, "u16 ba\n");
+ state_out(name, sdc, sdc_offset, 9, "s16 rg\n");
+ state_out(name, sdc, sdc_offset, 10, "s16 ba\n");
+ state_out(name, sdc, sdc_offset, 11, "s8 rgba\n");
+ } else {
+ struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual +
+ brw->wm.sdc_offset[i]);
+ state_out(name, sdc, sdc_offset, 0, "r %f\n", sdc->color[0]);
+ state_out(name, sdc, sdc_offset, 1, "g %f\n", sdc->color[1]);
+ state_out(name, sdc, sdc_offset, 2, "b %f\n", sdc->color[2]);
+ state_out(name, sdc, sdc_offset, 3, "a %f\n", sdc->color[3]);
+ }
}
- drm_intel_bo_unmap(brw->wm.sampler_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
static void dump_sf_viewport_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
const char *name = "SF VP";
struct brw_sf_viewport *vp;
uint32_t vp_off;
- if (brw->sf.vp_bo == NULL)
- return;
-
- drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE);
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
- vp = brw->sf.vp_bo->virtual + brw->sf.vp_offset;
- vp_off = brw->sf.vp_bo->offset + brw->sf.vp_offset;
+ vp = intel->batch.bo->virtual + brw->sf.vp_offset;
+ vp_off = intel->batch.bo->offset + brw->sf.vp_offset;
state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
@@ -210,62 +223,56 @@ static void dump_sf_viewport_state(struct brw_context *brw)
state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
vp->scissor.xmax, vp->scissor.ymax);
- drm_intel_bo_unmap(brw->sf.vp_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
static void dump_clip_viewport_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
const char *name = "CLIP VP";
struct brw_clipper_viewport *vp;
uint32_t vp_off;
- if (brw->clip.vp_bo == NULL)
- return;
-
- drm_intel_bo_map(brw->clip.vp_bo, GL_FALSE);
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
- vp = brw->clip.vp_bo->virtual;
- vp_off = brw->clip.vp_bo->offset;
+ vp = intel->batch.bo->virtual + brw->clip.vp_offset;
+ vp_off = intel->batch.bo->offset + brw->clip.vp_offset;
state_out(name, vp, vp_off, 0, "xmin = %f\n", vp->xmin);
state_out(name, vp, vp_off, 1, "xmax = %f\n", vp->xmax);
state_out(name, vp, vp_off, 2, "ymin = %f\n", vp->ymin);
state_out(name, vp, vp_off, 3, "ymax = %f\n", vp->ymax);
- drm_intel_bo_unmap(brw->clip.vp_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
static void dump_cc_viewport_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
const char *name = "CC VP";
struct brw_cc_viewport *vp;
uint32_t vp_off;
- if (brw->cc.vp_bo == NULL)
- return;
-
- drm_intel_bo_map(brw->cc.vp_bo, GL_FALSE);
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
- vp = brw->cc.vp_bo->virtual;
- vp_off = brw->cc.vp_bo->offset;
+ vp = intel->batch.bo->virtual + brw->cc.vp_offset;
+ vp_off = intel->batch.bo->offset + brw->cc.vp_offset;
state_out(name, vp, vp_off, 0, "min_depth = %f\n", vp->min_depth);
state_out(name, vp, vp_off, 1, "max_depth = %f\n", vp->max_depth);
- drm_intel_bo_unmap(brw->cc.vp_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
static void dump_depth_stencil_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
const char *name = "DEPTH STENCIL";
struct gen6_depth_stencil_state *ds;
uint32_t ds_off;
- if (brw->cc.depth_stencil_state_bo == NULL)
- return;
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
- drm_intel_bo_map(brw->cc.depth_stencil_state_bo, GL_FALSE);
-
- ds = brw->cc.depth_stencil_state_bo->virtual;
- ds_off = brw->cc.depth_stencil_state_bo->offset;
+ ds = intel->batch.bo->virtual + brw->cc.depth_stencil_state_offset;
+ ds_off = intel->batch.bo->offset + brw->cc.depth_stencil_state_offset;
state_out(name, ds, ds_off, 0, "stencil %sable, func %d, write %sable\n",
ds->ds0.stencil_enable ? "en" : "dis",
@@ -277,7 +284,7 @@ static void dump_depth_stencil_state(struct brw_context *brw)
ds->ds2.depth_test_enable ? "en" : "dis",
ds->ds2.depth_test_func,
ds->ds2.depth_write_enable ? "en" : "dis");
- drm_intel_bo_unmap(brw->cc.depth_stencil_state_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
static void dump_cc_state(struct brw_context *brw)
@@ -291,8 +298,8 @@ static void dump_cc_state(struct brw_context *brw)
return;
drm_intel_bo_map(bo, GL_FALSE);
- cc = bo->virtual;
- cc_off = bo->offset;
+ cc = bo->virtual + brw->cc.state_offset;
+ cc_off = bo->offset + brw->cc.state_offset;
state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d,"
"bf stencil ref %d\n",
@@ -312,22 +319,20 @@ static void dump_cc_state(struct brw_context *brw)
static void dump_blend_state(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
const char *name = "BLEND";
struct gen6_blend_state *blend;
uint32_t blend_off;
- if (brw->cc.blend_state_bo == NULL)
- return;
-
- drm_intel_bo_map(brw->cc.blend_state_bo, GL_FALSE);
+ drm_intel_bo_map(intel->batch.bo, GL_FALSE);
- blend = brw->cc.blend_state_bo->virtual;
- blend_off = brw->cc.blend_state_bo->offset;
+ blend = intel->batch.bo->virtual + brw->cc.blend_state_offset;
+ blend_off = intel->batch.bo->offset + brw->cc.blend_state_offset;
state_out(name, blend, blend_off, 0, "\n");
state_out(name, blend, blend_off, 1, "\n");
- drm_intel_bo_unmap(brw->cc.blend_state_bo);
+ drm_intel_bo_unmap(intel->batch.bo);
}
@@ -383,21 +388,25 @@ void brw_debug_batch(struct intel_context *intel)
dump_wm_sampler_state(brw);
if (intel->gen < 6)
- state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state));
+ state_struct_out("VS", intel->batch.bo, brw->vs.state_offset,
+ sizeof(struct brw_vs_unit_state));
brw_debug_prog("VS prog", brw->vs.prog_bo);
if (intel->gen < 6)
- state_struct_out("GS", brw->gs.state_bo, 0, sizeof(struct brw_gs_unit_state));
+ state_struct_out("GS", intel->batch.bo, brw->gs.state_offset,
+ sizeof(struct brw_gs_unit_state));
brw_debug_prog("GS prog", brw->gs.prog_bo);
if (intel->gen < 6) {
- state_struct_out("SF", brw->sf.state_bo, 0, sizeof(struct brw_sf_unit_state));
- brw_debug_prog("SF prog", brw->sf.prog_bo);
+ state_struct_out("SF", intel->batch.bo, brw->sf.state_offset,
+ sizeof(struct brw_sf_unit_state));
+ brw_debug_prog("SF prog", brw->sf.prog_bo);
}
dump_sf_viewport_state(brw);
if (intel->gen < 6)
- state_struct_out("WM", brw->wm.state_bo, 0, sizeof(struct brw_wm_unit_state));
+ state_struct_out("WM", intel->batch.bo, brw->wm.state_offset,
+ sizeof(struct brw_wm_unit_state));
brw_debug_prog("WM prog", brw->wm.prog_bo);
if (intel->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 6f521be6599..008aceb222b 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -60,6 +60,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
+ &brw_cc_vp,
&brw_cc_unit,
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
@@ -119,6 +120,10 @@ static const struct brw_tracked_state *gen6_atoms[] =
/* Command packets: */
&brw_invarient_state,
+ /* must do before binding table pointers, cc state ptrs */
+ &brw_state_base_address,
+
+ &brw_cc_vp,
&gen6_viewport_state, /* must do after *_vp stages */
&gen6_urb,
@@ -129,6 +134,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_vs_constants, /* Before vs_state */
&gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
@@ -146,9 +152,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
&gen6_wm_state,
&gen6_scissor_state,
- &gen6_scissor_state_pointers,
-
- &brw_state_base_address,
&gen6_binding_table_pointers,
@@ -314,7 +317,6 @@ static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG),
- DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
DEFINE_BIT(CACHE_NEW_SAMPLER),
DEFINE_BIT(CACHE_NEW_WM_UNIT),
DEFINE_BIT(CACHE_NEW_SF_PROG),
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 63ae13191f9..31a2b518c40 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -103,13 +103,11 @@ static void do_vs_prog( struct brw_context *brw,
aux_size += c.vp->program.Base.Parameters->NumParameters;
drm_intel_bo_unreference(brw->vs.prog_bo);
- brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- aux_size,
- &brw->vs.prog_data);
+ brw->vs.prog_bo = brw_upload_cache(&brw->cache, BRW_VS_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, aux_size,
+ &brw->vs.prog_data);
}
@@ -148,7 +146,6 @@ static void brw_upload_vs_prog(struct brw_context *brw)
drm_intel_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
- NULL, 0,
&brw->vs.prog_data);
if (brw->vs.prog_bo == NULL)
do_vs_prog(brw, vp, &key);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dd4e1e6c6ad..a28cdc0bfe9 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1553,6 +1553,26 @@ static void emit_swz( struct brw_vs_compile *c,
}
}
+/* Pad a gen6+ interleaved URB write message length (header included). */
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+   if (brw->intel.gen < 6)
+      return mlen;
+
+   /* mlen counts the message header register too, so the URB payload is
+    * mlen - 1 registers.  The payload must be a multiple of 256 bits (two
+    * VS registers; vol5c.5, section 5.4.3.2.2: URB_INTERLEAVED), i.e. mlen
+    * itself must be odd.  URB entries are allocated in multiples of 1024
+    * bits, so writing one extra 128-bit register as padding is harmless.
+    */
+   if ((mlen % 2) != 1)
+      mlen++;
+
+   return mlen;
+}
+
/**
* Post-vertex-program processing. Send the results to the URB.
@@ -1734,12 +1754,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
eot = (c->first_overflow_output == 0);
- msg_len = c->nr_outputs + 2 + len_vertex_header;
- if (intel->gen >= 6) {
- /* interleaved urb write message length for gen6 should be multiple of 2 */
- if ((msg_len % 2) != 0)
- msg_len++;
- }
+ /* Message header, plus VUE header, plus the (first set of) outputs. */
+ msg_len = 1 + len_vertex_header + c->nr_outputs;
+ msg_len = align_interleaved_urb_mlen(brw, msg_len);
+ /* Any outputs beyond BRW_MAX_MRF should be past first_overflow_output */
+ msg_len = MIN2(msg_len, (BRW_MAX_MRF - 1));
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -1747,7 +1766,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */
+ msg_len,
0, /* response len */
eot, /* eot */
eot, /* writes complete */
@@ -1774,7 +1793,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf, /* msg len */
+ align_interleaved_urb_mlen(brw, mrf),
0, /* response len */
1, /* eot */
1, /* writes complete */
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index c3a7cc247c5..1eee5b7e5de 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -49,48 +49,19 @@ struct brw_vs_unit_key {
};
static void
-vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
-
- memset(key, 0, sizeof(*key));
-
- /* CACHE_NEW_VS_PROG */
- key->total_grf = brw->vs.prog_data->total_grf;
- key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
-
- /* BRW_NEW_URB_FENCE */
- key->nr_urb_entries = brw->urb.nr_vs_entries;
- key->urb_size = brw->urb.vsize;
-
- /* BRW_NEW_NR_VS_SURFACES */
- key->nr_surfaces = brw->vs.nr_surfaces;
-
- /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
- if (ctx->Transform.ClipPlanesEnabled) {
- /* Note that we read in the userclip planes as well, hence
- * clip_start:
- */
- key->curbe_offset = brw->curbe.clip_start;
- }
- else {
- key->curbe_offset = brw->curbe.vs_start;
- }
-}
-
-static drm_intel_bo *
-vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+brw_prepare_vs_unit(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_vs_unit_state vs;
- drm_intel_bo *bo;
+ struct gl_context *ctx = &intel->ctx;
+ struct brw_vs_unit_state *vs;
- memset(&vs, 0, sizeof(vs));
+ vs = brw_state_batch(brw, sizeof(*vs), 32, &brw->vs.state_offset);
+ memset(vs, 0, sizeof(*vs));
- vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
- vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+ /* CACHE_NEW_VS_PROG */
+ vs->thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+ vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* Choosing multiple program flow means that we may get 2-vertex threads,
* which will have the channel mask for dwords 4-7 enabled in the thread,
* and those dwords will be written to the second URB handle when we
@@ -103,21 +74,34 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
* The most notable and reliably failing application is the Humus
* demo "CelShading"
*/
- vs.thread1.single_program_flow = (intel->gen == 5);
+ vs->thread1.single_program_flow = (intel->gen == 5);
+ /* BRW_NEW_NR_VS_SURFACES */
if (intel->gen == 5)
- vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+ vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
else
- vs.thread1.binding_table_entry_count = key->nr_surfaces;
+ vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+
+ vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+ vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+ vs->thread3.dispatch_grf_start_reg = 1;
+ vs->thread3.urb_entry_read_offset = 0;
- vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
- vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- vs.thread3.dispatch_grf_start_reg = 1;
- vs.thread3.urb_entry_read_offset = 0;
- vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+ /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
+ if (ctx->Transform.ClipPlanesEnabled) {
+ /* Note that we read in the userclip planes as well, hence
+ * clip_start:
+ */
+ vs->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
+ }
+ else {
+ vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
+ }
+
+ /* BRW_NEW_URB_FENCE */
if (intel->gen == 5) {
- switch (key->nr_urb_entries) {
+ switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
@@ -129,13 +113,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
case 192:
case 224:
case 256:
- vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+ vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
break;
default:
assert(0);
}
} else {
- switch (key->nr_urb_entries) {
+ switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
@@ -147,63 +131,45 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
default:
assert(0);
}
- vs.thread4.nr_urb_entries = key->nr_urb_entries;
+ vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
}
- vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+ vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
- vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
- 1, brw->vs_max_threads) - 1;
+ vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
+ 1, brw->vs_max_threads) - 1;
/* No samplers for ARB_vp programs:
*/
/* It has to be set to 0 for Ironlake
*/
- vs.vs5.sampler_count = 0;
+ vs->vs5.sampler_count = 0;
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
- vs.thread4.stats_enable = 1;
+ vs->thread4.stats_enable = 1;
/* Vertex program always enabled:
*/
- vs.vs6.vs_enable = 1;
-
- bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
- key, sizeof(*key),
- &brw->vs.prog_bo, 1,
- &vs, sizeof(vs));
+ vs->vs6.vs_enable = 1;
/* Emit VS program relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_vs_unit_state, thread0),
- brw->vs.prog_bo, vs.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo, (brw->vs.state_offset +
+ offsetof(struct brw_vs_unit_state,
+ thread0)),
+ brw->vs.prog_bo, vs->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
- return bo;
-}
-
-static void prepare_vs_unit(struct brw_context *brw)
-{
- struct brw_vs_unit_key key;
-
- vs_unit_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->vs.state_bo);
- brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
- &key, sizeof(key),
- &brw->vs.prog_bo, 1,
- NULL);
- if (brw->vs.state_bo == NULL) {
- brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
- }
+ brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
}
const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
- .brw = (BRW_NEW_CURBE_OFFSETS |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_VS_PROG
},
- .prepare = prepare_vs_unit,
+ .prepare = brw_prepare_vs_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index ce8712a260f..f2c417d8a81 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -60,7 +60,6 @@ dri_bo_release(drm_intel_bo **bo)
static void brw_destroy_context( struct intel_context *intel )
{
struct brw_context *brw = brw_context(&intel->ctx);
- int i;
brw_destroy_state(brw);
brw_draw_destroy( brw );
@@ -77,28 +76,13 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.prog_bo);
- dri_bo_release(&brw->vs.state_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->gs.prog_bo);
- dri_bo_release(&brw->gs.state_bo);
dri_bo_release(&brw->clip.prog_bo);
- dri_bo_release(&brw->clip.state_bo);
- dri_bo_release(&brw->clip.vp_bo);
dri_bo_release(&brw->sf.prog_bo);
- dri_bo_release(&brw->sf.state_bo);
- dri_bo_release(&brw->sf.vp_bo);
- for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
- dri_bo_release(&brw->wm.sdc_bo[i]);
- dri_bo_release(&brw->wm.sampler_bo);
dri_bo_release(&brw->wm.prog_bo);
- dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->wm.const_bo);
- dri_bo_release(&brw->wm.push_const_bo);
dri_bo_release(&brw->cc.prog_bo);
- dri_bo_release(&brw->cc.vp_bo);
- dri_bo_release(&brw->cc.blend_state_bo);
- dri_bo_release(&brw->cc.depth_stencil_state_bo);
- dri_bo_release(&brw->cc.color_calc_state_bo);
free(brw->curbe.last_buf);
free(brw->curbe.next_buf);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 65af227d831..06512de940f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -33,6 +33,7 @@
#include "brw_wm.h"
#include "brw_state.h"
#include "main/formats.h"
+#include "main/samplerobj.h"
/** Return number of src args for given instruction */
GLuint brw_wm_nr_args( GLuint opcode )
@@ -119,7 +120,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_emit(c);
}
-static void
+void
brw_wm_payload_setup(struct brw_context *brw,
struct brw_wm_compile *c)
{
@@ -224,18 +225,13 @@ static void do_wm_prog( struct brw_context *brw,
brw_init_compile(brw, &c->func);
- brw_wm_payload_setup(brw, c);
-
if (!brw_wm_fs_emit(brw, c)) {
- /*
- * Shader which use GLSL features such as flow control are handled
- * differently from "simple" shaders.
- */
+ /* Fallback for fixed function and ARB_fp shaders. */
c->dispatch_width = 16;
brw_wm_payload_setup(brw, c);
brw_wm_non_glsl_emit(brw, c);
+ c->prog_data.dispatch_width = 16;
}
- c->prog_data.dispatch_width = c->dispatch_width;
/* Scratch space is used for register spilling */
if (c->last_scratch) {
@@ -272,13 +268,11 @@ static void do_wm_prog( struct brw_context *brw,
program = brw_get_program(&c->func, &program_size);
drm_intel_bo_unreference(brw->wm.prog_bo);
- brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
- &c->key, sizeof(c->key),
- NULL, 0,
- program, program_size,
- &c->prog_data,
- sizeof(c->prog_data),
- &brw->wm.prog_data);
+ brw->wm.prog_bo = brw_upload_cache(&brw->cache, BRW_WM_PROG,
+ &c->key, sizeof(c->key),
+ program, program_size,
+ &c->prog_data, sizeof(c->prog_data),
+ &brw->wm.prog_data);
}
@@ -373,6 +367,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
if (unit->_ReallyEnabled) {
const struct gl_texture_object *t = unit->_Current;
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, i);
int swizzles[SWIZZLE_NIL + 1] = {
SWIZZLE_X,
SWIZZLE_Y,
@@ -388,14 +383,14 @@ static void brw_wm_populate_key( struct brw_context *brw,
* well and our shadow compares always return the result in
* all 4 channels.
*/
- if (t->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
- if (t->Sampler.DepthMode == GL_ALPHA) {
+ if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+ if (sampler->DepthMode == GL_ALPHA) {
swizzles[0] = SWIZZLE_ZERO;
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
- } else if (t->Sampler.DepthMode == GL_LUMINANCE) {
+ } else if (sampler->DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
- } else if (t->Sampler.DepthMode == GL_RED) {
+ } else if (sampler->DepthMode == GL_RED) {
/* See table 3.23 of the GL 3.0 spec. */
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
@@ -465,7 +460,7 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
struct brw_wm_prog_key key;
struct brw_fragment_program *fp = (struct brw_fragment_program *)
brw->fragment_program;
-
+
brw_wm_populate_key(brw, &key);
/* Make an early check for the key.
@@ -473,7 +468,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
drm_intel_bo_unreference(brw->wm.prog_bo);
brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
&key, sizeof(key),
- NULL, 0,
&brw->wm.prog_data);
if (brw->wm.prog_bo == NULL)
do_wm_prog(brw, fp, &key);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 40659f26025..a5f99a0a657 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -201,11 +201,11 @@ struct brw_wm_compile {
PASS2_DONE
} state;
- GLuint source_depth_reg:3;
- GLuint source_w_reg:3;
- GLuint aa_dest_stencil_reg:3;
- GLuint dest_depth_reg:3;
- GLuint nr_payload_regs:4;
+ uint8_t source_depth_reg;
+ uint8_t source_w_reg;
+ uint8_t aa_dest_stencil_reg;
+ uint8_t dest_depth_reg;
+ uint8_t nr_payload_regs;
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint runtime_check_aads_emit:1;
@@ -218,7 +218,6 @@ struct brw_wm_compile {
GLuint nr_fp_insns;
GLuint fp_temp;
GLuint fp_interp_emitted;
- GLuint fp_fragcolor_emitted;
struct prog_src_register pixel_xy;
struct prog_src_register delta_xy;
@@ -315,7 +314,7 @@ void brw_wm_print_program( struct brw_wm_compile *c,
void brw_wm_lookup_iz(struct intel_context *intel,
struct brw_wm_compile *c);
-GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
+bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
/* brw_wm_emit.c */
void emit_alu1(struct brw_compile *p,
@@ -475,5 +474,7 @@ struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint
bool brw_color_buffer_write_enabled(struct brw_context *brw);
bool brw_render_target_supported(gl_format format);
+void brw_wm_payload_setup(struct brw_context *brw,
+ struct brw_wm_compile *c);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index cdc1f367e5c..fd4cd892f41 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -51,16 +51,6 @@ static GLboolean can_do_pln(struct intel_context *intel,
return GL_TRUE;
}
-/* Not quite sure how correct this is - need to understand horiz
- * vs. vertical strides a little better.
- */
-static INLINE struct brw_reg sechalf( struct brw_reg reg )
-{
- if (reg.vstride)
- reg.nr++;
- return reg;
-}
-
/* Return the SrcReg index of the channels that can be immediate float operands
* instead of usage of PROGRAM_CONSTANT values through push/pull.
*/
@@ -1325,12 +1315,6 @@ static void fire_fb_write( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
- struct brw_reg dst;
-
- if (c->dispatch_width == 16)
- dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
- else
- dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
/* Pass through control information:
*
@@ -1352,7 +1336,6 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
c->dispatch_width,
- dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
target,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 4759b289a0c..9ddbee2edf4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -961,35 +961,31 @@ static void emit_render_target_writes( struct brw_wm_compile *c )
struct prog_src_register outcolor;
GLuint i;
- struct prog_instruction *inst, *last_inst = NULL;
+ struct prog_instruction *inst = NULL;
/* The inst->Aux field is used for FB write target and the EOT marker */
- if (c->key.nr_color_regions > 1) {
- for (i = 0 ; i < c->key.nr_color_regions; i++) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
- 0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = INST_AUX_TARGET(i);
- if (c->fp_fragcolor_emitted) {
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
- 0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = INST_AUX_TARGET(i);
- }
+ for (i = 0; i < c->key.nr_color_regions; i++) {
+ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+ } else {
+ outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
}
- last_inst->Aux |= INST_AUX_EOT;
+ inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+ 0, outcolor, payload_r0_depth, outdepth);
+ inst->Aux = INST_AUX_TARGET(i);
}
- else {
- /* if gl_FragData[0] is written, use it, else use gl_FragColor */
- if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
- else
- outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
- inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
- 0, outcolor, payload_r0_depth, outdepth);
- inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
+ /* Mark the last FB write as final, or emit a dummy write if we had
+ * no render targets bound.
+ */
+ if (c->key.nr_color_regions != 0) {
+ inst->Aux |= INST_AUX_EOT;
+ } else {
+ inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+ 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR),
+ payload_r0_depth, outdepth);
+ inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT;
}
}
@@ -1015,16 +1011,6 @@ static void validate_src_regs( struct brw_wm_compile *c,
}
}
}
-
-static void validate_dst_regs( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- GLuint idx = inst->DstReg.Index;
- if (idx == FRAG_RESULT_COLOR)
- c->fp_fragcolor_emitted = 1;
- }
-}
static void print_insns( const struct prog_instruction *insn,
GLuint nr )
@@ -1083,7 +1069,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
validate_src_regs(c, inst);
- validate_dst_regs(c, inst);
}
/* Loop over all instructions doing assorted simplifications and
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index cfc30d8613f..7b93bf90241 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -35,7 +35,7 @@
#include "brw_defines.h"
#include "main/macros.h"
-
+#include "main/samplerobj.h"
/* Samplers aren't strictly wm state from the hardware's perspective,
@@ -66,81 +66,93 @@ static GLuint translate_wrap_mode( GLenum wrap )
}
}
-static drm_intel_bo *upload_default_color( struct brw_context *brw,
- const GLfloat *color )
+static void
+upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
+ int unit)
{
struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *texObj = texUnit->_Current;
+ struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
+ float color[4];
+
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+ /* GL specifies that the border color for depth textures is taken from
+ * the R channel, while the hardware uses A. Replicate R into all the
+ * channels for safety.
+ */
+ color[0] = sampler->BorderColor.f[0];
+ color[1] = sampler->BorderColor.f[0];
+ color[2] = sampler->BorderColor.f[0];
+ color[3] = sampler->BorderColor.f[0];
+ } else {
+ color[0] = sampler->BorderColor.f[0];
+ color[1] = sampler->BorderColor.f[1];
+ color[2] = sampler->BorderColor.f[2];
+ color[3] = sampler->BorderColor.f[3];
+ }
if (intel->gen >= 5) {
- struct gen5_sampler_default_color sdc;
+ struct gen5_sampler_default_color *sdc;
+
+ sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]);
- memset(&sdc, 0, sizeof(sdc));
+ memset(sdc, 0, sizeof(*sdc));
- UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]);
- UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]);
- UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]);
- UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]);
- UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]);
- UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]);
- UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]);
- UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]);
- UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]);
- /* XXX: Fill in half floats */
- /* XXX: Fill in signed bytes */
+ sdc->hf[0] = _mesa_float_to_half(color[0]);
+ sdc->hf[1] = _mesa_float_to_half(color[1]);
+ sdc->hf[2] = _mesa_float_to_half(color[2]);
+ sdc->hf[3] = _mesa_float_to_half(color[3]);
- COPY_4V(sdc.f, color);
+ sdc->b[0] = sdc->s[0] >> 8;
+ sdc->b[1] = sdc->s[1] >> 8;
+ sdc->b[2] = sdc->s[2] >> 8;
+ sdc->b[3] = sdc->s[3] >> 8;
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ sdc->f[0] = color[0];
+ sdc->f[1] = color[1];
+ sdc->f[2] = color[2];
+ sdc->f[3] = color[3];
} else {
- struct brw_sampler_default_color sdc;
+ struct brw_sampler_default_color *sdc;
- COPY_4V(sdc.color, color);
+ sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]);
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ COPY_4V(sdc->color, color);
}
}
-
-struct wm_sampler_key {
- int sampler_count;
-
- struct wm_sampler_entry {
- GLenum tex_target;
- GLenum wrap_r, wrap_s, wrap_t;
- float maxlod, minlod;
- float lod_bias;
- float max_aniso;
- GLenum minfilter, magfilter;
- GLenum comparemode, comparefunc;
-
- /** If target is cubemap, take context setting.
- */
- GLboolean seamless_cube_map;
- } sampler[BRW_MAX_TEX_UNIT];
-};
-
/**
* Sets the sampler state for a single unit based off of the sampler key
* entry.
*/
static void brw_update_sampler_state(struct brw_context *brw,
- struct wm_sampler_entry *key,
- drm_intel_bo *sdc_bo,
+ int unit,
struct brw_sampler_state *sampler)
{
struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+ struct gl_texture_object *texObj = texUnit->_Current;
+ struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
- memset(sampler, 0, sizeof(*sampler));
-
- switch (key->minfilter) {
+ switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
@@ -171,17 +183,17 @@ static void brw_update_sampler_state(struct brw_context *brw,
/* Set Anisotropy:
*/
- if (key->max_aniso > 1.0) {
+ if (gl_sampler->MaxAnisotropy > 1.0) {
sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
- if (key->max_aniso > 2.0) {
- sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+ if (gl_sampler->MaxAnisotropy > 2.0) {
+ sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
BRW_ANISORATIO_16);
}
}
else {
- switch (key->magfilter) {
+ switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
break;
@@ -193,9 +205,9 @@ static void brw_update_sampler_state(struct brw_context *brw,
}
}
- sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
- sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
- sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+ sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
+ sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
+ sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
if (intel->gen >= 6 &&
sampler->ss0.min_filter != sampler->ss0.mag_filter)
@@ -204,9 +216,10 @@ static void brw_update_sampler_state(struct brw_context *brw,
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Futher, only CUBE and CLAMP are valid.
*/
- if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
- if (key->seamless_cube_map &&
- (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+ if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+ if (ctx->Texture.CubeMapSeamless &&
+ (gl_sampler->MinFilter != GL_NEAREST ||
+ gl_sampler->MagFilter != GL_NEAREST)) {
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
@@ -215,7 +228,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
}
- } else if (key->tex_target == GL_TEXTURE_1D) {
+ } else if (texObj->Target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
@@ -227,18 +240,19 @@ static void brw_update_sampler_state(struct brw_context *brw,
/* Set shadow function:
*/
- if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+ if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
/* Shadowing is "enabled" by emitting a particular sampler
* message (sample_c). So need to recompile WM program when
* shadow comparison is enabled on each/any texture unit.
*/
sampler->ss0.shadow_function =
- intel_translate_shadow_compare_func(key->comparefunc);
+ intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
}
/* Set LOD bias:
*/
- sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+ sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
+ gl_sampler->LodBias, -16, 15), 6);
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
@@ -252,150 +266,67 @@ static void brw_update_sampler_state(struct brw_context *brw,
*/
sampler->ss0.base_level = U_FIXED(0, 1);
- sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6);
- sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6);
-
- sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
-}
+ sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
+ sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
+ upload_default_color(brw, gl_sampler, unit);
-/** Sets up the cache key for sampler state for all texture units */
-static void
-brw_wm_sampler_populate_key(struct brw_context *brw,
- struct wm_sampler_key *key)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- int unit;
- char *last_entry_end = ((char*)&key->sampler_count) +
- sizeof(key->sampler_count);
-
- key->sampler_count = 0;
-
- for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
- if (ctx->Texture.Unit[unit]._ReallyEnabled) {
- struct wm_sampler_entry *entry = &key->sampler[unit];
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
- struct gl_texture_object *texObj = texUnit->_Current;
- struct gl_texture_image *firstImage =
- texObj->Image[0][texObj->BaseLevel];
-
- memset(last_entry_end, 0,
- (char*)entry - last_entry_end + sizeof(*entry));
- last_entry_end = ((char*)entry) + sizeof(*entry);
-
- entry->tex_target = texObj->Target;
-
- entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
- ? ctx->Texture.CubeMapSeamless : GL_FALSE;
-
- entry->wrap_r = texObj->Sampler.WrapR;
- entry->wrap_s = texObj->Sampler.WrapS;
- entry->wrap_t = texObj->Sampler.WrapT;
-
- entry->maxlod = texObj->Sampler.MaxLod;
- entry->minlod = texObj->Sampler.MinLod;
- entry->lod_bias = texUnit->LodBias + texObj->Sampler.LodBias;
- entry->max_aniso = texObj->Sampler.MaxAnisotropy;
- entry->minfilter = texObj->Sampler.MinFilter;
- entry->magfilter = texObj->Sampler.MagFilter;
- entry->comparemode = texObj->Sampler.CompareMode;
- entry->comparefunc = texObj->Sampler.CompareFunc;
-
- drm_intel_bo_unreference(brw->wm.sdc_bo[unit]);
- if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
- float bordercolor[4] = {
- texObj->Sampler.BorderColor.f[0],
- texObj->Sampler.BorderColor.f[0],
- texObj->Sampler.BorderColor.f[0],
- texObj->Sampler.BorderColor.f[0]
- };
- /* GL specs that border color for depth textures is taken from the
- * R channel, while the hardware uses A. Spam R into all the
- * channels for safety.
- */
- brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
- } else {
- brw->wm.sdc_bo[unit] = upload_default_color(brw,
- texObj->Sampler.BorderColor.f);
- }
- key->sampler_count = unit + 1;
- }
+ if (intel->gen >= 6) {
+ sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
+ } else {
+ /* reloc */
+ sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
+ brw->wm.sdc_offset[unit]) >> 5;
+
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ brw->wm.sampler_offset +
+ unit * sizeof(struct brw_sampler_state) +
+ offsetof(struct brw_sampler_state, ss2),
+ intel->batch.bo, brw->wm.sdc_offset[unit],
+ I915_GEM_DOMAIN_SAMPLER, 0);
}
- struct wm_sampler_entry *entry = &key->sampler[key->sampler_count];
- memset(last_entry_end, 0, (char*)entry - last_entry_end);
}
+
/* All samplers must be uploaded in a single contiguous array, which
* complicates various things. However, this is still too confusing -
* FIXME: simplify all the different new texture state flags.
*/
-static void upload_wm_samplers( struct brw_context *brw )
+static void
+prepare_wm_samplers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
- struct wm_sampler_key key;
- int i, sampler_key_size;
-
- brw_wm_sampler_populate_key(brw, &key);
+ struct brw_sampler_state *samplers;
+ int i;
- if (brw->wm.sampler_count != key.sampler_count) {
- brw->wm.sampler_count = key.sampler_count;
- brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ brw->wm.sampler_count = 0;
+ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ brw->wm.sampler_count = i + 1;
}
- drm_intel_bo_unreference(brw->wm.sampler_bo);
- brw->wm.sampler_bo = NULL;
if (brw->wm.sampler_count == 0)
return;
- /* Only include the populated portion of the key in the search. */
- sampler_key_size = offsetof(struct wm_sampler_key,
- sampler[key.sampler_count]);
- brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
- &key, sampler_key_size,
- brw->wm.sdc_bo, key.sampler_count,
- NULL);
+ samplers = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*samplers),
+ 32, &brw->wm.sampler_offset);
+ memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
- /* If we didnt find it in the cache, compute the state and put it in the
- * cache.
- */
- if (brw->wm.sampler_bo == NULL) {
- struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
-
- memset(sampler, 0, sizeof(sampler));
- for (i = 0; i < key.sampler_count; i++) {
- if (brw->wm.sdc_bo[i] == NULL)
- continue;
-
- brw_update_sampler_state(brw, &key.sampler[i], brw->wm.sdc_bo[i],
- &sampler[i]);
- }
-
- brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
- &key, sampler_key_size,
- brw->wm.sdc_bo, key.sampler_count,
- &sampler, sizeof(sampler));
-
- /* Emit SDC relocations */
- for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
- if (!ctx->Texture.Unit[i]._ReallyEnabled)
- continue;
-
- drm_intel_bo_emit_reloc(brw->wm.sampler_bo,
- i * sizeof(struct brw_sampler_state) +
- offsetof(struct brw_sampler_state, ss2),
- brw->wm.sdc_bo[i], 0,
- I915_GEM_DOMAIN_SAMPLER, 0);
- }
+ for (i = 0; i < brw->wm.sampler_count; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ brw_update_sampler_state(brw, i, &samplers[i]);
}
+
+ brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
}
const struct brw_tracked_state brw_wm_samplers = {
.dirty = {
.mesa = _NEW_TEXTURE,
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0
},
- .prepare = upload_wm_samplers,
+ .prepare = prepare_wm_samplers,
};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index be4b260a5ff..a91ae511b7f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -40,21 +40,6 @@
* WM unit - fragment programs and rasterization
*/
-struct brw_wm_unit_key {
- unsigned int total_grf, total_scratch;
- unsigned int urb_entry_read_length;
- unsigned int curb_entry_read_length;
- unsigned int dispatch_grf_start_reg;
-
- unsigned int curbe_offset;
-
- unsigned int nr_surfaces, sampler_count;
- GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
- GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
- GLboolean color_write_enable;
- GLfloat offset_units, offset_factor;
-};
-
bool
brw_color_buffer_write_enabled(struct brw_context *brw)
{
@@ -81,219 +66,192 @@ brw_color_buffer_write_enabled(struct brw_context *brw)
return false;
}
+/**
+ * Set up the WM unit hardware state. See page 225 of Volume 2.
+ */
static void
-wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+brw_prepare_wm_unit(struct brw_context *brw)
{
- struct gl_context *ctx = &brw->intel.ctx;
- const struct gl_fragment_program *fp = brw->fragment_program;
struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ const struct gl_fragment_program *fp = brw->fragment_program;
+ struct brw_wm_unit_state *wm;
- memset(key, 0, sizeof(*key));
+ wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.state_offset);
+ memset(wm, 0, sizeof(*wm));
+
+ if (brw->wm.prog_data->prog_offset_16) {
+ /* These two fields should be the same pre-gen6, which is why we
+ * only have one hardware field to program for both dispatch
+ * widths.
+ */
+ assert(brw->wm.prog_data->first_curbe_grf ==
+ brw->wm.prog_data->first_curbe_grf_16);
+ }
/* CACHE_NEW_WM_PROG */
- key->total_grf = brw->wm.prog_data->total_grf;
- key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
- key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
- key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
- key->total_scratch = brw->wm.prog_data->total_scratch;
+ wm->thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
+ wm->wm9.grf_reg_count_2 = ALIGN(brw->wm.prog_data->total_grf_16, 16) / 16 - 1;
+ wm->thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ /* reloc */
+ wm->wm9.kernel_start_pointer_2 = (brw->wm.prog_bo->offset +
+ brw->wm.prog_data->prog_offset_16) >> 6;
+ wm->thread1.depth_coef_urb_read_offset = 1;
+ wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- /* BRW_NEW_CURBE_OFFSETS */
- key->curbe_offset = brw->curbe.wm_start;
+ if (intel->gen == 5)
+ wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
+ else {
+ /* BRW_NEW_NR_SURFACES */
+ wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
+ }
- /* BRW_NEW_NR_SURFACEs */
- key->nr_surfaces = brw->wm.nr_surfaces;
+ if (brw->wm.prog_data->total_scratch != 0) {
+ wm->thread2.scratch_space_base_pointer =
+ brw->wm.scratch_bo->offset >> 10; /* reloc */
+ wm->thread2.per_thread_scratch_space =
+ ffs(brw->wm.prog_data->total_scratch) - 11;
+ } else {
+ wm->thread2.scratch_space_base_pointer = 0;
+ wm->thread2.per_thread_scratch_space = 0;
+ }
- /* CACHE_NEW_SAMPLER */
- key->sampler_count = brw->wm.sampler_count;
+ wm->thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+ wm->thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+ wm->thread3.urb_entry_read_offset = 0;
+ wm->thread3.const_urb_entry_read_length =
+ brw->wm.prog_data->curb_read_length;
+ /* BRW_NEW_CURBE_OFFSETS */
+ wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
- /* _NEW_POLYGONSTIPPLE */
- key->polygon_stipple = ctx->Polygon.StippleFlag;
+ if (intel->gen == 5)
+ wm->wm4.sampler_count = 0; /* hardware requirement */
+ else {
+ /* CACHE_NEW_SAMPLER */
+ wm->wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
+ }
- /* BRW_NEW_FRAGMENT_PROGRAM */
- key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+ if (brw->wm.sampler_count) {
+ /* reloc */
+ wm->wm4.sampler_state_pointer = (intel->batch.bo->offset +
+ brw->wm.sampler_offset) >> 5;
+ } else {
+ wm->wm4.sampler_state_pointer = 0;
+ }
- /* as far as we can tell */
- key->computes_depth =
- (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ wm->wm5.program_uses_depth = (fp->Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
+ wm->wm5.program_computes_depth = (fp->Base.OutputsWritten &
+ BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
/* BRW_NEW_DEPTH_BUFFER
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
*/
if (brw->state.depth_region == NULL)
- key->computes_depth = 0;
-
- /* _NEW_BUFFERS | _NEW_COLOR */
- key->color_write_enable = brw_color_buffer_write_enabled(brw);
+ wm->wm5.program_computes_depth = 0;
/* _NEW_COLOR */
- key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
+ wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
- /* If using the fragment shader backend, the program is always
- * 8-wide.
+
+ /* BRW_NEW_FRAGMENT_PROGRAM
+ *
+ * If using the fragment shader backend, 8-wide dispatch is always enabled,
+ * plus 16-wide when prog_offset_16 is set. If not, it's 16-wide only.
*/
if (ctx->Shader.CurrentFragmentProgram) {
struct brw_shader *shader = (struct brw_shader *)
ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (shader != NULL && shader->ir != NULL) {
- key->is_glsl = GL_TRUE;
+ wm->wm5.enable_8_pix = 1;
+ if (brw->wm.prog_data->prog_offset_16)
+ wm->wm5.enable_16_pix = 1;
}
}
+ if (!wm->wm5.enable_8_pix)
+ wm->wm5.enable_16_pix = 1;
- /* _NEW_DEPTH */
- key->stats_wm = intel->stats_wm;
+ wm->wm5.max_threads = brw->wm_max_threads - 1;
- /* _NEW_LINE */
- key->line_stipple = ctx->Line.StippleFlag;
-
- /* _NEW_POLYGON */
- key->offset_enable = ctx->Polygon.OffsetFill;
- key->offset_units = ctx->Polygon.OffsetUnits;
- key->offset_factor = ctx->Polygon.OffsetFactor;
-}
-
-/**
- * Setup wm hardware state. See page 225 of Volume 2
- */
-static drm_intel_bo *
-wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
- drm_intel_bo **reloc_bufs)
-{
- struct intel_context *intel = &brw->intel;
- struct brw_wm_unit_state wm;
- drm_intel_bo *bo;
-
- memset(&wm, 0, sizeof(wm));
-
- wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
- wm.thread1.depth_coef_urb_read_offset = 1;
- wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-
- if (intel->gen == 5)
- wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
- else
- wm.thread1.binding_table_entry_count = key->nr_surfaces;
-
- if (key->total_scratch != 0) {
- wm.thread2.scratch_space_base_pointer =
- brw->wm.scratch_bo->offset >> 10; /* reloc */
- wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11;
- } else {
- wm.thread2.scratch_space_base_pointer = 0;
- wm.thread2.per_thread_scratch_space = 0;
- }
-
- wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
- wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
- wm.thread3.urb_entry_read_offset = 0;
- wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
- wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
-
- if (intel->gen == 5)
- wm.wm4.sampler_count = 0; /* hardware requirement */
- else
- wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
-
- if (brw->wm.sampler_bo != NULL) {
- /* reloc */
- wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
- } else {
- wm.wm4.sampler_state_pointer = 0;
- }
-
- wm.wm5.program_uses_depth = key->uses_depth;
- wm.wm5.program_computes_depth = key->computes_depth;
- wm.wm5.program_uses_killpixel = key->uses_kill;
-
- if (key->is_glsl)
- wm.wm5.enable_8_pix = 1;
- else
- wm.wm5.enable_16_pix = 1;
-
- wm.wm5.max_threads = brw->wm_max_threads - 1;
-
- if (key->color_write_enable ||
- key->uses_kill ||
- key->computes_depth) {
- wm.wm5.thread_dispatch_enable = 1;
+ /* _NEW_BUFFERS | _NEW_COLOR */
+ if (brw_color_buffer_write_enabled(brw) ||
+ wm->wm5.program_uses_killpixel ||
+ wm->wm5.program_computes_depth) {
+ wm->wm5.thread_dispatch_enable = 1;
}
- wm.wm5.legacy_line_rast = 0;
- wm.wm5.legacy_global_depth_bias = 0;
- wm.wm5.early_depth_test = 1; /* never need to disable */
- wm.wm5.line_aa_region_width = 0;
- wm.wm5.line_endcap_aa_region_width = 1;
+ wm->wm5.legacy_line_rast = 0;
+ wm->wm5.legacy_global_depth_bias = 0;
+ wm->wm5.early_depth_test = 1; /* never need to disable */
+ wm->wm5.line_aa_region_width = 0;
+ wm->wm5.line_endcap_aa_region_width = 1;
- wm.wm5.polygon_stipple = key->polygon_stipple;
+ /* _NEW_POLYGONSTIPPLE */
+ wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
- if (key->offset_enable) {
- wm.wm5.depth_offset = 1;
+ /* _NEW_POLYGON */
+ if (ctx->Polygon.OffsetFill) {
+ wm->wm5.depth_offset = 1;
/* Something wierd going on with legacy_global_depth_bias,
* offset_constant, scaling and MRD. This value passes glean
* but gives some odd results elsewere (eg. the
* quad-offset-units test).
*/
- wm.global_depth_offset_constant = key->offset_units * 2;
+ wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
/* This is the only value that passes glean:
*/
- wm.global_depth_offset_scale = key->offset_factor;
+ wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
}
- wm.wm5.line_stipple = key->line_stipple;
-
- if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm)
- wm.wm4.stats_enable = 1;
+ /* _NEW_LINE */
+ wm->wm5.line_stipple = ctx->Line.StippleFlag;
- bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
- key, sizeof(*key),
- reloc_bufs, 3,
- &wm, sizeof(wm));
+ /* _NEW_DEPTH */
+ if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm)
+ wm->wm4.stats_enable = 1;
/* Emit WM program relocation */
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0),
- brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo, wm->thread0.grf_reg_count << 1,
I915_GEM_DOMAIN_INSTRUCTION, 0);
+ if (brw->wm.prog_data->prog_offset_16) {
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, wm9),
+ brw->wm.prog_bo,
+ ((wm->wm9.grf_reg_count_2 << 1) +
+ brw->wm.prog_data->prog_offset_16),
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
+ }
+
/* Emit scratch space relocation */
- if (key->total_scratch != 0) {
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
+ if (brw->wm.prog_data->total_scratch != 0) {
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, thread2),
brw->wm.scratch_bo,
- wm.thread2.per_thread_scratch_space,
+ wm->thread2.per_thread_scratch_space,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Emit sampler state relocation */
- if (key->sampler_count != 0) {
- drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4),
- brw->wm.sampler_bo, (wm.wm4.stats_enable |
- (wm.wm4.sampler_count << 2)),
+ if (brw->wm.sampler_count != 0) {
+ drm_intel_bo_emit_reloc(intel->batch.bo,
+ brw->wm.state_offset +
+ offsetof(struct brw_wm_unit_state, wm4),
+ intel->batch.bo, (brw->wm.sampler_offset |
+ wm->wm4.stats_enable |
+ (wm->wm4.sampler_count << 2)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
- return bo;
-}
-
-
-static void upload_wm_unit( struct brw_context *brw )
-{
- struct brw_wm_unit_key key;
- drm_intel_bo *reloc_bufs[3];
- wm_unit_populate_key(brw, &key);
-
- reloc_bufs[0] = brw->wm.prog_bo;
- reloc_bufs[1] = brw->wm.scratch_bo;
- reloc_bufs[2] = brw->wm.sampler_bo;
-
- drm_intel_bo_unreference(brw->wm.state_bo);
- brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
- &key, sizeof(key),
- reloc_bufs, 3,
- NULL);
- if (brw->wm.state_bo == NULL) {
- brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
- }
+ brw->state.dirty.cache |= CACHE_NEW_WM_UNIT;
}
const struct brw_tracked_state brw_wm_unit = {
@@ -305,7 +263,8 @@ const struct brw_tracked_state brw_wm_unit = {
_NEW_DEPTH |
_NEW_BUFFERS),
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_DEPTH_BUFFER |
BRW_NEW_NR_WM_SURFACES),
@@ -313,6 +272,6 @@ const struct brw_tracked_state brw_wm_unit = {
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
},
- .prepare = upload_wm_unit,
+ .prepare = brw_prepare_wm_unit,
};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index e3396a3cbd4..47b8b511f05 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -31,6 +31,7 @@
#include "main/mtypes.h"
+#include "main/samplerobj.h"
#include "main/texstore.h"
#include "program/prog_parameter.h"
@@ -112,6 +113,10 @@ static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
[MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT,
[MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
[MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT,
+ [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM,
+ [MESA_FORMAT_SIGNED_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_SNORM,
+ [MESA_FORMAT_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_UNORM,
+ [MESA_FORMAT_SIGNED_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_SNORM,
};
bool
@@ -213,6 +218,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
struct brw_surface_state *surf;
@@ -224,8 +230,8 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
surf->ss0.surface_type = translate_tex_target(tObj->Target);
surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat,
firstImage->InternalFormat,
- tObj->Sampler.DepthMode,
- tObj->Sampler.sRGBDecode);
+ sampler->DepthMode,
+ sampler->sRGBDecode);
/* This is ok for all textures with channel width 8bit or less:
*/
@@ -309,7 +315,7 @@ brw_create_constant_surface(struct brw_context *brw,
* state atom.
*/
static void
-prepare_wm_constants(struct brw_context *brw)
+prepare_wm_pull_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
struct intel_context *intel = &brw->intel;
@@ -353,7 +359,7 @@ const struct brw_tracked_state brw_wm_constants = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0
},
- .prepare = prepare_wm_constants,
+ .prepare = prepare_wm_pull_constants,
};
/**
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 1b935fb5e70..66357f00fa6 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -32,82 +32,39 @@
#include "intel_batchbuffer.h"
#include "main/macros.h"
-struct gen6_blend_state_key {
- GLboolean color_blend, alpha_enabled;
- GLboolean dither;
- GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4];
-
- GLenum logic_op;
-
- GLenum blend_eq_rgb, blend_eq_a;
- GLenum blend_src_rgb, blend_src_a;
- GLenum blend_dst_rgb, blend_dst_a;
-
- GLenum alpha_func;
-};
-
static void
-blend_state_populate_key(struct brw_context *brw,
- struct gen6_blend_state_key *key)
+prepare_blend_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
-
- memset(key, 0, sizeof(*key));
-
- /* _NEW_COLOR */
- memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask));
-
- /* _NEW_COLOR */
- if (ctx->Color._LogicOpEnabled)
- key->logic_op = ctx->Color.LogicOp;
- else
- key->logic_op = GL_COPY;
-
- /* _NEW_COLOR */
- key->color_blend = ctx->Color.BlendEnabled;
- if (key->color_blend) {
- key->blend_eq_rgb = ctx->Color.Blend[0].EquationRGB;
- key->blend_eq_a = ctx->Color.Blend[0].EquationA;
- key->blend_src_rgb = ctx->Color.Blend[0].SrcRGB;
- key->blend_dst_rgb = ctx->Color.Blend[0].DstRGB;
- key->blend_src_a = ctx->Color.Blend[0].SrcA;
- key->blend_dst_a = ctx->Color.Blend[0].DstA;
- }
-
- /* _NEW_COLOR */
- key->alpha_enabled = ctx->Color.AlphaEnabled;
- if (key->alpha_enabled) {
- key->alpha_func = ctx->Color.AlphaFunc;
- }
-
- /* _NEW_COLOR */
- key->dither = ctx->Color.DitherFlag;
-}
-
-/**
- * Creates the state cache entry for the given CC unit key.
- */
-static drm_intel_bo *
-blend_state_create_from_key(struct brw_context *brw,
- struct gen6_blend_state_key *key)
-{
- struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS];
- drm_intel_bo *bo;
+ struct gen6_blend_state *blend;
int b;
-
- memset(&blend, 0, sizeof(blend));
-
- for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) {
- if (key->logic_op != GL_COPY) {
- blend[b].blend1.logic_op_enable = 1;
- blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend & (1 << b)) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
+ int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
+ int size = sizeof(*blend) * nr_draw_buffers;
+
+ blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset);
+
+ memset(blend, 0, size);
+
+ for (b = 0; b < nr_draw_buffers; b++) {
+ /* _NEW_COLOR */
+ if (ctx->Color._LogicOpEnabled) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[b];
+ /* _NEW_BUFFERS */
+ /* Floating point RTs should have no effect from LogicOp,
+ * except for disabling of blending
+ */
+ if (_mesa_get_format_datatype(rb->Format) != GL_FLOAT) {
+ blend[b].blend1.logic_op_enable = 1;
+ blend[b].blend1.logic_op_func =
+ intel_translate_logic_op(ctx->Color.LogicOp);
+ }
+ } else if (ctx->Color.BlendEnabled & (1 << b)) {
+ GLenum eqRGB = ctx->Color.Blend[0].EquationRGB;
+ GLenum eqA = ctx->Color.Blend[0].EquationA;
+ GLenum srcRGB = ctx->Color.Blend[0].SrcRGB;
+ GLenum dstRGB = ctx->Color.Blend[0].DstRGB;
+ GLenum srcA = ctx->Color.Blend[0].SrcA;
+ GLenum dstA = ctx->Color.Blend[0].DstA;
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
@@ -131,146 +88,74 @@ blend_state_create_from_key(struct brw_context *brw,
eqA != eqRGB);
}
- if (key->alpha_enabled) {
+
+ /* _NEW_COLOR */
+ if (ctx->Color.AlphaEnabled) {
blend[b].blend1.alpha_test_enable = 1;
- blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ blend[b].blend1.alpha_test_func =
+ intel_translate_compare_func(ctx->Color.AlphaFunc);
}
- if (key->dither) {
+ /* _NEW_COLOR */
+ if (ctx->Color.DitherFlag) {
blend[b].blend1.dither_enable = 1;
blend[b].blend1.y_dither_offset = 0;
blend[b].blend1.x_dither_offset = 0;
}
- blend[b].blend1.write_disable_r = !key->color_mask[b][0];
- blend[b].blend1.write_disable_g = !key->color_mask[b][1];
- blend[b].blend1.write_disable_b = !key->color_mask[b][2];
- blend[b].blend1.write_disable_a = !key->color_mask[b][3];
+ blend[b].blend1.write_disable_r = !ctx->Color.ColorMask[b][0];
+ blend[b].blend1.write_disable_g = !ctx->Color.ColorMask[b][1];
+ blend[b].blend1.write_disable_b = !ctx->Color.ColorMask[b][2];
+ blend[b].blend1.write_disable_a = !ctx->Color.ColorMask[b][3];
}
- bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
- key, sizeof(*key),
- NULL, 0,
- &blend, sizeof(blend));
-
- return bo;
-}
-
-static void
-prepare_blend_state(struct brw_context *brw)
-{
- struct gen6_blend_state_key key;
-
- blend_state_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->cc.blend_state_bo);
- brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE,
- &key, sizeof(key),
- NULL, 0,
- NULL);
-
- if (brw->cc.blend_state_bo == NULL)
- brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key);
+ brw->state.dirty.cache |= CACHE_NEW_BLEND_STATE;
}
const struct brw_tracked_state gen6_blend_state = {
.dirty = {
- .mesa = _NEW_COLOR,
- .brw = 0,
+ .mesa = (_NEW_COLOR |
+ _NEW_BUFFERS),
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
.prepare = prepare_blend_state,
};
-struct gen6_color_calc_state_key {
- float blend_constant_color[4];
- GLclampf alpha_ref;
- GLubyte stencil_ref[2];
-};
-
static void
-color_calc_state_populate_key(struct brw_context *brw,
- struct gen6_color_calc_state_key *key)
+gen6_prepare_color_calc_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct gen6_color_calc_state *cc;
- memset(key, 0, sizeof(*key));
-
- /* _NEW_STENCIL */
- if (ctx->Stencil._Enabled) {
- const unsigned back = ctx->Stencil._BackFace;
-
- key->stencil_ref[0] = ctx->Stencil.Ref[0];
- if (ctx->Stencil._TestTwoSide)
- key->stencil_ref[1] = ctx->Stencil.Ref[back];
- }
+ cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset);
+ memset(cc, 0, sizeof(*cc));
/* _NEW_COLOR */
- if (ctx->Color.AlphaEnabled)
- key->alpha_ref = ctx->Color.AlphaRef;
-
- key->blend_constant_color[0] = ctx->Color.BlendColorUnclamped[0];
- key->blend_constant_color[1] = ctx->Color.BlendColorUnclamped[1];
- key->blend_constant_color[2] = ctx->Color.BlendColorUnclamped[2];
- key->blend_constant_color[3] = ctx->Color.BlendColorUnclamped[3];
-}
-
-/**
- * Creates the state cache entry for the given CC state key.
- */
-static drm_intel_bo *
-color_calc_state_create_from_key(struct brw_context *brw,
- struct gen6_color_calc_state_key *key)
-{
- struct gen6_color_calc_state cc;
- drm_intel_bo *bo;
-
- memset(&cc, 0, sizeof(cc));
+ cc->cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+ UNCLAMPED_FLOAT_TO_UBYTE(cc->cc1.alpha_ref_fi.ui, ctx->Color.AlphaRef);
- cc.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
- UNCLAMPED_FLOAT_TO_UBYTE(cc.cc1.alpha_ref_fi.ui, key->alpha_ref);
-
- cc.cc0.stencil_ref = key->stencil_ref[0];
- cc.cc0.bf_stencil_ref = key->stencil_ref[1];
-
- cc.constant_r = key->blend_constant_color[0];
- cc.constant_g = key->blend_constant_color[1];
- cc.constant_b = key->blend_constant_color[2];
- cc.constant_a = key->blend_constant_color[3];
-
- bo = brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE,
- key, sizeof(*key),
- NULL, 0,
- &cc, sizeof(cc));
-
- return bo;
-}
-
-static void
-prepare_color_calc_state(struct brw_context *brw)
-{
- struct gen6_color_calc_state_key key;
-
- color_calc_state_populate_key(brw, &key);
+ /* _NEW_STENCIL */
+ cc->cc0.stencil_ref = ctx->Stencil.Ref[0];
+ cc->cc0.bf_stencil_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
- drm_intel_bo_unreference(brw->cc.color_calc_state_bo);
- brw->cc.color_calc_state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
- &key, sizeof(key),
- NULL, 0,
- NULL);
+ /* _NEW_COLOR */
+ cc->constant_r = ctx->Color.BlendColorUnclamped[0];
+ cc->constant_g = ctx->Color.BlendColorUnclamped[1];
+ cc->constant_b = ctx->Color.BlendColorUnclamped[2];
+ cc->constant_a = ctx->Color.BlendColorUnclamped[3];
- if (brw->cc.color_calc_state_bo == NULL)
- brw->cc.color_calc_state_bo = color_calc_state_create_from_key(brw, &key);
+ brw->state.dirty.cache |= CACHE_NEW_COLOR_CALC_STATE;
}
const struct brw_tracked_state gen6_color_calc_state = {
.dirty = {
.mesa = _NEW_COLOR | _NEW_STENCIL,
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
- .prepare = prepare_color_calc_state,
+ .prepare = gen6_prepare_color_calc_state,
};
static void upload_cc_state_pointers(struct brw_context *brw)
@@ -279,20 +164,12 @@ static void upload_cc_state_pointers(struct brw_context *brw)
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
- OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
- OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
- OUT_RELOC(brw->cc.color_calc_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_BATCH(brw->cc.blend_state_offset | 1);
+ OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
+ OUT_BATCH(brw->cc.state_offset | 1);
ADVANCE_BATCH();
}
-
-static void prepare_cc_state_pointers(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->cc.color_calc_state_bo);
- brw_add_validated_bo(brw, brw->cc.blend_state_bo);
- brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo);
-}
-
const struct brw_tracked_state gen6_cc_state_pointers = {
.dirty = {
.mesa = 0,
@@ -301,6 +178,5 @@ const struct brw_tracked_state gen6_cc_state_pointers = {
CACHE_NEW_COLOR_CALC_STATE |
CACHE_NEW_DEPTH_STENCIL_STATE)
},
- .prepare = prepare_cc_state_pointers,
.emit = upload_cc_state_pointers,
};
diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
index 96e6eade6b7..775e1ce2c9c 100644
--- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c
+++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
@@ -28,138 +28,68 @@
#include "brw_context.h"
#include "brw_state.h"
-struct brw_depth_stencil_state_key {
- GLenum depth_func;
- GLboolean depth_test, depth_write;
- GLboolean stencil, stencil_two_side;
- GLenum stencil_func[2], stencil_fail_op[2];
- GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
- GLubyte stencil_write_mask[2], stencil_test_mask[2];
-};
-
static void
-depth_stencil_state_populate_key(struct brw_context *brw,
- struct brw_depth_stencil_state_key *key)
+gen6_prepare_depth_stencil_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
- const unsigned back = ctx->Stencil._BackFace;
+ struct gen6_depth_stencil_state *ds;
- memset(key, 0, sizeof(*key));
+ ds = brw_state_batch(brw, sizeof(*ds), 64,
+ &brw->cc.depth_stencil_state_offset);
+ memset(ds, 0, sizeof(*ds));
/* _NEW_STENCIL */
- key->stencil = ctx->Stencil._Enabled;
- key->stencil_two_side = ctx->Stencil._TestTwoSide;
-
- if (key->stencil) {
- key->stencil_func[0] = ctx->Stencil.Function[0];
- key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
- key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
- key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
- key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
- key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
- }
- if (key->stencil_two_side) {
- key->stencil_func[1] = ctx->Stencil.Function[back];
- key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
- key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
- key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
- key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
- key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
- }
-
- key->depth_test = ctx->Depth.Test;
- if (key->depth_test) {
- key->depth_func = ctx->Depth.Func;
- key->depth_write = ctx->Depth.Mask;
- }
-}
-
-/**
- * Creates the state cache entry for the given DEPTH_STENCIL_STATE state key.
- */
-static drm_intel_bo *
-depth_stencil_state_create_from_key(struct brw_context *brw,
- struct brw_depth_stencil_state_key *key)
-{
- struct gen6_depth_stencil_state ds;
- drm_intel_bo *bo;
-
- memset(&ds, 0, sizeof(ds));
-
- /* _NEW_STENCIL */
- if (key->stencil) {
- ds.ds0.stencil_enable = 1;
- ds.ds0.stencil_func =
- intel_translate_compare_func(key->stencil_func[0]);
- ds.ds0.stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[0]);
- ds.ds0.stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
- ds.ds0.stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
- ds.ds1.stencil_write_mask = key->stencil_write_mask[0];
- ds.ds1.stencil_test_mask = key->stencil_test_mask[0];
-
- if (key->stencil_two_side) {
- ds.ds0.bf_stencil_enable = 1;
- ds.ds0.bf_stencil_func =
- intel_translate_compare_func(key->stencil_func[1]);
- ds.ds0.bf_stencil_fail_op =
- intel_translate_stencil_op(key->stencil_fail_op[1]);
- ds.ds0.bf_stencil_pass_depth_fail_op =
- intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
- ds.ds0.bf_stencil_pass_depth_pass_op =
- intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
- ds.ds1.bf_stencil_write_mask = key->stencil_write_mask[1];
- ds.ds1.bf_stencil_test_mask = key->stencil_test_mask[1];
+ if (ctx->Stencil._Enabled) {
+ int back = ctx->Stencil._BackFace;
+
+ ds->ds0.stencil_enable = 1;
+ ds->ds0.stencil_func =
+ intel_translate_compare_func(ctx->Stencil.Function[0]);
+ ds->ds0.stencil_fail_op =
+ intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
+ ds->ds0.stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
+ ds->ds0.stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
+ ds->ds1.stencil_write_mask = ctx->Stencil.WriteMask[0];
+ ds->ds1.stencil_test_mask = ctx->Stencil.ValueMask[0];
+
+ if (ctx->Stencil._TestTwoSide) {
+ ds->ds0.bf_stencil_enable = 1;
+ ds->ds0.bf_stencil_func =
+ intel_translate_compare_func(ctx->Stencil.Function[back]);
+ ds->ds0.bf_stencil_fail_op =
+ intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
+ ds->ds0.bf_stencil_pass_depth_fail_op =
+ intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
+ ds->ds0.bf_stencil_pass_depth_pass_op =
+ intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
+ ds->ds1.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
+ ds->ds1.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
}
/* Not really sure about this:
*/
- if (key->stencil_write_mask[0] ||
- (key->stencil_two_side && key->stencil_write_mask[1]))
- ds.ds0.stencil_write_enable = 1;
+ if (ctx->Stencil.WriteMask[0] ||
+ (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
+ ds->ds0.stencil_write_enable = 1;
}
/* _NEW_DEPTH */
- if (key->depth_test) {
- ds.ds2.depth_test_enable = 1;
- ds.ds2.depth_test_func = intel_translate_compare_func(key->depth_func);
- ds.ds2.depth_write_enable = key->depth_write;
+ if (ctx->Depth.Test) {
+ ds->ds2.depth_test_enable = 1;
+ ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
+ ds->ds2.depth_write_enable = ctx->Depth.Mask;
}
- bo = brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
- key, sizeof(*key),
- NULL, 0,
- &ds, sizeof(ds));
-
- return bo;
-}
-
-static void
-prepare_depth_stencil_state(struct brw_context *brw)
-{
- struct brw_depth_stencil_state_key key;
-
- depth_stencil_state_populate_key(brw, &key);
-
- drm_intel_bo_unreference(brw->cc.depth_stencil_state_bo);
- brw->cc.depth_stencil_state_bo = brw_search_cache(&brw->cache,
- BRW_DEPTH_STENCIL_STATE,
- &key, sizeof(key),
- NULL, 0,
- NULL);
-
- if (brw->cc.depth_stencil_state_bo == NULL)
- brw->cc.depth_stencil_state_bo =
- depth_stencil_state_create_from_key(brw, &key);
+ brw->state.dirty.cache |= CACHE_NEW_DEPTH_STENCIL_STATE;
}
const struct brw_tracked_state gen6_depth_stencil_state = {
.dirty = {
.mesa = _NEW_DEPTH | _NEW_STENCIL,
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
- .prepare = prepare_depth_stencil_state,
+ .prepare = gen6_prepare_depth_stencil_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
index f65c651bdff..4cdec699df6 100644
--- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -43,27 +43,15 @@ upload_sampler_state_pointers(struct brw_context *brw)
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
- if (brw->wm.sampler_bo)
- OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- else
- OUT_BATCH(0);
-
+ OUT_BATCH(brw->wm.sampler_offset);
ADVANCE_BATCH();
}
-
-static void
-prepare_sampler_state_pointers(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->wm.sampler_bo);
-}
-
const struct brw_tracked_state gen6_sampler_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_SAMPLER
},
- .prepare = prepare_sampler_state_pointers,
.emit = upload_sampler_state_pointers,
};
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
index 12b65826ae9..fad3ca0dd04 100644
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -31,11 +31,15 @@
#include "intel_batchbuffer.h"
static void
-prepare_scissor_state(struct brw_context *brw)
+gen6_prepare_scissor_state(struct brw_context *brw)
{
- struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
- struct gen6_scissor_rect scissor;
+ struct gen6_scissor_rect *scissor;
+ uint32_t scissor_state_offset;
+
+ scissor = brw_state_batch(brw, sizeof(*scissor), 32, &scissor_state_offset);
/* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
@@ -54,62 +58,36 @@ prepare_scissor_state(struct brw_context *brw)
* anything. Instead, just provide a min > max scissor inside
* the bounds, which produces the expected no rendering.
*/
- scissor.xmin = 1;
- scissor.xmax = 0;
- scissor.ymin = 1;
- scissor.ymax = 0;
+ scissor->xmin = 1;
+ scissor->xmax = 0;
+ scissor->ymin = 1;
+ scissor->ymax = 0;
} else if (render_to_fbo) {
/* texmemory: Y=0=bottom */
- scissor.xmin = ctx->DrawBuffer->_Xmin;
- scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
- scissor.ymin = ctx->DrawBuffer->_Ymin;
- scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+ scissor->xmin = ctx->DrawBuffer->_Xmin;
+ scissor->xmax = ctx->DrawBuffer->_Xmax - 1;
+ scissor->ymin = ctx->DrawBuffer->_Ymin;
+ scissor->ymax = ctx->DrawBuffer->_Ymax - 1;
}
else {
/* memory: Y=0=top */
- scissor.xmin = ctx->DrawBuffer->_Xmin;
- scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
- scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
- scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+ scissor->xmin = ctx->DrawBuffer->_Xmin;
+ scissor->xmax = ctx->DrawBuffer->_Xmax - 1;
+ scissor->ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+ scissor->ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
- drm_intel_bo_unreference(brw->sf.state_bo);
- brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
- &scissor, sizeof(scissor));
-}
-
-const struct brw_tracked_state gen6_scissor_state = {
- .dirty = {
- .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
- .brw = 0,
- .cache = 0,
- },
- .prepare = prepare_scissor_state,
-};
-
-static void upload_scissor_state_pointers(struct brw_context *brw)
-{
- struct intel_context *intel = &brw->intel;
-
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
- OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(scissor_state_offset);
ADVANCE_BATCH();
-
}
-
-static void prepare_scissor_state_pointers(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->sf.state_bo);
-}
-
-const struct brw_tracked_state gen6_scissor_state_pointers = {
+const struct brw_tracked_state gen6_scissor_state = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
.brw = BRW_NEW_BATCH,
- .cache = CACHE_NEW_SF_UNIT
+ .cache = 0,
},
- .prepare = prepare_scissor_state_pointers,
- .emit = upload_scissor_state_pointers,
+ .prepare = gen6_prepare_scissor_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
index cd7d209e3ea..4116bdb96de 100644
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -41,22 +41,22 @@
static void
prepare_clip_vp(struct brw_context *brw)
{
- struct brw_clipper_viewport vp;
+ struct brw_clipper_viewport *vp;
- vp.xmin = -1.0;
- vp.xmax = 1.0;
- vp.ymin = -1.0;
- vp.ymax = 1.0;
+ vp = brw_state_batch(brw, sizeof(*vp), 32, &brw->clip.vp_offset);
- drm_intel_bo_unreference(brw->clip.vp_bo);
- brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
- &vp, sizeof(vp));
+ vp->xmin = -1.0;
+ vp->xmax = 1.0;
+ vp->ymin = -1.0;
+ vp->ymax = 1.0;
+
+ brw->state.dirty.cache |= CACHE_NEW_CLIP_VP;
}
const struct brw_tracked_state gen6_clip_vp = {
.dirty = {
- .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */
- .brw = 0,
+ .mesa = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
.prepare = prepare_clip_vp,
@@ -67,12 +67,13 @@ prepare_sf_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
- struct brw_sf_viewport sfv;
+ struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
const GLfloat *v = ctx->Viewport._WindowMap.m;
- memset(&sfv, 0, sizeof(sfv));
+ sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset);
+ memset(sfv, 0, sizeof(*sfv));
/* _NEW_BUFFERS */
if (render_to_fbo) {
@@ -84,34 +85,25 @@ prepare_sf_vp(struct brw_context *brw)
}
/* _NEW_VIEWPORT */
- sfv.viewport.m00 = v[MAT_SX];
- sfv.viewport.m11 = v[MAT_SY] * y_scale;
- sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
- sfv.viewport.m30 = v[MAT_TX];
- sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
- sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+ sfv->viewport.m00 = v[MAT_SX];
+ sfv->viewport.m11 = v[MAT_SY] * y_scale;
+ sfv->viewport.m22 = v[MAT_SZ] * depth_scale;
+ sfv->viewport.m30 = v[MAT_TX];
+ sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+ sfv->viewport.m32 = v[MAT_TZ] * depth_scale;
- drm_intel_bo_unreference(brw->sf.vp_bo);
- brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
- &sfv, sizeof(sfv));
+ brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
const struct brw_tracked_state gen6_sf_vp = {
.dirty = {
.mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
- .brw = 0,
+ .brw = BRW_NEW_BATCH,
.cache = 0,
},
.prepare = prepare_sf_vp,
};
-static void prepare_viewport_state_pointers(struct brw_context *brw)
-{
- brw_add_validated_bo(brw, brw->clip.vp_bo);
- brw_add_validated_bo(brw, brw->sf.vp_bo);
- brw_add_validated_bo(brw, brw->cc.vp_bo);
-}
-
static void upload_viewport_state_pointers(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
@@ -121,9 +113,9 @@ static void upload_viewport_state_pointers(struct brw_context *brw)
GEN6_CC_VIEWPORT_MODIFY |
GEN6_SF_VIEWPORT_MODIFY |
GEN6_CLIP_VIEWPORT_MODIFY);
- OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BATCH(brw->clip.vp_offset);
+ OUT_BATCH(brw->sf.vp_offset);
+ OUT_BATCH(brw->cc.vp_offset);
ADVANCE_BATCH();
}
@@ -135,6 +127,5 @@ const struct brw_tracked_state gen6_viewport_state = {
CACHE_NEW_SF_VP |
CACHE_NEW_CC_VP)
},
- .prepare = prepare_viewport_state_pointers,
.emit = upload_viewport_state_pointers,
};
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index a10cec318d6..b46368e36e2 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -34,43 +34,36 @@
#include "intel_batchbuffer.h"
static void
-upload_vs_state(struct brw_context *brw)
+gen6_prepare_vs_push_constants(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
+ /* BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
unsigned int nr_params = brw->vs.prog_data->nr_params / 4;
- drm_intel_bo *constant_bo;
- int i;
+ if (brw->vertex_program->IsNVProgram)
+ _mesa_load_tracked_matrices(ctx);
+
+ /* Updates the ParameterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ /* XXX: Should this happen somewhere before to get our state flag set? */
+ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+ /* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
- /* Disable the push constant buffers. */
- BEGIN_BATCH(5);
- OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ brw->vs.push_const_size = 0;
} else {
- int params_uploaded = 0, param_regs;
+ int params_uploaded = 0;
float *param;
+ int i;
- if (brw->vertex_program->IsNVProgram)
- _mesa_load_tracked_matrices(ctx);
-
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
-
- constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
- (MAX_CLIP_PLANES + nr_params) *
- 4 * sizeof(float),
- 4096);
- drm_intel_gem_bo_map_gtt(constant_bo);
- param = constant_bo->virtual;
+ param = brw_state_batch(brw,
+ (MAX_CLIP_PLANES + nr_params) *
+ 4 * sizeof(float),
+ 32, &brw->vs.push_const_offset);
/* This should be loaded like any other param, but it's ad-hoc
* until we redo the VS backend.
@@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw)
if (0) {
printf("VS constant buffer:\n");
for (i = 0; i < params_uploaded; i++) {
- float *buf = (float *)constant_bo->virtual + i * 4;
+ float *buf = param + i * 4;
printf("%d: %f %f %f %f\n",
i, buf[0], buf[1], buf[2], buf[3]);
}
}
- drm_intel_gem_bo_unmap_gtt(constant_bo);
+ brw->vs.push_const_size = (params_uploaded + 1) / 2;
+ /* We can only push 32 registers of constants at a time. */
+ assert(brw->vs.push_const_size <= 32);
+ }
+}
+
+const struct brw_tracked_state gen6_vs_constants = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_VERTEX_PROGRAM),
+ .cache = 0,
+ },
+ .prepare = gen6_prepare_vs_push_constants,
+};
- param_regs = (params_uploaded + 1) / 2;
- assert(param_regs <= 32);
+static void
+upload_vs_state(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ if (brw->vs.push_const_size == 0) {
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
- OUT_RELOC(constant_bo,
- I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- param_regs - 1);
+ /* Pointer to the VS constant buffer. Covered by the set of
+ * state flags from gen6_prepare_vs_push_constants
+ */
+ OUT_BATCH(brw->vs.push_const_offset +
+ brw->vs.push_const_size - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
-
- drm_intel_bo_unreference(constant_bo);
}
BEGIN_BATCH(6);
@@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = {
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
- BRW_NEW_CONTEXT),
+ BRW_NEW_CONTEXT |
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 8215cb15a9c..33b233414c6 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -35,16 +35,13 @@
#include "intel_batchbuffer.h"
static void
-prepare_wm_constants(struct brw_context *brw)
+gen6_prepare_wm_push_constants(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
- drm_intel_bo_unreference(brw->wm.push_const_bo);
- brw->wm.push_const_bo = NULL;
-
/* Updates the ParamaterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
@@ -55,13 +52,11 @@ prepare_wm_constants(struct brw_context *brw)
float *constants;
unsigned int i;
- brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr,
- "WM constant_bo",
- brw->wm.prog_data->nr_params *
- sizeof(float),
- 4096);
- drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo);
- constants = brw->wm.push_const_bo->virtual;
+ constants = brw_state_batch(brw,
+ brw->wm.prog_data->nr_params *
+ sizeof(float),
+ 32, &brw->wm.push_const_offset);
+
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
*brw->wm.prog_data->param[i]);
@@ -80,18 +75,17 @@ prepare_wm_constants(struct brw_context *brw)
printf("\n");
printf("\n");
}
-
- drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
}
const struct brw_tracked_state gen6_wm_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
- .brw = BRW_NEW_FRAGMENT_PROGRAM,
+ .brw = (BRW_NEW_BATCH |
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = 0,
},
- .prepare = prepare_wm_constants,
+ .prepare = gen6_prepare_wm_push_constants,
};
static void
@@ -118,8 +112,10 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
- OUT_RELOC(brw->wm.push_const_bo,
- I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+ /* Pointer to the WM constant buffer. Covered by the set of
+ * state flags from gen6_prepare_wm_push_constants
+ */
+ OUT_BATCH(brw->wm.push_const_offset +
ALIGN(brw->wm.prog_data->nr_params,
brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);
@@ -143,14 +139,19 @@ upload_wm_state(struct brw_context *brw)
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+ dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
+ GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
dw5 |= (brw->wm_max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/* CACHE_NEW_WM_PROG */
- if (brw->wm.prog_data->dispatch_width == 8)
+ if (brw->wm.prog_data->dispatch_width == 8) {
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
- else
+ if (brw->wm.prog_data->prog_offset_16)
+ dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+ } else {
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+ }
/* _NEW_LINE */
if (ctx->Line.StippleFlag)
@@ -194,7 +195,12 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* kernel 1 pointer */
- OUT_BATCH(0); /* kernel 2 pointer */
+ if (brw->wm.prog_data->prog_offset_16) {
+ OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ brw->wm.prog_data->prog_offset_16);
+ } else {
+ OUT_BATCH(0); /* kernel 2 pointer */
+ }
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 53d6e7c6acc..377989bcc14 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -128,6 +128,11 @@ _intel_batchbuffer_flush(struct intel_context *intel,
if (intel->batch.used == 0)
return;
+ if (intel->first_post_swapbuffers_batch == NULL) {
+ intel->first_post_swapbuffers_batch = intel->batch.bo;
+ drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+ }
+
if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
4*intel->batch.used);
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 82d29e76712..5a96232107e 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -28,6 +28,7 @@
#include "main/glheader.h"
#include "main/mtypes.h"
+#include "main/condrender.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
@@ -88,6 +89,9 @@ intelClear(struct gl_context *ctx, GLbitfield mask)
struct intel_renderbuffer *irb;
int i;
+ if (!_mesa_check_conditional_render(ctx))
+ return;
+
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
intel->front_buffer_dirty = GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 02e7f7717fc..acdf35fc71b 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -466,9 +466,11 @@ intel_prepare_render(struct intel_context *intel)
* the swap, and getting our hands on that doesn't seem worth it,
* so we just use the first batch we emitted after the last swap.
*/
- if (intel->need_throttle) {
- drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE);
- intel->need_throttle = GL_FALSE;
+ if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ intel->need_throttle = GL_FALSE;
}
}
@@ -650,27 +652,23 @@ intelInitContext(struct intel_context *intel,
intel->driFd = sPriv->fd;
intel->has_xrgb_textures = GL_TRUE;
+ intel->gen = intelScreen->gen;
if (IS_GEN6(intel->intelScreen->deviceID)) {
- intel->gen = 6;
intel->needs_ff_sync = GL_TRUE;
intel->has_luminance_srgb = GL_TRUE;
} else if (IS_GEN5(intel->intelScreen->deviceID)) {
- intel->gen = 5;
intel->needs_ff_sync = GL_TRUE;
intel->has_luminance_srgb = GL_TRUE;
} else if (IS_965(intel->intelScreen->deviceID)) {
- intel->gen = 4;
if (IS_G4X(intel->intelScreen->deviceID)) {
intel->has_luminance_srgb = GL_TRUE;
intel->is_g4x = GL_TRUE;
}
} else if (IS_9XX(intel->intelScreen->deviceID)) {
- intel->gen = 3;
if (IS_945(intel->intelScreen->deviceID)) {
intel->is_945 = GL_TRUE;
}
} else {
- intel->gen = 2;
if (intel->intelScreen->deviceID == PCI_CHIP_I830_M ||
intel->intelScreen->deviceID == PCI_CHIP_845_G) {
intel->has_xrgb_textures = GL_FALSE;
@@ -718,6 +716,12 @@ intelInitContext(struct intel_context *intel,
ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE;
#ifndef I915
+ /* GL_ARB_texture_compression_rgtc */
+ ctx->TextureFormatSupported[MESA_FORMAT_RED_RGTC1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RED_RGTC1] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_RG_RGTC2] = GL_TRUE;
+ ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RG_RGTC2] = GL_TRUE;
+
/* GL_ARB_texture_rg */
ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE;
ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE;
@@ -936,6 +940,8 @@ intelDestroyContext(__DRIcontext * driContextPriv)
intel->prim.vb = NULL;
drm_intel_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
driDestroyOptionCache(&intel->optionCache);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index c59119373da..d3a8a659caa 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -182,6 +182,7 @@ struct intel_context
bool is_blit;
} batch;
+ drm_intel_bo *first_post_swapbuffers_batch;
GLboolean need_throttle;
GLboolean no_batch_wrap;
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index e107534a4da..3fd987abd8c 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -91,6 +91,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_pixel_buffer_object", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
{ "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_sampler_objects", NULL },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
{ "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
{ "GL_ARB_sync", GL_ARB_sync_functions },
@@ -176,6 +177,7 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_texture_float", NULL },
#endif
{ "GL_MESA_texture_signed_rgba", NULL },
+ { "GL_ARB_texture_compression_rgtc", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
{ "GL_ARB_texture_rg", NULL },
{ "GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions },
@@ -189,6 +191,7 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions },
{ "GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions },
{ "GL_ATI_texture_env_combine3", NULL },
+ { "GL_NV_conditional_render", NULL },
{ "GL_NV_texture_env_combine4", NULL },
{ NULL, NULL }
};
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 43cdd0d2bac..64c7acce1e9 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -29,6 +29,7 @@
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
+#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/pbo.h"
@@ -68,7 +69,7 @@ static const GLubyte *map_pbo( struct gl_context *ctx,
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
GL_COLOR_INDEX, GL_BITMAP,
- (GLvoid *) bitmap)) {
+ INT_MAX, (const GLvoid *) bitmap)) {
_mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
return NULL;
}
@@ -329,6 +330,9 @@ intelBitmap(struct gl_context * ctx,
{
struct intel_context *intel = intel_context(ctx);
+ if (!_mesa_check_conditional_render(ctx))
+ return;
+
if (do_blit_bitmap(ctx, x, y, width, height,
unpack, pixels))
return;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index a7ca780e944..e83f1bfab94 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -29,6 +29,7 @@
#include "main/image.h"
#include "main/state.h"
#include "main/mtypes.h"
+#include "main/condrender.h"
#include "drivers/common/meta.h"
#include "intel_context.h"
@@ -204,6 +205,9 @@ intelCopyPixels(struct gl_context * ctx,
{
DBG("%s\n", __FUNCTION__);
+ if (!_mesa_check_conditional_render(ctx))
+ return;
+
if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 64a21a147f0..5dacbb06633 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -216,8 +216,16 @@ intel_create_image(__DRIscreen *screen,
{
__DRIimage *image;
struct intel_screen *intelScreen = screen->private;
+ uint32_t tiling;
int cpp;
+ tiling = I915_TILING_X;
+ if (use & __DRI_IMAGE_USE_CURSOR) {
+ if (width != 64 || height != 64)
+ return NULL;
+ tiling = I915_TILING_NONE;
+ }
+
image = CALLOC(sizeof *image);
if (image == NULL)
return NULL;
@@ -247,7 +255,7 @@ intel_create_image(__DRIscreen *screen,
cpp = _mesa_get_format_bytes(image->format);
image->region =
- intel_region_alloc(intelScreen, I915_TILING_NONE,
+ intel_region_alloc(intelScreen, tiling,
cpp, width, height, GL_TRUE);
if (image->region == NULL) {
FREE(image);
@@ -548,6 +556,18 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->deviceID = strtod(devid_override, NULL);
}
+ if (IS_GEN6(intelScreen->deviceID)) {
+ intelScreen->gen = 6;
+ } else if (IS_GEN5(intelScreen->deviceID)) {
+ intelScreen->gen = 5;
+ } else if (IS_965(intelScreen->deviceID)) {
+ intelScreen->gen = 4;
+ } else if (IS_9XX(intelScreen->deviceID)) {
+ intelScreen->gen = 3;
+ } else {
+ intelScreen->gen = 2;
+ }
+
api_mask = (1 << __DRI_API_OPENGL);
#if FEATURE_ES1
api_mask |= (1 << __DRI_API_GLES);
@@ -660,12 +680,21 @@ intelAllocateBuffer(__DRIscreen *screen,
{
struct intel_buffer *intelBuffer;
struct intel_screen *intelScreen = screen->private;
+ uint32_t tiling;
intelBuffer = CALLOC(sizeof *intelBuffer);
if (intelBuffer == NULL)
return NULL;
- intelBuffer->region = intel_region_alloc(intelScreen, I915_TILING_NONE,
+ if ((attachment == __DRI_BUFFER_DEPTH ||
+ attachment == __DRI_BUFFER_STENCIL ||
+ attachment == __DRI_BUFFER_DEPTH_STENCIL) &&
+ intelScreen->gen >= 4)
+ tiling = I915_TILING_Y;
+ else
+ tiling = I915_TILING_X;
+
+ intelBuffer->region = intel_region_alloc(intelScreen, tiling,
format / 8, width, height, GL_TRUE);
if (intelBuffer->region == NULL) {
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
index 0f0b5be56dc..4613c9858c4 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -37,6 +37,7 @@
struct intel_screen
{
int deviceID;
+ int gen;
int logTextureGranularity;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index 5e705c93619..27f2646ebf5 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -1,5 +1,6 @@
#include "main/mtypes.h"
#include "main/macros.h"
+#include "main/samplerobj.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
@@ -14,11 +15,13 @@
*/
static void
intel_update_max_level(struct intel_context *intel,
- struct intel_texture_object *intelObj)
+ struct intel_texture_object *intelObj,
+ struct gl_sampler_object *sampler)
{
struct gl_texture_object *tObj = &intelObj->base;
- if (tObj->Sampler.MinFilter == GL_NEAREST || tObj->Sampler.MinFilter == GL_LINEAR) {
+ if (sampler->MinFilter == GL_NEAREST ||
+ sampler->MinFilter == GL_LINEAR) {
intelObj->_MaxLevel = tObj->BaseLevel;
} else {
intelObj->_MaxLevel = tObj->_MaxLevel;
@@ -70,8 +73,10 @@ copy_image_data_to_tree(struct intel_context *intel,
GLuint
intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
{
+ struct gl_context *ctx = &intel->ctx;
struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
+ struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
int comp_byte = 0;
int cpp;
GLuint face, i;
@@ -84,7 +89,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
/* What levels must the tree include at a minimum?
*/
- intel_update_max_level(intel, intelObj);
+ intel_update_max_level(intel, intelObj, sampler);
firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]);
/* Fallback case:
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 51b896ae91f..5c9f57b4eac 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -20,12 +20,15 @@ C_SOURCES = \
radeon_pair_translate.c \
radeon_pair_schedule.c \
radeon_pair_regalloc.c \
+ radeon_pair_dead_sources.c \
radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
+ radeon_list.c \
radeon_optimize.c \
radeon_remove_constants.c \
radeon_rename_regs.c \
+ radeon_variable.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
@@ -48,6 +51,7 @@ INCLUDES = \
-I. \
-I$(TOP)/include \
-I$(TOP)/src/mesa \
+ -I$(TOP)/src/glsl \
##### TARGETS #####
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index 2b4bce1c08c..9931537492e 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -3,6 +3,7 @@ Import('*')
env = env.Clone()
env.Append(CPPPATH = '#/include')
env.Append(CPPPATH = '#/src/mesa')
+env.Append(CPPPATH = '#/src/glsl')
# temporary fix
env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
@@ -22,6 +23,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_pair_translate.c',
'radeon_pair_schedule.c',
'radeon_pair_regalloc.c',
+ 'radeon_pair_dead_sources.c',
'radeon_optimize.c',
'radeon_remove_constants.c',
'radeon_rename_regs.c',
@@ -30,6 +32,8 @@ r300compiler = env.ConvenienceLibrary(
'radeon_dataflow.c',
'radeon_dataflow_deadcode.c',
'radeon_dataflow_swizzles.c',
+ 'radeon_variable.c',
+ 'radeon_list.c',
'r3xx_fragprog.c',
'r300_fragprog.c',
'r300_fragprog_swizzle.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 8b73409136f..e6fd1fde62d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -93,7 +93,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
- } else if (src.File == RC_FILE_TEMPORARY) {
+ } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index & 0x1f;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 5223aaa71a4..b7bca8c0cfa 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -87,6 +87,18 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
return 0;
}
+/**
+ * Reports whether the given swizzle is valid for r300/r400, judged solely
+ * by whether lookup_native_swizzle() finds an entry for it.  In most
+ * situations it is better to use r300_swizzle_is_native(), which can be
+ * accessed via struct radeon_compiler *c; c->SwizzleCaps->IsNative().
+ *
+ * @param swizzle The swizzle to look up.
+ * @return 1 if lookup_native_swizzle() found an entry, 0 otherwise.
+ */
+int r300_swizzle_is_native_basic(unsigned int swizzle)
+{
+	return lookup_native_swizzle(swizzle) ? 1 : 0;
+}
/**
* Check whether the given instruction supports the swizzle and negate
@@ -140,7 +152,6 @@ static void r300_swizzle_split(
split->NumPhases = 0;
while(mask) {
- const struct swizzle_data *best_swizzle = 0;
unsigned int best_matchcount = 0;
unsigned int best_matchmask = 0;
int i, comp;
@@ -167,7 +178,6 @@ static void r300_swizzle_split(
}
}
if (matchcount > best_matchcount) {
- best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
if (matchmask == (mask & RC_MASK_XYZ))
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
index 118476af132..f2635be140d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -34,5 +34,6 @@ extern struct rc_swizzle_caps r300_swizzle_caps;
unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+int r300_swizzle_is_native_basic(unsigned int swizzle);
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 9286733635f..e2441e97d87 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -148,8 +148,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"register rename", 1, !is_r500, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
- {"register allocation", 1, opt, rc_pair_regalloc, NULL},
- {"dumb register allocation", 1, !opt, rc_pair_regalloc_inputs_only, NULL},
+ {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
+ {"register allocation", 1, 1, rc_pair_regalloc, &opt},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 140eeed3de3..5e0be6b8881 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -70,6 +70,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
if (opcode == RC_OPCODE_TEX ||
opcode == RC_OPCODE_TXB ||
opcode == RC_OPCODE_TXP ||
+ opcode == RC_OPCODE_TXD ||
+ opcode == RC_OPCODE_TXL ||
opcode == RC_OPCODE_KIL) {
if (reg.Abs)
return 0;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index c7f79bc53c7..c30cd753d15 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -207,7 +207,7 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
if (src.File == RC_FILE_CONSTANT) {
return src.Index | R500_RGB_ADDR0_CONST;
- } else if (src.File == RC_FILE_TEMPORARY) {
+ } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index;
}
@@ -396,6 +396,12 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
case RC_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
+ case RC_OPCODE_TXD:
+ code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
+ break;
+ case RC_OPCODE_TXL:
+ code->inst[ip].inst1 |= R500_TEX_INST_LOD;
+ break;
default:
error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
}
@@ -407,8 +413,23 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
| R500_TEX_DST_ADDR(inst->DstReg.Index)
- | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
- | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+ | (GET_SWZ(inst->TexSwizzle, 0) << 24)
+ | (GET_SWZ(inst->TexSwizzle, 1) << 26)
+ | (GET_SWZ(inst->TexSwizzle, 2) << 28)
+ | (GET_SWZ(inst->TexSwizzle, 3) << 30)
+ ;
+
+ if (inst->Opcode == RC_OPCODE_TXD) {
+ use_temporary(code, inst->SrcReg[1].Index);
+ use_temporary(code, inst->SrcReg[2].Index);
+
+ /* DX and DY parameters are specified in a separate register. */
+ code->inst[ip].inst3 =
+ R500_DX_ADDR(inst->SrcReg[1].Index) |
+ (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
+ R500_DY_ADDR(inst->SrcReg[2].Index) |
+ (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
+ }
return 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index 15ec4418cb8..b077e7b7d65 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -124,6 +124,165 @@ unsigned swizzle_mask(unsigned swizzle, unsigned mask)
return ret;
}
+/**
+ * Tells the writemask-rewriting helpers whether the source swizzles of an
+ * instruction have to be adjusted together with its writemask.
+ *
+ * @param info Opcode description of the instruction being rewritten.
+ * @return 0 for opcodes flagged HasTexture and for DP2, DP3, DP4, DDX and
+ * DDY; 1 for every other opcode.
+ */
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+	unsigned int need_rewrite = 1;
+
+	if (info->HasTexture) {
+		need_rewrite = 0;
+	} else {
+		switch (info->Opcode) {
+		case RC_OPCODE_DP2:
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_DP4:
+		case RC_OPCODE_DDX:
+		case RC_OPCODE_DDY:
+			need_rewrite = 0;
+			break;
+		default:
+			break;
+		}
+	}
+	return need_rewrite;
+}
+
+/**
+ * Moves the channels of old_swizzle to the positions named by
+ * conversion_swizzle: the value found in channel i of old_swizzle is
+ * stored in channel get_swz(conversion_swizzle, i) of the result.
+ * Channels that conversion_swizzle maps to RC_SWIZZLE_UNUSED are skipped;
+ * the result starts out as rc_init_swizzle(RC_SWIZZLE_UNUSED, 0).
+ *
+ * @return The swizzle that results from converting old_swizzle using
+ * conversion_swizzle
+ */
+unsigned int rc_adjust_channels(
+	unsigned int old_swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int src_chan;
+	unsigned int result = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+
+	for (src_chan = 0; src_chan < 4; src_chan++) {
+		unsigned int dst_chan = get_swz(conversion_swizzle, src_chan);
+		if (dst_chan != RC_SWIZZLE_UNUSED) {
+			SET_SWZ(result, dst_chan, GET_SWZ(old_swizzle, src_chan));
+		}
+	}
+	return result;
+}
+
+/**
+ * Translates a writemask through a conversion swizzle.
+ *
+ * For every channel i that is set in old_mask and that conversion_swizzle
+ * does not map to RC_SWIZZLE_UNUSED, the bit numbered
+ * GET_SWZ(conversion_swizzle, i) is set in the result.
+ *
+ * @return The translated writemask.
+ */
+static unsigned int rewrite_writemask(
+	unsigned int old_mask,
+	unsigned int conversion_swizzle)
+{
+	unsigned int chan;
+	unsigned int result = 0;
+
+	for (chan = 0; chan < 4; chan++) {
+		if (GET_BIT(old_mask, chan)
+		    && GET_SWZ(conversion_swizzle, chan) != RC_SWIZZLE_UNUSED) {
+			result |= (1 << GET_SWZ(conversion_swizzle, chan));
+		}
+	}
+
+	return result;
+}
+
+/**
+ * Rewrites the writemask of the pair sub-instruction sub and, unless
+ * srcs_need_rewrite() says the opcode's sources must stay untouched,
+ * adjusts the swizzle of each of its info->NumSrcRegs arguments with
+ * rc_adjust_channels().
+ * conversion_swizzle represents a mapping of the old writemask to the
+ * new writemask. For a detailed description of how conversion swizzles
+ * work see rc_rewrite_swizzle().
+ */
+void rc_pair_rewrite_writemask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int conversion_swizzle)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	unsigned int arg;
+
+	sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+	if (srcs_need_rewrite(info)) {
+		for (arg = 0; arg < info->NumSrcRegs; arg++) {
+			sub->Arg[arg].Swizzle =
+				rc_adjust_channels(sub->Arg[arg].Swizzle,
+						conversion_swizzle);
+		}
+	}
+}
+
+/**
+ * Source-register callback: replaces src->Swizzle with the result of
+ * rc_adjust_channels().
+ *
+ * @param userdata Points to the unsigned int that is handed to
+ * rc_adjust_channels() as its conversion_swizzle argument.
+ * @param inst Instruction owning src (unused here).
+ * @param src Source register whose swizzle is rewritten in place.
+ */
+static void normal_rewrite_writemask_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	const unsigned int conversion = *(unsigned int *)userdata;
+
+	src->Swizzle = rc_adjust_channels(src->Swizzle, conversion);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ *
+ * The destination writemask is translated through conversion_swizzle.
+ * For texture opcodes the TexSwizzle is rebuilt so that each remapped
+ * destination channel still receives the texture component it received
+ * before.  For opcodes whose sources need rewriting, every source swizzle
+ * is adjusted with the same conversion swizzle.
+ *
+ * @param inst A normal (non-pair) instruction; inst->U.I is modified.
+ * @param conversion_swizzle Mapping of old channels to new channels; see
+ * rc_rewrite_swizzle() for a description of conversion swizzles.
+ */
+void rc_normal_rewrite_writemask(
+	struct rc_instruction * inst,
+	unsigned int conversion_swizzle)
+{
+	struct rc_sub_instruction * sub = &inst->U.I;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	sub->DstReg.WriteMask =
+		rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+	if (info->HasTexture) {
+		unsigned int i;
+		assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+		for (i = 0; i < 4; i++) {
+			unsigned int swz = GET_SWZ(conversion_swizzle, i);
+			/* Values above 3 are not channels (e.g. unused). */
+			if (swz > 3)
+				continue;
+			SET_SWZ(sub->TexSwizzle, swz, i);
+		}
+	}
+
+	if (!srcs_need_rewrite(info)) {
+		return;
+	}
+
+	/* The callback forwards this value to rc_adjust_channels(), which
+	 * expects a conversion swizzle -- not the rewritten writemask. */
+	rc_for_all_reads_src(inst, normal_rewrite_writemask_cb,
+			&conversion_swizzle);
+}
+
+/**
+ * This function replaces each value 'swz' in swizzle with the value of
+ * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's
+ * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want
+ * to change all the Y's in swizzle to X, then conversion_swizzle should be
+ * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then
+ * conversion swizzle should be YX__ (0xfc1).
+ *
+ * Values greater than 3 in swizzle, and channels that conversion_swizzle
+ * maps to RC_SWIZZLE_UNUSED, are carried over unchanged.
+ *
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int conversion_swizzle)
+{
+	unsigned int chan;
+	unsigned int out_swizzle = swizzle;
+
+	for (chan = 0; chan < 4; chan++) {
+		const unsigned int swz = GET_SWZ(swizzle, chan);
+		unsigned int replacement = swz;
+
+		if (swz <= 3) {
+			const unsigned int mapped =
+				GET_SWZ(conversion_swizzle, swz);
+			if (mapped != RC_SWIZZLE_UNUSED) {
+				replacement = mapped;
+			}
+		}
+		SET_SWZ(out_swizzle, chan, replacement);
+	}
+	return out_swizzle;
+}
+
/**
* Left multiplication of a register with a swizzle
*/
@@ -281,3 +440,197 @@ unsigned int rc_inst_can_use_presub(
return 1;
}
+/* Accumulator filled in by max_callback() on behalf of rc_get_max_index(). */
+struct max_data {
+ /* Largest index seen so far; only meaningful once HasFileType is set. */
+ unsigned int Max;
+ /* Set to 1 once any register of File has been seen. */
+ unsigned int HasFileType;
+ /* Register file whose indices are being tracked. */
+ rc_register_file File;
+};
+
+/**
+ * Register-access callback that tracks the largest index seen for the
+ * register file stored in the max_data passed as userdata.
+ *
+ * Accesses to other register files are ignored.  The first matching
+ * access always records its index; later ones only when it is larger.
+ */
+static void max_callback(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct max_data * d = (struct max_data*)userdata;
+
+	if (file != d->File) {
+		return;
+	}
+	if (d->HasFileType && index <= d->Max) {
+		return;
+	}
+	d->Max = index;
+	d->HasFileType = 1;
+}
+
+/**
+ * Walks every instruction of the program and inspects all register reads
+ * and writes through max_callback().
+ *
+ * @param c The compiler whose program is scanned.
+ * @param file The register file to look for.
+ * @return The maximum index of the specified register file used by the
+ * program, or -1 if the program never reads or writes that file.
+ */
+int rc_get_max_index(
+	struct radeon_compiler * c,
+	rc_register_file file)
+{
+	/* All declarations come first so the file builds cleanly with
+	 * -Wdeclaration-after-statement. */
+	struct max_data data;
+	struct rc_instruction * inst;
+
+	data.Max = 0;
+	data.HasFileType = 0;
+	data.File = file;
+
+	for (inst = c->Program.Instructions.Next;
+	     inst != &c->Program.Instructions;
+	     inst = inst->Next) {
+		rc_for_all_reads_mask(inst, max_callback, &data);
+		rc_for_all_writes_mask(inst, max_callback, &data);
+	}
+
+	if (!data.HasFileType) {
+		return -1;
+	}
+	return data.Max;
+}
+
+/**
+ * Collects the components read from one source slot of a pair
+ * sub-instruction.
+ *
+ * An argument contributes when its Source equals source and
+ * rc_source_type_swz() of its swizzle equals src_type; the contribution
+ * is rc_swizzle_to_writemask() of that swizzle.
+ *
+ * @return The OR of the contributions of all matching arguments.
+ */
+static unsigned int get_source_readmask(
+	struct rc_pair_sub_instruction * sub,
+	unsigned int source,
+	unsigned int src_type)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	unsigned int mask = 0;
+	unsigned int arg;
+
+	for (arg = 0; arg < info->NumSrcRegs; arg++) {
+		if (sub->Arg[arg].Source == source
+		    && src_type == rc_source_type_swz(sub->Arg[arg].Swizzle)) {
+			mask |= rc_swizzle_to_writemask(sub->Arg[arg].Swizzle);
+		}
+	}
+	return mask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instruction.
+ * The removal is refused when the RGB or Alpha sub-instruction reads the
+ * source through components that new_readmask does not cover.
+ * @param inst The pair instruction to modify (inst->U.P is accessed)
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing. If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+	struct rc_instruction * inst,
+	unsigned int src_type,
+	unsigned int source,
+	unsigned int new_readmask)
+{
+	unsigned int readmask;
+
+	readmask = get_source_readmask(&inst->U.P.RGB, source, src_type);
+	readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+	/* Any component still read but missing from new_readmask blocks
+	 * the removal. */
+	if (readmask & ~new_readmask)
+		return 0;
+
+	if (src_type & RC_SOURCE_RGB) {
+		memset(&inst->U.P.RGB.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	if (src_type & RC_SOURCE_ALPHA) {
+		memset(&inst->U.P.Alpha.Src[source], 0,
+			sizeof(struct rc_pair_instruction_source));
+	}
+
+	return 1;
+}
+
+/**
+ * Classifies inst by the opcode of inst->U.I for normal instructions and
+ * by the RGB opcode for pair instructions.
+ *
+ * @return RC_OPCODE_NOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info;
+
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+		/* A flow control instruction shouldn't have an alpha
+		 * instruction. */
+		assert(!info->IsFlowControl ||
+			inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+	} else {
+		info = rc_get_opcode_info(inst->U.I.Opcode);
+	}
+
+	return info->IsFlowControl ? info->Opcode : RC_OPCODE_NOP;
+}
+
+/**
+ * Walks backwards from endloop, skipping over nested loops by counting
+ * the ENDLOOP instructions met on the way.
+ *
+ * @return The BGNLOOP instruction that starts the loop ended by endloop,
+ * or NULL if the walk arrives back at endloop without finding one.
+ */
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop)
+{
+	unsigned int nested = 0;
+	struct rc_instruction * cur;
+
+	for (cur = endloop->Prev; cur != endloop; cur = cur->Prev) {
+		switch (rc_get_flow_control_inst(cur)) {
+		case RC_OPCODE_ENDLOOP:
+			nested++;
+			break;
+		case RC_OPCODE_BGNLOOP:
+			if (nested == 0) {
+				return cur;
+			}
+			nested--;
+			break;
+		default:
+			break;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Walks forwards from bgnloop, skipping over nested loops by counting
+ * the BGNLOOP instructions met on the way.
+ *
+ * @return The ENDLOOP instruction that ends the loop started by bgnloop,
+ * or NULL if the walk arrives back at bgnloop without finding one.
+ */
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop)
+{
+	unsigned int nested = 0;
+	struct rc_instruction * cur;
+
+	for (cur = bgnloop->Next; cur != bgnloop; cur = cur->Next) {
+		switch (rc_get_flow_control_inst(cur)) {
+		case RC_OPCODE_BGNLOOP:
+			nested++;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			if (nested == 0) {
+				return cur;
+			}
+			nested--;
+			break;
+		default:
+			break;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Pairs the set bits of old_mask with the set bits of new_mask in
+ * ascending order: the first channel set in old_mask maps to the first
+ * channel set in new_mask, and so on.  Channels of old_mask left without
+ * a partner keep the initial RC_SWIZZLE_UNUSED value.
+ *
+ * @return A conversion swizzle for converting from old_mask->new_mask
+ */
+unsigned int rc_make_conversion_swizzle(
+	unsigned int old_mask,
+	unsigned int new_mask)
+{
+	unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+	unsigned int old_idx;
+	unsigned int new_idx = 0;
+
+	for (old_idx = 0; old_idx < 4; old_idx++) {
+		if (!GET_BIT(old_mask, old_idx))
+			continue;
+
+		/* Advance to the next channel enabled in new_mask. */
+		while (new_idx < 4 && !GET_BIT(new_mask, new_idx)) {
+			new_idx++;
+		}
+		if (new_idx < 4) {
+			SET_SWZ(conversion_swizzle, old_idx, new_idx);
+			new_idx++;
+		}
+	}
+	return conversion_swizzle;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index dd0f6c66156..2af289dfabd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -3,7 +3,12 @@
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H
+#include "radeon_opcodes.h"
+
+struct radeon_compiler;
struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
struct rc_src_register;
unsigned int rc_swizzle_to_writemask(unsigned int swz);
@@ -22,6 +27,22 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask);
unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+unsigned int rc_adjust_channels(
+ unsigned int old_swizzle,
+ unsigned int conversion_swizzle);
+
+void rc_pair_rewrite_writemask(
+ struct rc_pair_sub_instruction * sub,
+ unsigned int conversion_swizzle);
+
+void rc_normal_rewrite_writemask(
+ struct rc_instruction * inst,
+ unsigned int conversion_swizzle);
+
+/* Replaces each channel value of swizzle according to conversion_swizzle. */
+unsigned int rc_rewrite_swizzle(
+	unsigned int swizzle,
+	unsigned int conversion_swizzle);
+
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
void reset_srcreg(struct rc_src_register* reg);
@@ -46,4 +67,23 @@ unsigned int rc_inst_can_use_presub(
struct rc_src_register presub_src0,
struct rc_src_register presub_src1);
+int rc_get_max_index(
+ struct radeon_compiler * c,
+ rc_register_file file);
+
+unsigned int rc_pair_remove_src(
+ struct rc_instruction * inst,
+ unsigned int src_type,
+ unsigned int source,
+ unsigned int new_readmask);
+
+rc_opcode rc_get_flow_control_inst(struct rc_instruction * inst);
+
+struct rc_instruction * rc_match_endloop(struct rc_instruction * endloop);
+struct rc_instruction * rc_match_bgnloop(struct rc_instruction * bgnloop);
+
+unsigned int rc_make_conversion_swizzle(
+ unsigned int old_mask,
+ unsigned int new_mask);
+
#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index d1a7eab50f7..b0deb751be0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -151,6 +151,7 @@ static void pair_sub_for_all_args(
unsigned int presub_src_count;
struct rc_pair_instruction_source * src_array;
unsigned int j;
+
if (src_type & RC_SOURCE_RGB) {
presub_type = fullinst->
U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
@@ -446,30 +447,6 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
remap_pair_instruction(inst, cb, userdata);
}
-/**
- * @return RC_OPCODE_NOOP if inst is not a flow control instruction.
- * @return The opcode of inst if it is a flow control instruction.
- */
-static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
-{
- const struct rc_opcode_info * info;
- if (inst->Type == RC_INSTRUCTION_NORMAL) {
- info = rc_get_opcode_info(inst->U.I.Opcode);
- } else {
- info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
- /*A flow control instruction shouldn't have an alpha
- * instruction.*/
- assert(!info->IsFlowControl ||
- inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
- }
-
- if (info->IsFlowControl)
- return info->Opcode;
- else
- return RC_OPCODE_NOP;
-
-}
-
struct branch_write_mask {
unsigned int IfWriteMask:4;
unsigned int ElseWriteMask:4;
@@ -495,12 +472,11 @@ struct get_readers_callback_data {
struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
-static void add_reader(
+static struct rc_reader * add_reader(
struct memory_pool * pool,
struct rc_reader_data * data,
struct rc_instruction * inst,
- unsigned int mask,
- void * arg_or_src)
+ unsigned int mask)
{
struct rc_reader * new;
memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
@@ -508,11 +484,32 @@ static void add_reader(
new = &data->Readers[data->ReaderCount++];
new->Inst = inst;
new->WriteMask = mask;
- if (inst->Type == RC_INSTRUCTION_NORMAL) {
- new->U.Src = arg_or_src;
- } else {
- new->U.Arg = arg_or_src;
- }
+ return new;
+}
+
+static void add_reader_normal(
+ struct memory_pool * pool,
+ struct rc_reader_data * data,
+ struct rc_instruction * inst,
+ unsigned int mask,
+ struct rc_src_register * src)
+{
+ struct rc_reader * new = add_reader(pool, data, inst, mask);
+ new->U.I.Src = src;
+}
+
+
+static void add_reader_pair(
+ struct memory_pool * pool,
+ struct rc_reader_data * data,
+ struct rc_instruction * inst,
+ unsigned int mask,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ struct rc_reader * new = add_reader(pool, data, inst, mask);
+ new->U.P.Src = src;
+ new->U.P.Arg = arg;
}
static unsigned int get_readers_read_callback(
@@ -544,6 +541,11 @@ static unsigned int get_readers_read_callback(
return shared_mask;
}
+ if (cb_data->ReaderData->LoopDepth > 0) {
+ cb_data->ReaderData->AbortOnWrite |=
+ (read_mask & cb_data->AliveWriteMask);
+ }
+
/* XXX The behavior in this case should be configurable. */
if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
cb_data->ReaderData->Abort = 1;
@@ -572,10 +574,10 @@ static void get_readers_pair_read_callback(
if (d->ReadPairCB)
d->ReadPairCB(d->ReaderData, inst, arg, src);
- if (d->ReaderData->Abort)
+ if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
- add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg);
+ add_reader_pair(&d->C->Pool, d->ReaderData, inst, shared_mask, arg, src);
}
/**
@@ -600,10 +602,10 @@ static void get_readers_normal_read_callback(
if (d->ReadNormalCB)
d->ReadNormalCB(d->ReaderData, inst, src);
- if (d->ReaderData->Abort)
+ if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
- add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+ add_reader_normal(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
}
/**
@@ -624,12 +626,57 @@ static void get_readers_write_callback(
unsigned int shared_mask = mask & d->DstMask;
d->ReaderData->AbortOnRead &= ~shared_mask;
d->AliveWriteMask &= ~shared_mask;
+ if (d->ReaderData->AbortOnWrite & shared_mask) {
+ d->ReaderData->Abort = 1;
+ }
}
if(d->WriteCB)
d->WriteCB(d->ReaderData, inst, file, index, mask);
}
+static void push_branch_mask(
+ struct get_readers_callback_data * d,
+ unsigned int * branch_depth)
+{
+ (*branch_depth)++;
+ if (*branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+ d->BranchMasks[*branch_depth].IfWriteMask =
+ d->AliveWriteMask;
+}
+
+static void pop_branch_mask(
+ struct get_readers_callback_data * d,
+ unsigned int * branch_depth)
+{
+ struct branch_write_mask * masks = &d->BranchMasks[*branch_depth];
+
+ if (masks->HasElse) {
+ /* Abort on read for components that were written in the IF
+ * block. */
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask & ~masks->ElseWriteMask;
+ /* Abort on read for components that were written in the ELSE
+ * block. */
+ d->ReaderData->AbortOnRead |=
+ masks->ElseWriteMask & ~d->AliveWriteMask;
+
+ d->AliveWriteMask = masks->IfWriteMask
+ ^ ((masks->IfWriteMask ^ masks->ElseWriteMask)
+ & (masks->IfWriteMask ^ d->AliveWriteMask));
+ } else {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask & ~d->AliveWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask;
+
+ }
+ memset(masks, 0, sizeof(struct branch_write_mask));
+ (*branch_depth)--;
+}
+
static void get_readers_for_single_write(
void * userdata,
struct rc_instruction * writer,
@@ -639,10 +686,14 @@ static void get_readers_for_single_write(
{
struct rc_instruction * tmp;
unsigned int branch_depth = 0;
+ struct rc_instruction * endloop = NULL;
+ unsigned int abort_on_read_at_endloop;
struct get_readers_callback_data * d = userdata;
d->ReaderData->Writer = writer;
d->ReaderData->AbortOnRead = 0;
+ d->ReaderData->AbortOnWrite = 0;
+ d->ReaderData->LoopDepth = 0;
d->ReaderData->InElse = 0;
d->DstFile = dst_file;
d->DstIndex = dst_index;
@@ -655,32 +706,43 @@ static void get_readers_for_single_write(
for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
tmp = tmp->Next){
- rc_opcode opcode = get_flow_control_inst(tmp);
+ rc_opcode opcode = rc_get_flow_control_inst(tmp);
switch(opcode) {
case RC_OPCODE_BGNLOOP:
- /* XXX We can do better when we see a BGNLOOP if we
- * add a flag called AbortOnWrite to struct
- * rc_reader_data and leave it set until the next
- * ENDLOOP. */
+ d->ReaderData->LoopDepth++;
+ push_branch_mask(d, &branch_depth);
+ break;
case RC_OPCODE_ENDLOOP:
- /* XXX We can do better when we see an ENDLOOP by
- * searching backwards from writer and looking for
- * readers of writer's destination index. If we find a
- * reader before we get to the BGNLOOP, we must abort
- * unless there is another writer between that reader
- * and the BGNLOOP. */
- case RC_OPCODE_BRK:
- case RC_OPCODE_CONT:
- d->ReaderData->Abort = 1;
- return;
- case RC_OPCODE_IF:
- branch_depth++;
- if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
- d->ReaderData->Abort = 1;
- return;
+ if (d->ReaderData->LoopDepth > 0) {
+ d->ReaderData->LoopDepth--;
+ if (d->ReaderData->LoopDepth == 0) {
+ d->ReaderData->AbortOnWrite = 0;
+ }
+ pop_branch_mask(d, &branch_depth);
+ } else {
+ /* Here we have reached an ENDLOOP without
+ * seeing its BGNLOOP. This means that
+ * the writer was written inside of a loop,
+ * so it could have readers that are above it
+ * (i.e. they have a lower IP). To find these
+ * readers we jump to the BGNLOOP instruction
+ * and check each instruction until we get
+ * back to the writer.
+ */
+ endloop = tmp;
+ tmp = rc_match_endloop(tmp);
+ if (!tmp) {
+ rc_error(d->C, "Failed to match endloop.\n");
+ d->ReaderData->Abort = 1;
+ return;
+ }
+ abort_on_read_at_endloop = d->ReaderData->AbortOnRead;
+ d->ReaderData->AbortOnRead |= d->AliveWriteMask;
+ continue;
}
- d->BranchMasks[branch_depth].IfWriteMask =
- d->AliveWriteMask;
+ break;
+ case RC_OPCODE_IF:
+ push_branch_mask(d, &branch_depth);
break;
case RC_OPCODE_ELSE:
if (branch_depth == 0) {
@@ -700,35 +762,7 @@ static void get_readers_for_single_write(
d->ReaderData->InElse = 0;
}
else {
- struct branch_write_mask * masks =
- &d->BranchMasks[branch_depth];
-
- if (masks->HasElse) {
- /* Abort on read for components that
- * were written in the IF block. */
- d->ReaderData->AbortOnRead |=
- masks->IfWriteMask
- & ~masks->ElseWriteMask;
- /* Abort on read for components that
- * were written in the ELSE block. */
- d->ReaderData->AbortOnRead |=
- masks->ElseWriteMask
- & ~d->AliveWriteMask;
- d->AliveWriteMask = masks->IfWriteMask
- ^ ((masks->IfWriteMask ^
- masks->ElseWriteMask)
- & (masks->IfWriteMask
- ^ d->AliveWriteMask));
- } else {
- d->ReaderData->AbortOnRead |=
- masks->IfWriteMask
- & ~d->AliveWriteMask;
- d->AliveWriteMask = masks->IfWriteMask;
-
- }
- memset(masks, 0,
- sizeof(struct branch_write_mask));
- branch_depth--;
+ pop_branch_mask(d, &branch_depth);
}
break;
default:
@@ -745,9 +779,17 @@ static void get_readers_for_single_write(
rc_pair_for_all_reads_arg(tmp,
get_readers_pair_read_callback, d);
}
+
+ /* This can happen when we jump from an ENDLOOP to BGNLOOP */
+ if (tmp == writer) {
+ tmp = endloop;
+ endloop = NULL;
+ d->ReaderData->AbortOnRead = abort_on_read_at_endloop;
+ continue;
+ }
rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
- if (d->ReaderData->Abort)
+ if (d->ReaderData->ExitOnAbort && d->ReaderData->Abort)
return;
if (branch_depth == 0 && !d->AliveWriteMask)
@@ -755,6 +797,26 @@ static void get_readers_for_single_write(
}
}
+static void init_get_readers_callback_data(
+ struct get_readers_callback_data * d,
+ struct rc_reader_data * reader_data,
+ struct radeon_compiler * c,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ reader_data->Abort = 0;
+ reader_data->ReaderCount = 0;
+ reader_data->ReadersReserved = 0;
+ reader_data->Readers = NULL;
+
+ d->C = c;
+ d->ReaderData = reader_data;
+ d->ReadNormalCB = read_normal_cb;
+ d->ReadPairCB = read_pair_cb;
+ d->WriteCB = write_cb;
+}
+
/**
* This function will create a list of readers via the rc_reader_data struct.
* This function will abort (set the flag data->Abort) and return if it
@@ -803,16 +865,28 @@ void rc_get_readers(
{
struct get_readers_callback_data d;
- data->Abort = 0;
- data->ReaderCount = 0;
- data->ReadersReserved = 0;
- data->Readers = NULL;
-
- d.C = c;
- d.ReaderData = data;
- d.ReadNormalCB = read_normal_cb;
- d.ReadPairCB = read_pair_cb;
- d.WriteCB = write_cb;
+ init_get_readers_callback_data(&d, data, c, read_normal_cb,
+ read_pair_cb, write_cb);
rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
}
+
+void rc_get_readers_sub(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_pair_sub_instruction * sub_writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ struct get_readers_callback_data d;
+
+ init_get_readers_callback_data(&d, data, c, read_normal_cb,
+ read_pair_cb, write_cb);
+
+ if (sub_writer->WriteMask) {
+ get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY,
+ sub_writer->DestIndex, sub_writer->WriteMask);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index ef971c5b234..d8a627258ea 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -37,6 +37,7 @@ struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
struct rc_compiler;
@@ -74,14 +75,21 @@ struct rc_reader {
struct rc_instruction * Inst;
unsigned int WriteMask;
union {
- struct rc_src_register * Src;
- struct rc_pair_instruction_arg * Arg;
+ struct {
+ struct rc_src_register * Src;
+ } I;
+ struct {
+ struct rc_pair_instruction_arg * Arg;
+ struct rc_pair_instruction_source * Src;
+ } P;
} U;
};
struct rc_reader_data {
unsigned int Abort;
unsigned int AbortOnRead;
+ unsigned int AbortOnWrite;
+ unsigned int LoopDepth;
unsigned int InElse;
struct rc_instruction * Writer;
@@ -89,6 +97,9 @@ struct rc_reader_data {
unsigned int ReadersReserved;
struct rc_reader * Readers;
+ /* If this flag is enabled, rc_get_readers will exit as soon as possible
+ * after the Abort flag is set.*/
+ unsigned int ExitOnAbort;
void * CbData;
};
@@ -99,6 +110,15 @@ void rc_get_readers(
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
+
+void rc_get_readers_sub(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_pair_sub_instruction * sub_writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb);
/**
* Compiler passes based on dataflow analysis.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.c b/src/mesa/drivers/dri/r300/compiler/radeon_list.c
new file mode 100644
index 00000000000..811c908a81a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2011 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+ struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list));
+ new->Item = item;
+ new->Next = NULL;
+ new->Prev = NULL;
+
+ return new;
+}
+
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+ struct rc_list * temp;
+
+ if (*list == NULL) {
+ *list = new_value;
+ return;
+ }
+
+ for (temp = *list; temp->Next; temp = temp->Next);
+
+ temp->Next = new_value;
+ new_value->Prev = temp;
+}
+
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+ if (*list == rm_value) {
+ *list = rm_value->Next;
+ return;
+ }
+
+ rm_value->Prev->Next = rm_value->Next;
+ if (rm_value->Next) {
+ rm_value->Next->Prev = rm_value->Prev;
+ }
+}
+
+unsigned int rc_list_count(struct rc_list * list)
+{
+ unsigned int count = 0;
+ while (list) {
+ count++;
+ list = list->Next;
+ }
+ return count;
+}
+
+void rc_list_print(struct rc_list * list)
+{
+ while(list) {
+ fprintf(stderr, "%p->", list->Item);
+ list = list->Next;
+ }
+ fprintf(stderr, "\n");
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.h b/src/mesa/drivers/dri/r300/compiler/radeon_list.h
new file mode 100644
index 00000000000..b3c8f89cc68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2011 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+struct rc_list {
+ void * Item;
+ struct rc_list * Prev;
+ struct rc_list * Next;
+};
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+unsigned int rc_list_count(struct rc_list * list);
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index e3e498e8fb4..afd78ad79dd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -481,6 +481,7 @@ void rc_compute_sources_for_writemask(
break;
case RC_OPCODE_TXB:
case RC_OPCODE_TXP:
+ case RC_OPCODE_TXL:
srcmasks[0] |= RC_MASK_W;
/* Fall through */
case RC_OPCODE_TEX:
@@ -500,6 +501,33 @@ void rc_compute_sources_for_writemask(
break;
}
break;
+ case RC_OPCODE_TXD:
+ switch (inst->U.I.TexSrcTarget) {
+ case RC_TEXTURE_1D_ARRAY:
+ srcmasks[0] |= RC_MASK_Y;
+ /* Fall through. */
+ case RC_TEXTURE_1D:
+ srcmasks[0] |= RC_MASK_X;
+ srcmasks[1] |= RC_MASK_X;
+ srcmasks[2] |= RC_MASK_X;
+ break;
+ case RC_TEXTURE_2D_ARRAY:
+ srcmasks[0] |= RC_MASK_Z;
+ /* Fall through. */
+ case RC_TEXTURE_2D:
+ case RC_TEXTURE_RECT:
+ srcmasks[0] |= RC_MASK_XY;
+ srcmasks[1] |= RC_MASK_XY;
+ srcmasks[2] |= RC_MASK_XY;
+ break;
+ case RC_TEXTURE_3D:
+ case RC_TEXTURE_CUBE:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ srcmasks[2] |= RC_MASK_XYZ;
+ break;
+ }
+ break;
case RC_OPCODE_DST:
srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 79898e1047e..5b4fba80873 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -91,6 +91,8 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
(inst->U.I.Opcode == RC_OPCODE_TEX ||
inst->U.I.Opcode == RC_OPCODE_TXB ||
inst->U.I.Opcode == RC_OPCODE_TXP ||
+ inst->U.I.Opcode == RC_OPCODE_TXD ||
+ inst->U.I.Opcode == RC_OPCODE_TXL ||
inst->U.I.Opcode == RC_OPCODE_KIL)){
reader_data->Abort = 1;
return;
@@ -144,6 +146,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
return;
/* Get a list of all the readers of this MOV instruction. */
+ reader_data.ExitOnAbort = 1;
rc_get_readers(c, inst_mov, &reader_data,
copy_propagate_scan_read, NULL,
is_src_clobbered_scan_write);
@@ -154,7 +157,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
- *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
+ *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = inst_mov->U.I.PreSub;
@@ -453,6 +456,7 @@ static int presub_helper(
rc_presubtract_op cb_op = presub_opcode;
reader_data.CbData = &cb_op;
+ reader_data.ExitOnAbort = 1;
rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
is_src_clobbered_scan_write);
@@ -466,7 +470,7 @@ static int presub_helper(
rc_get_opcode_info(reader.Inst->U.I.Opcode);
for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
- if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
presub_replace(inst_add, reader.Inst, src_index);
}
}
@@ -619,13 +623,11 @@ static int peephole_add_presub_inv(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
- unsigned int i, swz, mask;
+ unsigned int i, swz;
if (!is_presub_candidate(c, inst_add))
return 0;
- mask = inst_add->U.I.DstReg.WriteMask;
-
/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c
new file mode 100644
index 00000000000..1e9a2c09d44
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_dead_sources.c
@@ -0,0 +1,62 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_pair.h"
+
+static void mark_used_presub(struct rc_pair_sub_instruction * sub)
+{
+ if (sub->Src[RC_PAIR_PRESUB_SRC].Used) {
+ unsigned int presub_reg_count = rc_presubtract_src_reg_count(
+ sub->Src[RC_PAIR_PRESUB_SRC].Index);
+ unsigned int i;
+ for (i = 0; i < presub_reg_count; i++) {
+ sub->Src[i].Used = 1;
+ }
+ }
+}
+
+static void mark_used(
+ struct rc_instruction * inst,
+ struct rc_pair_sub_instruction * sub)
+{
+ unsigned int i;
+ const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ unsigned int src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
+ if (src_type & RC_SOURCE_RGB) {
+ inst->U.P.RGB.Src[sub->Arg[i].Source].Used = 1;
+ }
+
+ if (src_type & RC_SOURCE_ALPHA) {
+ inst->U.P.Alpha.Src[sub->Arg[i].Source].Used = 1;
+ }
+ }
+}
+
+/**
+ * This pass finds sources that are not used by their instruction and marks
+ * them as unused.
+ */
+void rc_pair_remove_dead_sources(struct radeon_compiler * c, void *user)
+{
+ struct rc_instruction * inst;
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ unsigned int i;
+ if (inst->Type == RC_INSTRUCTION_NORMAL)
+ continue;
+
+ /* Mark all sources as unused */
+ for (i = 0; i < 4; i++) {
+ inst->U.P.RGB.Src[i].Used = 0;
+ inst->U.P.Alpha.Src[i].Used = 0;
+ }
+ mark_used(inst, &inst->U.P.RGB);
+ mark_used(inst, &inst->U.P.Alpha);
+
+ mark_used_presub(&inst->U.P.RGB);
+ mark_used_presub(&inst->U.P.Alpha);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index d53181e1f75..49983d6ce75 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <[email protected]>
*
* All Rights Reserved.
*
@@ -29,125 +30,126 @@
#include <stdio.h>
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
+#include "r300_fragprog_swizzle.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
-
+#include "radeon_list.h"
+#include "radeon_variable.h"
#define VERBOSE 0
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct live_intervals {
- int Start;
- int End;
- struct live_intervals * Next;
-};
struct register_info {
- struct live_intervals Live;
+ struct live_intervals Live[4];
unsigned int Used:1;
unsigned int Allocated:1;
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
-};
-
-struct hardware_register {
- struct live_intervals * Used;
+ unsigned int Writemask;
};
struct regalloc_state {
struct radeon_compiler * C;
- struct register_info Input[RC_REGISTER_MAX_INDEX];
- struct register_info Temporary[RC_REGISTER_MAX_INDEX];
-
- struct hardware_register * HwTemporary;
- unsigned int NumHwTemporaries;
- /**
- * If an instruction is inside of a loop, EndLoop will be the
- * IP of the ENDLOOP instruction, and BeginLoop will be the IP
- * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
- * will be -1.
- */
- int EndLoop;
- int BeginLoop;
+ struct register_info * Input;
+ unsigned int NumInputs;
+
+ struct register_info * Temporary;
+ unsigned int NumTemporaries;
+
+ unsigned int Simple;
+ int LoopEnd;
+};
+
+enum rc_reg_class {
+ RC_REG_CLASS_SINGLE,
+ RC_REG_CLASS_DOUBLE,
+ RC_REG_CLASS_TRIPLE,
+ RC_REG_CLASS_ALPHA,
+ RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+ RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+ RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+ RC_REG_CLASS_X,
+ RC_REG_CLASS_Y,
+ RC_REG_CLASS_Z,
+ RC_REG_CLASS_XY,
+ RC_REG_CLASS_YZ,
+ RC_REG_CLASS_XZ,
+ RC_REG_CLASS_XW,
+ RC_REG_CLASS_YW,
+ RC_REG_CLASS_ZW,
+ RC_REG_CLASS_XYW,
+ RC_REG_CLASS_YZW,
+ RC_REG_CLASS_XZW,
+ RC_REG_CLASS_COUNT
+};
+
+struct rc_class {
+ enum rc_reg_class Class;
+
+ unsigned int WritemaskCount;
+
+ /** This is 1 if this class is being used by the register allocator
+ * and 0 otherwise */
+ unsigned int Used;
+
+ /** This is the ID number assigned to this class by ra. */
+ unsigned int Id;
+
+ /** List of writemasks that belong to this class */
+ unsigned int Writemasks[3];
+
+
};
static void print_live_intervals(struct live_intervals * src)
{
- if (!src) {
+ if (!src || !src->Used) {
DBG("(null)");
return;
}
- while(src) {
- DBG("(%i,%i)", src->Start, src->End);
- src = src->Next;
- }
+ DBG("(%i,%i)", src->Start, src->End);
}
-static void add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
{
- struct live_intervals ** dst_backup = dst;
-
if (VERBOSE) {
- DBG("add_live_intervals: ");
- print_live_intervals(*dst);
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(a);
DBG(" to ");
- print_live_intervals(src);
- DBG("\n");
- }
-
- while(src) {
- if (*dst && (*dst)->End < src->Start) {
- dst = &(*dst)->Next;
- } else if (!*dst || (*dst)->Start > src->End) {
- struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
- li->Start = src->Start;
- li->End = src->End;
- li->Next = *dst;
- *dst = li;
- src = src->Next;
- } else {
- if (src->End > (*dst)->End)
- (*dst)->End = src->End;
- if (src->Start < (*dst)->Start)
- (*dst)->Start = src->Start;
- src = src->Next;
- }
- }
-
- if (VERBOSE) {
- DBG(" result: ");
- print_live_intervals(*dst_backup);
+ print_live_intervals(b);
DBG("\n");
}
-}
-static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
-{
- if (VERBOSE) {
- DBG("overlap_live_intervals: ");
- print_live_intervals(dst);
- DBG(" to ");
- print_live_intervals(src);
- DBG("\n");
+ if (!a->Used || !b->Used) {
+ DBG(" unused interval\n");
+ return 0;
}
- while(src && dst) {
- if (dst->End <= src->Start) {
- dst = dst->Next;
- } else if (dst->End <= src->End) {
+ if (a->Start > b->Start) {
+ if (a->Start < b->End) {
+ DBG(" overlap\n");
+ return 1;
+ }
+ } else if (b->Start > a->Start) {
+ if (b->Start < a->End) {
DBG(" overlap\n");
return 1;
- } else if (dst->Start < src->End) {
+ }
+ } else { /* a->Start == b->Start */
+ if (a->Start != a->End && b->Start != b->End) {
DBG(" overlap\n");
return 1;
- } else {
- src = src->Next;
}
}
@@ -156,92 +158,27 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter
return 0;
}
-static int try_add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
-{
- if (overlap_live_intervals(*dst, src))
- return 0;
-
- add_live_intervals(s, dst, src);
- return 1;
-}
-
-static void scan_callback(void * data, struct rc_instruction * inst,
+static void scan_read_callback(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
+ unsigned int i;
- if (file == RC_FILE_TEMPORARY)
- reg = &s->Temporary[index];
- else if (file == RC_FILE_INPUT)
- reg = &s->Input[index];
- else
+ if (file != RC_FILE_INPUT)
return;
- if (!reg->Used) {
- reg->Used = 1;
- if (file == RC_FILE_INPUT)
- reg->Live.Start = -1;
- else if (s->BeginLoop >= 0)
- reg->Live.Start = s->BeginLoop;
- else
- reg->Live.Start = inst->IP;
- reg->Live.End = inst->IP;
- } else if (s->EndLoop >= 0)
- reg->Live.End = s->EndLoop;
- else if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
-}
+ s->Input[index].Used = 1;
+ reg = &s->Input[index];
-static void compute_live_intervals(struct radeon_compiler *c,
- struct regalloc_state *s)
-{
- memset(s, 0, sizeof(*s));
- s->C = c;
- s->NumHwTemporaries = c->max_temp_regs;
- s->BeginLoop = -1;
- s->EndLoop = -1;
- s->HwTemporary =
- memory_pool_malloc(&c->Pool,
- s->NumHwTemporaries * sizeof(struct hardware_register));
- memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
-
- rc_recompute_ips(s->C);
-
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
-
- /* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval and
- * the BGNLOOP instruction is used as the beginning. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
- int loops = 1;
- struct rc_instruction * tmp;
- s->BeginLoop = inst->IP;
- for(tmp = inst->Next;
- tmp != &s->C->Program.Instructions;
- tmp = tmp->Next) {
- if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
- loops++;
- } else if (tmp->U.I.Opcode
- == RC_OPCODE_ENDLOOP) {
- if(!--loops) {
- s->EndLoop = tmp->IP;
- break;
- }
- }
- }
- }
-
- if (inst->IP == s->EndLoop) {
- s->EndLoop = -1;
- s->BeginLoop = -1;
+ for (i = 0; i < 4; i++) {
+ if (!((mask >> i) & 0x1)) {
+ continue;
}
-
- rc_for_all_reads_mask(inst, scan_callback, s);
- rc_for_all_writes_mask(inst, scan_callback, s);
+ reg->Live[i].Used = 1;
+ reg->Live[i].Start = 0;
+ reg->Live[i].End =
+ s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
}
}
@@ -251,7 +188,7 @@ static void remap_register(void * data, struct rc_instruction * inst,
struct regalloc_state * s = data;
const struct register_info * reg;
- if (*file == RC_FILE_TEMPORARY)
+ if (*file == RC_FILE_TEMPORARY && s->Simple)
reg = &s->Temporary[*index];
else if (*file == RC_FILE_INPUT)
reg = &s->Input[*index];
@@ -259,106 +196,511 @@ static void remap_register(void * data, struct rc_instruction * inst,
return;
if (reg->Allocated) {
- *file = reg->File;
*index = reg->Index;
}
}
-static void do_regalloc(struct regalloc_state * s)
+static void alloc_input_simple(void * data, unsigned int input,
+ unsigned int hwreg)
+{
+ struct regalloc_state * s = data;
+
+ if (input >= s->NumInputs)
+ return;
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+}
+
+/* This function offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+ for (unsigned i = 0; i < s->NumTemporaries; i++) {
+ s->Temporary[i].Allocated = 1;
+ s->Temporary[i].File = RC_FILE_TEMPORARY;
+ s->Temporary[i].Index = i + s->NumInputs;
+ }
+}
+
+static unsigned int is_derivative(rc_opcode op)
{
- /* Simple and stupid greedy register allocation */
- for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
- struct register_info * reg = &s->Temporary[index];
+ return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
- if (!reg->Used)
+static int find_class(
+ struct rc_class * classes,
+ unsigned int writemask,
+ unsigned int max_writemask_count)
+{
+ unsigned int i;
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ unsigned int j;
+ if (classes[i].WritemaskCount > max_writemask_count) {
continue;
+ }
+ for (j = 0; j < 3; j++) {
+ if (classes[i].Writemasks[j] == writemask) {
+ return i;
+ }
+ }
+ }
+ return -1;
+}
+
+static enum rc_reg_class variable_get_class(
+ struct rc_variable * variable,
+ struct rc_class * classes)
+{
+ unsigned int i;
+ unsigned int can_change_writemask= 1;
+ unsigned int writemask = rc_variable_writemask_sum(variable);
+ struct rc_list * readers = rc_variable_readers_union(variable);
+ int class_index;
+
+ if (!variable->C->is_r500) {
+ struct rc_class c;
+ /* The assumption here is that if an instruction has type
+ * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+ * r300 and r400 can't swizzle the result of a TEX lookup. */
+ if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = RC_MASK_XYZW;
+ }
- for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
- if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
- reg->Allocated = 1;
- reg->File = RC_FILE_TEMPORARY;
- reg->Index = hwreg;
- goto success;
+ /* Check if it is possible to do swizzle packing for r300/r400
+ * without creating non-native swizzles. */
+ class_index = find_class(classes, writemask, 3);
+ if (class_index < 0) {
+ goto error;
+ }
+ c = classes[class_index];
+ for (i = 0; i < c.WritemaskCount; i++) {
+ int j;
+ unsigned int conversion_swizzle =
+ rc_make_conversion_swizzle(
+ writemask, c.Writemasks[i]);
+ for (j = 0; j < variable->ReaderCount; j++) {
+ unsigned int old_swizzle;
+ unsigned int new_swizzle;
+ struct rc_reader r = variable->Readers[j];
+ if (r.Inst->Type == RC_INSTRUCTION_PAIR ) {
+ old_swizzle = r.U.P.Arg->Swizzle;
+ } else {
+ old_swizzle = r.U.I.Src->Swizzle;
+ }
+ new_swizzle = rc_adjust_channels(
+ old_swizzle, conversion_swizzle);
+ if (!r300_swizzle_is_native_basic(new_swizzle)) {
+ can_change_writemask = 0;
+ break;
+ }
+ }
+ if (!can_change_writemask) {
+ break;
}
}
+ }
- rc_error(s->C, "Ran out of hardware temporaries\n");
- return;
+ if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+ /* DDX/DDY seem to always fail when their writemasks are
+ * changed.*/
+ if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+ || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ }
+ }
+ for ( ; readers; readers = readers->Next) {
+ struct rc_reader * r = readers->Item;
+ if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+ if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+ can_change_writemask = 0;
+ break;
+ }
+ /* DDX/DDY also fail when their swizzles are changed. */
+ if (is_derivative(r->Inst->U.P.RGB.Opcode)
+ || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ break;
+ }
+ }
+ }
- success:;
+ class_index = find_class(classes, writemask,
+ can_change_writemask ? 3 : 1);
+ if (class_index > -1) {
+ return classes[class_index].Class;
+ } else {
+error:
+ rc_error(variable->C,
+ "Could not find class for index=%u mask=%u\n",
+ variable->Dst.Index, writemask);
+ return 0;
}
+}
- /* Rewrite all instructions based on the translation table we built */
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, s);
+static unsigned int overlap_live_intervals_array(
+ struct live_intervals * a,
+ struct live_intervals * b)
+{
+ unsigned int a_chan, b_chan;
+ for (a_chan = 0; a_chan < 4; a_chan++) {
+ for (b_chan = 0; b_chan < 4; b_chan++) {
+ if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+ return 1;
+ }
+ }
}
+ return 0;
}
-static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+static unsigned int reg_get_index(int reg)
{
- struct regalloc_state * s = data;
+ return reg / RC_MASK_XYZW;
+}
- if (!s->Input[input].Used)
- return;
+static unsigned int reg_get_writemask(int reg)
+{
+ return (reg % RC_MASK_XYZW) + 1;
+}
- add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+ assert(writemask);
+ if (writemask == 0) {
+ return 0;
+ }
+ return (index * RC_MASK_XYZW) + (writemask - 1);
+}
- s->Input[input].Allocated = 1;
- s->Input[input].File = RC_FILE_TEMPORARY;
- s->Input[input].Index = hwreg;
+#if VERBOSE
+static void print_reg(int reg)
+{
+ unsigned int index = reg_get_index(reg);
+ unsigned int mask = reg_get_writemask(reg);
+ fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+ mask & RC_MASK_X ? 'x' : '_',
+ mask & RC_MASK_Y ? 'y' : '_',
+ mask & RC_MASK_Z ? 'z' : '_',
+ mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
+static void add_register_conflicts(
+ struct ra_regs * regs,
+ unsigned int max_temp_regs)
+{
+ unsigned int index, a_mask, b_mask;
+ for (index = 0; index < max_temp_regs; index++) {
+ for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+ for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+ b_mask++) {
+ if (a_mask & b_mask) {
+ ra_add_reg_conflict(regs,
+ get_reg_id(index, a_mask),
+ get_reg_id(index, b_mask));
+ }
+ }
+ }
+ }
}
-void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+static void do_advanced_regalloc(struct regalloc_state * s)
{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
+ struct rc_class rc_class_list [] = {
+ {RC_REG_CLASS_SINGLE, 3, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_Y,
+ RC_MASK_Z}},
+ {RC_REG_CLASS_DOUBLE, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_X | RC_MASK_Z,
+ RC_MASK_Y | RC_MASK_Z}},
+ {RC_REG_CLASS_TRIPLE, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ALPHA, 1, 0, 0,
+ {RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_W,
+ RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_X, 1, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Y, 1, 0, 0,
+ {RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Z, 1, 0, 0,
+ {RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XY, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZ, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZ, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ZW, 1, 0, 0,
+ {RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XYW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}}
+ };
+
+ unsigned int i, j, index, input_node, node_count, node_index;
+ unsigned int * node_classes;
+ unsigned int * input_classes;
+ struct rc_instruction * inst;
+ struct rc_list * var_ptr;
+ struct rc_list * variables;
+ struct ra_regs * regs;
+ struct ra_graph * graph;
+
+ /* Allocate the main ra data structure */
+ regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+ /* Get list of program variables */
+ variables = rc_get_variables(s->C);
+ node_count = rc_list_count(variables);
+ node_classes = memory_pool_malloc(&s->C->Pool,
+ node_count * sizeof(unsigned int));
+ input_classes = memory_pool_malloc(&s->C->Pool,
+ s->NumInputs * sizeof(unsigned int));
+
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ unsigned int class_index;
+ /* Compute the live intervals */
+ rc_variable_compute_live_intervals(var_ptr->Item);
+
+ class_index = variable_get_class(var_ptr->Item, rc_class_list);
+
+ /* If we haven't used this register class yet, mark it
+ * as used and allocate space for it. */
+ if (!rc_class_list[class_index].Used) {
+ rc_class_list[class_index].Used = 1;
+ rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+ }
- compute_live_intervals(cc, &s);
+ node_classes[node_index] = rc_class_list[class_index].Id;
+ }
- c->AllocateHwInputs(c, &alloc_input, &s);
- do_regalloc(&s);
-}
+ /* Assign registers to the classes */
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ struct rc_class class = rc_class_list[i];
+ if (!class.Used) {
+ continue;
+ }
-/* This functions offsets the temporary register indices by the number
- * of input registers, because input registers are actually temporaries and
- * should not occupy the same space.
- *
- * This pass is supposed to be used to maintain correct allocation of inputs
- * if the standard register allocation is disabled. */
-void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
-{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
- int temp_reg_offset;
+ for (index = 0; index < s->C->max_temp_regs; index++) {
+ for (j = 0; j < class.WritemaskCount; j++) {
+ int reg_id = get_reg_id(index,
+ class.Writemasks[j]);
+ ra_class_add_reg(regs, class.Id, reg_id);
+ }
+ }
+ }
+
+ /* Add register conflicts */
+ add_register_conflicts(regs, s->C->max_temp_regs);
+
+ /* Calculate live intervals for input registers */
+ for (inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_opcode op = rc_get_flow_control_inst(inst);
+ if (op == RC_OPCODE_BGNLOOP) {
+ struct rc_instruction * endloop =
+ rc_match_bgnloop(inst);
+ if (endloop->IP > s->LoopEnd) {
+ s->LoopEnd = endloop->IP;
+ }
+ }
+ rc_for_all_reads_mask(inst, scan_read_callback, s);
+ }
- compute_live_intervals(cc, &s);
+ /* Create classes for input registers */
+ for (i = 0; i < s->NumInputs; i++) {
+ unsigned int chan, class_id, writemask = 0;
+ for (chan = 0; chan < 4; chan++) {
+ if (s->Input[i].Live[chan].Used) {
+ writemask |= (1 << chan);
+ }
+ }
+ s->Input[i].Writemask = writemask;
+ if (!writemask) {
+ continue;
+ }
+
+ class_id = ra_alloc_reg_class(regs);
+ input_classes[i] = class_id;
+ ra_class_add_reg(regs, class_id,
+ get_reg_id(s->Input[i].Index, writemask));
+ }
- c->AllocateHwInputs(c, &alloc_input, &s);
+ ra_set_finalize(regs);
- temp_reg_offset = 0;
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
- temp_reg_offset = s.Input[i].Index + 1;
+ graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
+
+ /* Build the interference graph */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next,node_index++) {
+ struct rc_list * a, * b;
+ unsigned int b_index;
+
+ ra_set_node_class(graph, node_index, node_classes[node_index]);
+
+ for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+ b; b = b->Next, b_index++) {
+ struct rc_variable * var_a = a->Item;
+ while (var_a) {
+ struct rc_variable * var_b = b->Item;
+ while (var_b) {
+ if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+ ra_add_node_interference(graph,
+ node_index, b_index);
+ }
+ var_b = var_b->Friend;
+ }
+ var_a = var_a->Friend;
+ }
+ }
}
- if (temp_reg_offset) {
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Temporary[i].Used) {
- s.Temporary[i].Allocated = 1;
- s.Temporary[i].File = RC_FILE_TEMPORARY;
- s.Temporary[i].Index = i + temp_reg_offset;
+ /* Add input registers to the interference graph */
+ for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+ if (!s->Input[i].Writemask) {
+ continue;
+ }
+ ra_set_node_class(graph, node_count + input_node,
+ input_classes[i]);
+ for (var_ptr = variables, node_index = 0;
+ var_ptr; var_ptr = var_ptr->Next, node_index++) {
+ struct rc_variable * var = var_ptr->Item;
+ if (overlap_live_intervals_array(s->Input[i].Live,
+ var->Live)) {
+ ra_add_node_interference(graph, node_index,
+ node_count + input_node);
}
}
+ /* Manually allocate a register for this input */
+ ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+ s->Input[i].Index, s->Input[i].Writemask));
+ input_node++;
+ }
+
+ if (!ra_allocate_no_spills(graph)) {
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+ }
+
+ /* Rewrite the registers */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ int reg = ra_get_node_reg(graph, node_index);
+ unsigned int writemask = reg_get_writemask(reg);
+ unsigned int index = reg_get_index(reg);
+ struct rc_variable * var = var_ptr->Item;
+
+ if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = rc_variable_writemask_sum(var);
+ }
- /* Rewrite all registers. */
- for (struct rc_instruction *inst = cc->Program.Instructions.Next;
- inst != &cc->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, &s);
+ if (var->Dst.File == RC_FILE_INPUT) {
+ continue;
}
+ rc_variable_change_dst(var, index, writemask);
+ }
+
+ ralloc_free(graph);
+ ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value. If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only). If
+ * the integer is non-zero, then the regular register allocator will be used
+ * (\sa do_advanced_regalloc).
+ */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+ struct r300_fragment_program_compiler *c =
+ (struct r300_fragment_program_compiler*)cc;
+ struct regalloc_state s;
+ int * do_full_regalloc = (int*)user;
+
+ memset(&s, 0, sizeof(s));
+ s.C = cc;
+ s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+ s.Input = memory_pool_malloc(&cc->Pool,
+ s.NumInputs * sizeof(struct register_info));
+ memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+ s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+ s.Temporary = memory_pool_malloc(&cc->Pool,
+ s.NumTemporaries * sizeof(struct register_info));
+ memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+ rc_recompute_ips(s.C);
+
+ c->AllocateHwInputs(c, &alloc_input_simple, &s);
+ if (*do_full_regalloc) {
+ do_advanced_regalloc(&s);
+ } else {
+ s.Simple = 1;
+ do_regalloc_inputs_only(&s);
+ }
+
+ /* Rewrite inputs and if we are doing the simple allocation, rewrite
+ * temporaries too. */
+ for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+ inst != &s.C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_register, &s);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 8e10813ff06..25cd52c9cd4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -709,7 +709,7 @@ static int convert_rgb_to_alpha(
pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
pair_inst->Alpha.DestIndex = new_index;
- pair_inst->Alpha.WriteMask = 1;
+ pair_inst->Alpha.WriteMask = RC_MASK_W;
pair_inst->Alpha.Target = pair_inst->RGB.Target;
pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
@@ -739,7 +739,7 @@ static int convert_rgb_to_alpha(
for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
- rgb_to_alpha_remap(reader.Inst, reader.U.Arg,
+ rgb_to_alpha_remap(reader.Inst, reader.U.P.Arg,
RC_FILE_TEMPORARY, old_swz, new_index);
}
return 1;
@@ -952,6 +952,7 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
instruction_ready(&s, s.Current);
/* Get global readers for possible RGB->Alpha conversion. */
+ s.Current->GlobalReaders.ExitOnAbort = 1;
rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
is_rgb_to_alpha_possible_normal,
is_rgb_to_alpha_possible, NULL);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index a07f6b63c6e..b899eccbf53 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -108,6 +108,9 @@ struct rc_sub_instruction {
/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;
+
+ /**R500 Only. How to swizzle the result of a TEX lookup*/
+ unsigned int TexSwizzle:12;
/*@}*/
/** This holds information about the presubtract operation used by
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 45f79ece5ba..24577333450 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -129,6 +129,7 @@ typedef enum {
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
/**
* \name Bitmasks for components of vectors.
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 68874795b8a..52315957520 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -223,3 +223,17 @@ struct rc_pair_instruction_source * rc_pair_get_src(
return NULL;
}
}
+
+int rc_pair_get_src_index(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_source * src)
+{
+ int i;
+ for (i = 0; i < 3; i++) {
+ if (&pair_inst->RGB.Src[i] == src
+ || &pair_inst->Alpha.Src[i] == src) {
+ return i;
+ }
+ }
+ return -1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index d1a435fc530..a957ea9f7a0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -114,6 +114,10 @@ void rc_pair_foreach_source_that_rgb_reads(
struct rc_pair_instruction_source * rc_pair_get_src(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_source * src);
/*@}*/
@@ -127,6 +131,7 @@ void rc_pair_translate(struct radeon_compiler *cc, void *user);
void rc_pair_schedule(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc(struct radeon_compiler *cc, void *user);
void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user);
+void rc_pair_remove_dead_sources(struct radeon_compiler *c, void *user);
/*@}*/
#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index cef448ee4e1..8d16b2cf9ec 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -142,6 +142,8 @@ int radeonTransformTEX(
if (inst->U.I.Opcode != RC_OPCODE_TEX &&
inst->U.I.Opcode != RC_OPCODE_TXB &&
inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_TXD &&
+ inst->U.I.Opcode != RC_OPCODE_TXL &&
inst->U.I.Opcode != RC_OPCODE_KIL)
return 0;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 5bd19c0b9c6..cafa0579734 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -71,6 +71,7 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
continue;
+ reader_data.ExitOnAbort = 1;
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
if (reader_data.Abort || reader_data.ReaderCount == 0)
@@ -85,7 +86,7 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
reader_data.Writer->U.I.DstReg.Index = new_index;
for(i = 0; i < reader_data.ReaderCount; i++) {
- reader_data.Readers[i].U.Src->Index = new_index;
+ reader_data.Readers[i].U.I.Src->Index = new_index;
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
new file mode 100644
index 00000000000..16fa5d28902
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
@@ -0,0 +1,484 @@
+/*
+ * Copyright 2011 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask. This function also takes
+ * care of rewriting the source swizzles of the instructions that read var.
+ */
+void rc_variable_change_dst(
+ struct rc_variable * var,
+ unsigned int new_index,
+ unsigned int new_writemask)
+{
+ struct rc_variable * var_ptr;
+ struct rc_list * readers;
+ unsigned int old_mask = rc_variable_writemask_sum(var);
+ unsigned int conversion_swizzle =
+ rc_make_conversion_swizzle(old_mask, new_writemask);
+
+ for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+ if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ rc_normal_rewrite_writemask(var_ptr->Inst,
+ conversion_swizzle);
+ var_ptr->Inst->U.I.DstReg.Index = new_index;
+ } else {
+ struct rc_pair_sub_instruction * sub;
+ if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+ assert(new_writemask & RC_MASK_W);
+ sub = &var_ptr->Inst->U.P.Alpha;
+ } else {
+ sub = &var_ptr->Inst->U.P.RGB;
+ rc_pair_rewrite_writemask(sub,
+ conversion_swizzle);
+ }
+ sub->DestIndex = new_index;
+ }
+ }
+
+ readers = rc_variable_readers_union(var);
+
+ for ( ; readers; readers = readers->Next) {
+ struct rc_reader * reader = readers->Item;
+ if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ reader->U.I.Src->Index = new_index;
+ reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+ reader->U.I.Src->Swizzle, conversion_swizzle);
+ } else {
+ struct rc_pair_instruction * pair_inst =
+ &reader->Inst->U.P;
+ unsigned int src_type = rc_source_type_swz(
+ reader->U.P.Arg->Swizzle);
+
+ int src_index = reader->U.P.Arg->Source;
+ if (src_index == RC_PAIR_PRESUB_SRC) {
+ src_index = rc_pair_get_src_index(
+ pair_inst, reader->U.P.Src);
+ }
+ /* Try to delete the old src, it is OK if this fails,
+ * because rc_pair_alloc_source might be able to
+ * find a source that can be reused.
+ */
+ if (rc_pair_remove_src(reader->Inst, src_type,
+ src_index, old_mask)) {
+ /* Reuse the source index of the source that
+ * was just deleted and set its register
+ * index. We can't use rc_pair_alloc_source
+ * for this because it might return a source
+ * index that is already being used. */
+ if (src_type & RC_SOURCE_RGB) {
+ pair_inst->RGB.Src[src_index]
+ .Used = 1;
+ pair_inst->RGB.Src[src_index]
+ .Index = new_index;
+ pair_inst->RGB.Src[src_index]
+ .File = RC_FILE_TEMPORARY;
+ }
+ if (src_type & RC_SOURCE_ALPHA) {
+ pair_inst->Alpha.Src[src_index]
+ .Used = 1;
+ pair_inst->Alpha.Src[src_index]
+ .Index = new_index;
+ pair_inst->Alpha.Src[src_index]
+ .File = RC_FILE_TEMPORARY;
+ }
+ } else {
+ src_index = rc_pair_alloc_source(
+ &reader->Inst->U.P,
+ src_type & RC_SOURCE_RGB,
+ src_type & RC_SOURCE_ALPHA,
+ RC_FILE_TEMPORARY,
+ new_index);
+ if (src_index < 0) {
+ rc_error(var->C, "Rewrite of inst %u failed "
+ "Can't allocate source for "
+ "Inst %u src_type=%x "
+ "new_index=%u new_mask=%u\n",
+ var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+ continue;
+ }
+ }
+ reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+ reader->U.P.Arg->Swizzle, conversion_swizzle);
+ if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+ reader->U.P.Arg->Source = src_index;
+ }
+ }
+ }
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+ while(var) {
+ unsigned int i;
+ unsigned int start = var->Inst->IP;
+
+ for (i = 0; i < var->ReaderCount; i++) {
+ unsigned int chan;
+ unsigned int chan_start = start;
+ unsigned int chan_end = var->Readers[i].Inst->IP;
+ unsigned int mask = var->Readers[i].WriteMask;
+ struct rc_instruction * inst;
+
+ /* Extend the live interval of T0 to the start of the
+ * loop for sequences like:
+ * BGNLOOP
+ * read T0
+ * ...
+ * write T0
+ * ENDLOOP
+ */
+ if (var->Readers[i].Inst->IP < start) {
+ struct rc_instruction * bgnloop =
+ rc_match_endloop(var->Readers[i].Inst);
+ chan_start = bgnloop->IP;
+ }
+
+ /* Extend the live interval of T0 to the start of the
+ * loop in case there is a BRK instruction in the loop
+ * (we don't actually check for a BRK instruction we
+ * assume there is one somewhere in the loop, which
+ * there usually is) for sequences like:
+ * BGNLOOP
+ * ...
+ * conditional BRK
+ * ...
+ * write T0
+ * ENDLOOP
+ * read T0
+ ***************************************************
+ * Extend the live interval of T0 to the end of the
+ * loop for sequences like:
+ * write T0
+ * BGNLOOP
+ * ...
+ * read T0
+ * ENDLOOP
+ */
+ for (inst = var->Inst; inst != var->Readers[i].Inst;
+ inst = inst->Next) {
+ rc_opcode op = rc_get_flow_control_inst(inst);
+ if (op == RC_OPCODE_ENDLOOP) {
+ struct rc_instruction * bgnloop =
+ rc_match_endloop(inst);
+ if (bgnloop->IP < chan_start) {
+ chan_start = bgnloop->IP;
+ }
+ } else if (op == RC_OPCODE_BGNLOOP) {
+ struct rc_instruction * endloop =
+ rc_match_bgnloop(inst);
+ if (endloop->IP > chan_end) {
+ chan_end = endloop->IP;
+ }
+ }
+ }
+
+ for (chan = 0; chan < 4; chan++) {
+ if ((mask >> chan) & 0x1) {
+ if (!var->Live[chan].Used
+ || chan_start < var->Live[chan].Start) {
+ var->Live[chan].Start =
+ chan_start;
+ }
+ if (!var->Live[chan].Used
+ || chan_end > var->Live[chan].End) {
+ var->Live[chan].End = chan_end;
+ }
+ var->Live[chan].Used = 1;
+ }
+ }
+ }
+ var = var->Friend;
+ }
+}
+
+/**
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+ struct rc_variable * a,
+ struct rc_variable * b)
+{
+ unsigned int a_index, b_index;
+ for (a_index = 0; a_index < a->ReaderCount; a_index++) {
+ struct rc_reader reader_a = a->Readers[a_index];
+ for (b_index = 0; b_index < b->ReaderCount; b_index++) {
+ struct rc_reader reader_b = b->Readers[b_index];
+ if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
+ && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
+ && reader_a.U.I.Src == reader_b.U.I.Src) {
+
+ return 1;
+ }
+ if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
+ && reader_b.Inst->Type == RC_INSTRUCTION_PAIR
+ && reader_a.U.P.Src == reader_b.U.P.Src) {
+
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+void rc_variable_add_friend(
+ struct rc_variable * var,
+ struct rc_variable * friend)
+{
+ assert(var->Dst.Index == friend->Dst.Index);
+ while(var->Friend) {
+ var = var->Friend;
+ }
+ var->Friend = friend;
+}
+
+struct rc_variable * rc_variable(
+ struct radeon_compiler * c,
+ unsigned int DstFile,
+ unsigned int DstIndex,
+ unsigned int DstWriteMask,
+ struct rc_reader_data * reader_data)
+{
+ struct rc_variable * new =
+ memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+ memset(new, 0, sizeof(struct rc_variable));
+ new->C = c;
+ new->Dst.File = DstFile;
+ new->Dst.Index = DstIndex;
+ new->Dst.WriteMask = DstWriteMask;
+ if (reader_data) {
+ new->Inst = reader_data->Writer;
+ new->ReaderCount = reader_data->ReaderCount;
+ new->Readers = reader_data->Readers;
+ }
+ return new;
+}
+
+static void get_variable_helper(
+ struct rc_list ** aborted_list,
+ struct rc_list ** variable_list,
+ unsigned int aborted,
+ struct rc_variable * variable)
+{
+ if (aborted) {
+ rc_list_add(aborted_list, rc_list(&variable->C->Pool, variable));
+ } else {
+ rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+ }
+}
+
+/**
+ * Create a variable for one half (sub-instruction) of a pair instruction and
+ * file it in the aborted or the regular variable list.
+ *
+ * Nothing is created for NOP sub-instructions or for sub-instructions whose
+ * result has no readers.
+ *
+ * @param aborted_list Receives the variable when the reader search aborted.
+ * @param variable_list Receives the variable otherwise.
+ * @param c The compiler.
+ * @param inst The pair instruction that contains sub_inst.
+ * @param sub_inst The sub-instruction to examine.
+ */
+static void get_variable_pair_helper(
+	struct rc_list ** aborted_list,
+	struct rc_list ** variable_list,
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	struct rc_pair_sub_instruction * sub_inst)
+{
+	struct rc_reader_data readers;
+	struct rc_variable * pair_var;
+	rc_register_file dst_file = RC_FILE_NONE;
+	unsigned int dst_mask = 0;
+
+	if (sub_inst->Opcode == RC_OPCODE_NOP) {
+		return;
+	}
+
+	memset(&readers, 0, sizeof(readers));
+	rc_get_readers_sub(c, inst, sub_inst, &readers, NULL, NULL, NULL);
+	if (readers.ReaderCount == 0) {
+		return;
+	}
+
+	/* A temporary write takes precedence over an output write; with
+	 * neither mask set the destination stays RC_FILE_NONE with an empty
+	 * mask. */
+	if (sub_inst->WriteMask) {
+		dst_file = RC_FILE_TEMPORARY;
+		dst_mask = sub_inst->WriteMask;
+	} else if (sub_inst->OutputWriteMask) {
+		dst_file = RC_FILE_OUTPUT;
+		dst_mask = sub_inst->OutputWriteMask;
+	}
+
+	pair_var = rc_variable(c, dst_file, sub_inst->DestIndex, dst_mask,
+								&readers);
+	get_variable_helper(aborted_list, variable_list, readers.Abort,
+								pair_var);
+}
+
+/**
+ * Generate a list of variables used by the shader program. Each instruction
+ * that writes to a register is considered a variable. The struct rc_variable
+ * data structure includes a list of readers and is essentially a
+ * definition-use chain. Any two variables that share a reader are considered
+ * "friends" and they are linked together via the Friend attribute.
+ *
+ * Writes whose reader search finds no readers do not produce a variable.
+ *
+ * @param c The compiler whose Program.Instructions list is scanned.
+ * @return The list of variables; variables found to be friends of an earlier
+ * variable are reachable through that variable's Friend chain instead of
+ * having their own list entry.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+ struct rc_list * aborted_list = NULL;
+ struct rc_list * variable_list = NULL;
+ struct rc_list * var_ptr;
+ struct rc_list * search_ptr;
+
+ /* Pass 1: create one variable per write that has readers. The walk ends
+ * when it comes back to the list head, &c->Program.Instructions. */
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ struct rc_reader_data reader_data;
+ struct rc_variable * new_var;
+ memset(&reader_data, 0, sizeof(reader_data));
+
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+ if (reader_data.ReaderCount == 0) {
+ continue;
+ }
+ new_var = rc_variable(c, inst->U.I.DstReg.File,
+ inst->U.I.DstReg.Index,
+ inst->U.I.DstReg.WriteMask, &reader_data);
+ /* reader_data.Abort selects aborted_list over variable_list. */
+ get_variable_helper(&aborted_list, &variable_list,
+ reader_data.Abort, new_var);
+ } else {
+ /* Any other instruction type is handled as a pair instruction:
+ * its RGB and Alpha halves are examined separately. */
+ get_variable_pair_helper(&aborted_list, &variable_list,
+ c, inst, &inst->U.P.RGB);
+ get_variable_pair_helper(&aborted_list, &variable_list,
+ c, inst, &inst->U.P.Alpha);
+ }
+ }
+
+ /* The aborted_list contains a list of variables that might share a
+ * reader with another variable. We need to search through this list
+ * and pair together variables that do share the same reader.
+ */
+ while (aborted_list) {
+ struct rc_list * search_ptr_next;
+ var_ptr = aborted_list;
+
+ search_ptr = var_ptr->Next;
+ while(search_ptr) {
+ /* Save the successor first: search_ptr may be unlinked below. */
+ search_ptr_next = search_ptr->Next;
+ /* NOTE(review): only var_ptr's own item is passed here; whether
+ * friends already attached to it are also compared depends on
+ * readers_intersect(), which is not visible in this chunk. */
+ if (readers_intersect(var_ptr->Item, search_ptr->Item)){
+ rc_list_remove(&aborted_list, search_ptr);
+ rc_variable_add_friend(var_ptr->Item,
+ search_ptr->Item);
+ }
+ search_ptr = search_ptr_next;
+ }
+ /* Move the head variable, with any friends collected above, to the
+ * result list by wrapping it in a fresh list node. */
+ rc_list_remove(&aborted_list, var_ptr);
+ rc_list_add(&variable_list, rc_list(
+ &((struct rc_variable*)(var_ptr->Item))->C->Pool,
+ var_ptr->Item));
+ }
+ return variable_list;
+}
+
+/**
+ * Combine the write masks of a variable and every variable on its Friend
+ * chain.
+ *
+ * @param var First variable of the chain; may be NULL.
+ * @return The bitwise or of all Dst.WriteMask values, or 0 for a NULL var.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+	unsigned int combined = 0;
+
+	for (; var; var = var->Friend) {
+		combined |= var->Dst.WriteMask;
+	}
+	return combined;
+}
+
+/**
+ * Collect the readers of a variable and of all variables on its Friend chain.
+ *
+ * A reader is left out when the list already holds a reader of the same
+ * instruction type that refers to the same source: the same U.I.Src for
+ * normal instructions, the same U.P.Arg and U.P.Src for pair instructions.
+ *
+ * @param var First variable of the chain; may be NULL.
+ * @return A list of struct rc_reader pointers, NULL when there are none. The
+ * list nodes are allocated from the compiler's pool.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+	struct rc_list * readers = NULL;
+
+	for (; var; var = var->Friend) {
+		unsigned int idx;
+		for (idx = 0; idx < var->ReaderCount; idx++) {
+			struct rc_reader * candidate = &var->Readers[idx];
+			struct rc_list * node = readers;
+
+			/* Scan the readers collected so far; the scan stops
+			 * early on the first duplicate, leaving node != NULL. */
+			while (node) {
+				struct rc_reader * seen = node->Item;
+				if (candidate->Inst->Type == seen->Inst->Type) {
+					if (candidate->Inst->Type == RC_INSTRUCTION_NORMAL
+					    && candidate->U.I.Src == seen->U.I.Src) {
+						break;
+					}
+					if (candidate->Inst->Type == RC_INSTRUCTION_PAIR
+					    && candidate->U.P.Arg == seen->U.P.Arg
+					    && candidate->U.P.Src == seen->U.P.Src) {
+						break;
+					}
+				}
+				node = node->Next;
+			}
+
+			/* The scan ran off the end: no duplicate was found. */
+			if (!node) {
+				rc_list_add(&readers,
+					rc_list(&var->C->Pool, candidate));
+			}
+		}
+	}
+	return readers;
+}
+
+/**
+ * Debug helper: print a variable and every variable on its Friend chain to
+ * stderr.
+ *
+ * For each variable this prints the IP of the writing instruction, the
+ * destination index and write mask, the start/end of each of the four
+ * per-channel live intervals, and the reader count.
+ *
+ * @param var First variable of the chain; may be NULL, in which case nothing
+ * is printed.
+ */
+void rc_variable_print(struct rc_variable * var)
+{
+	unsigned int i;
+	while (var) {
+		/* Inst stays NULL when the variable was created without
+		 * reader data, so it must not be dereferenced blindly. */
+		if (var->Inst) {
+			fprintf(stderr, "%u: ", var->Inst->IP);
+		} else {
+			fprintf(stderr, "?: ");
+		}
+		fprintf(stderr, "TEMP[%u].%u: ",
+			var->Dst.Index, var->Dst.WriteMask);
+		for (i = 0; i < 4; i++) {
+			/* Start and End are signed ints, hence %d. */
+			fprintf(stderr, "chan %u: start=%d end=%d ", i,
+				var->Live[i].Start, var->Live[i].End);
+		}
+		fprintf(stderr, "%u readers\n", var->ReaderCount);
+		if (var->Friend) {
+			fprintf(stderr, "Friend: \n\t");
+		}
+		var = var->Friend;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
new file mode 100644
index 00000000000..b8fbcaa4029
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2011 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+struct live_intervals { /* One Start/End pair; rc_variable keeps one per channel in Live[4]. */
+ int Start; /* Printed as "start" by rc_variable_print(); presumably an instruction IP - confirm in rc_variable_compute_live_intervals(). */
+ int End; /* Printed as "end" by rc_variable_print(); same unit as Start. */
+ int Used; /* NOTE(review): meaning is not visible in this chunk - confirm against the code that sets it. */
+};
+
+struct rc_variable { /* One register write together with the readers of the written value. */
+ struct radeon_compiler * C; /* Owning compiler; its Pool is used for list-node allocations. */
+ struct rc_dst_register Dst; /* Destination of the write: File, Index and WriteMask. */
+
+ struct rc_instruction * Inst; /* Copied from rc_reader_data.Writer; NULL when created without reader data. */
+ unsigned int ReaderCount; /* Number of entries in Readers. */
+ struct rc_reader * Readers; /* Reader array copied from rc_reader_data (the pointer is copied, not the entries). */
+ struct live_intervals Live[4]; /* One interval per channel, indexed 0-3. */
+
+ /* A friend is a variable that shares a reader with another variable.
+ */
+ struct rc_variable * Friend; /* Next variable on the friend chain, or NULL at the end of the chain. */
+};
+
+void rc_variable_change_dst(
+ struct rc_variable * var,
+ unsigned int new_index,
+ unsigned int new_writemask);
+
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+void rc_variable_add_friend(
+ struct rc_variable * var,
+ struct rc_variable * friend);
+
+struct rc_variable * rc_variable(
+ struct radeon_compiler * c,
+ unsigned int DstFile,
+ unsigned int DstIndex,
+ unsigned int DstWriteMask,
+ struct rc_reader_data * reader_data);
+
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 88b68e3d191..9145023826e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -406,6 +406,7 @@
#define PCI_CHIP_CEDAR_68E8 0x68E8
#define PCI_CHIP_CEDAR_68E9 0x68E9
#define PCI_CHIP_CEDAR_68F1 0x68F1
+#define PCI_CHIP_CEDAR_68F2 0x68F2
#define PCI_CHIP_CEDAR_68F8 0x68F8
#define PCI_CHIP_CEDAR_68F9 0x68F9
#define PCI_CHIP_CEDAR_68FE 0x68FE
@@ -426,7 +427,9 @@
#define PCI_CHIP_JUNIPER_68B0 0x68B0
#define PCI_CHIP_JUNIPER_68B8 0x68B8
#define PCI_CHIP_JUNIPER_68B9 0x68B9
+#define PCI_CHIP_JUNIPER_68BA 0x68BA
#define PCI_CHIP_JUNIPER_68BE 0x68BE
+#define PCI_CHIP_JUNIPER_68BF 0x68BF
#define PCI_CHIP_CYPRESS_6880 0x6880
#define PCI_CHIP_CYPRESS_6888 0x6888
@@ -434,6 +437,7 @@
#define PCI_CHIP_CYPRESS_688A 0x688A
#define PCI_CHIP_CYPRESS_6898 0x6898
#define PCI_CHIP_CYPRESS_6899 0x6899
+#define PCI_CHIP_CYPRESS_689B 0x689B
#define PCI_CHIP_CYPRESS_689E 0x689E
#define PCI_CHIP_HEMLOCK_689C 0x689C
@@ -458,6 +462,7 @@
#define PCI_CHIP_BARTS_6729 0x6729
#define PCI_CHIP_BARTS_6738 0x6738
#define PCI_CHIP_BARTS_6739 0x6739
+#define PCI_CHIP_BARTS_673E 0x673E
#define PCI_CHIP_TURKS_6740 0x6740
#define PCI_CHIP_TURKS_6741 0x6741
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 732efe8bd85..6449229e088 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -1106,6 +1106,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
case PCI_CHIP_CEDAR_68E8:
case PCI_CHIP_CEDAR_68E9:
case PCI_CHIP_CEDAR_68F1:
+ case PCI_CHIP_CEDAR_68F2:
case PCI_CHIP_CEDAR_68F8:
case PCI_CHIP_CEDAR_68F9:
case PCI_CHIP_CEDAR_68FE:
@@ -1132,7 +1133,9 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
case PCI_CHIP_JUNIPER_68B0:
case PCI_CHIP_JUNIPER_68B8:
case PCI_CHIP_JUNIPER_68B9:
+ case PCI_CHIP_JUNIPER_68BA:
case PCI_CHIP_JUNIPER_68BE:
+ case PCI_CHIP_JUNIPER_68BF:
screen->chip_family = CHIP_FAMILY_JUNIPER;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
@@ -1143,6 +1146,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
case PCI_CHIP_CYPRESS_688A:
case PCI_CHIP_CYPRESS_6898:
case PCI_CHIP_CYPRESS_6899:
+ case PCI_CHIP_CYPRESS_689B:
case PCI_CHIP_CYPRESS_689E:
screen->chip_family = CHIP_FAMILY_CYPRESS;
screen->chip_flags = RADEON_CHIPSET_TCL;
@@ -1176,6 +1180,7 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
case PCI_CHIP_BARTS_6729:
case PCI_CHIP_BARTS_6738:
case PCI_CHIP_BARTS_6739:
+ case PCI_CHIP_BARTS_673E:
screen->chip_family = CHIP_FAMILY_BARTS;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
diff --git a/src/mesa/drivers/windows/fx/fx.rc b/src/mesa/drivers/windows/fx/fx.rc
deleted file mode 100644
index f920b8768dd..00000000000
--- a/src/mesa/drivers/windows/fx/fx.rc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <windows.h>
-
-#define PRODNAME "Mesa 6.x"
-#define CONTACTSTR "http://www.mesa3d.org"
-#define HWSTR "3dfx Voodoo Graphics, Voodoo Rush, Voodoo^2, Voodoo Banshee, Velocity 100/200, Voodoo3, Voodoo4, Voodoo5"
-#define COPYRIGHTSTR "Copyright \251 Brian E. Paul"
-
-#define VERSIONSTR "6.3.0.1"
-#define MANVERSION 6
-#define MANREVISION 3
-#define BUILD_NUMBER 1
-
-VS_VERSION_INFO VERSIONINFO
- FILEVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER
- PRODUCTVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER
- FILEFLAGSMASK 0x0030003FL
-
- FILEOS VOS_DOS_WINDOWS32
- FILETYPE VFT_DRV
- FILESUBTYPE VFT2_DRV_INSTALLABLE
-BEGIN
- BLOCK "StringFileInfo"
- BEGIN
- BLOCK "040904E4"
- BEGIN
- VALUE "FileDescription", PRODNAME
- VALUE "FileVersion", VERSIONSTR
- VALUE "LegalCopyright", COPYRIGHTSTR
- VALUE "ProductName", PRODNAME
- VALUE "Graphics Subsystem", HWSTR
- VALUE "Contact", CONTACTSTR
- END
- END
- BLOCK "VarFileInfo"
- BEGIN
- /* the following line should be extended for localized versions */
- VALUE "Translation", 0x409, 1252
- END
-END
diff --git a/src/mesa/drivers/windows/fx/fxopengl.def b/src/mesa/drivers/windows/fx/fxopengl.def
deleted file mode 100644
index bc615e93ae6..00000000000
--- a/src/mesa/drivers/windows/fx/fxopengl.def
+++ /dev/null
@@ -1,953 +0,0 @@
-LIBRARY OpenGL32
-DESCRIPTION "Mesa 5.1"
-EXPORTS
- glAccum
- glActiveStencilFaceEXT
- glActiveTexture
- glActiveTextureARB
- glAlphaFunc
- glAreProgramsResidentNV
- glAreTexturesResident
- glAreTexturesResidentEXT
- glArrayElement
- glArrayElementEXT
- glBegin
- glBeginQueryARB
- glBindBufferARB
- glBindProgramARB
- glBindProgramNV
- glBindTexture
- glBindTextureEXT
- glBitmap
-;glBlendColor
-;glBlendColorEXT
- glBlendEquation
- glBlendEquationEXT
- glBlendFunc
- glBlendFuncSeparate
- glBlendFuncSeparateEXT
- glBlendFuncSeparateINGR
- glBufferDataARB
- glBufferSubDataARB
- glCallList
- glCallLists
- glClear
- glClearAccum
- glClearColor
- glClearDepth
- glClearIndex
- glClearStencil
- glClientActiveTexture
- glClientActiveTextureARB
- glClipPlane
- glColorMask
- glColorMaterial
- glColorPointer
- glColorPointerEXT
- glColorSubTable
- glColorSubTableEXT
- glColorTable
- glColorTableEXT
- glColorTableParameterfv
- glColorTableParameterfvSGI
- glColorTableParameteriv
- glColorTableParameterivSGI
- glColorTableSGI
- glColor3b
- glColor3bv
- glColor3d
- glColor3dv
- glColor3f
- glColor3fv
- glColor3i
- glColor3iv
- glColor3s
- glColor3sv
- glColor3ub
- glColor3ubv
- glColor3ui
- glColor3uiv
- glColor3us
- glColor3usv
- glColor4b
- glColor4bv
- glColor4d
- glColor4dv
- glColor4f
- glColor4fv
- glColor4i
- glColor4iv
- glColor4s
- glColor4sv
- glColor4ub
- glColor4ubv
- glColor4ui
- glColor4uiv
- glColor4us
- glColor4usv
- glCombinerInputNV
- glCombinerOutputNV
- glCombinerParameterfNV
- glCombinerParameterfvNV
- glCombinerParameteriNV
- glCombinerParameterivNV
- glCompressedTexImage1D
- glCompressedTexImage1DARB
- glCompressedTexImage2D
- glCompressedTexImage2DARB
- glCompressedTexImage3D
- glCompressedTexImage3DARB
- glCompressedTexSubImage1D
- glCompressedTexSubImage1DARB
- glCompressedTexSubImage2D
- glCompressedTexSubImage2DARB
- glCompressedTexSubImage3D
- glCompressedTexSubImage3DARB
- glConvolutionFilter1D
- glConvolutionFilter1DEXT
- glConvolutionFilter2D
- glConvolutionFilter2DEXT
- glConvolutionParameterf
- glConvolutionParameterfEXT
- glConvolutionParameterfv
- glConvolutionParameterfvEXT
- glConvolutionParameteri
- glConvolutionParameteriEXT
- glConvolutionParameteriv
- glConvolutionParameterivEXT
- glCopyColorSubTable
- glCopyColorSubTableEXT
- glCopyColorTable
- glCopyColorTableSGI
- glCopyConvolutionFilter1D
- glCopyConvolutionFilter1DEXT
- glCopyConvolutionFilter2D
- glCopyConvolutionFilter2DEXT
- glCopyPixels
- glCopyTexImage1D
- glCopyTexImage1DEXT
- glCopyTexImage2D
- glCopyTexImage2DEXT
- glCopyTexSubImage1D
- glCopyTexSubImage1DEXT
- glCopyTexSubImage2D
- glCopyTexSubImage2DEXT
- glCopyTexSubImage3D
- glCopyTexSubImage3DEXT
- glCullFace
- glDeleteBuffersARB
- glDeleteFencesNV
- glDeleteLists
- glDeleteProgramsARB
- glDeleteProgramsNV
- glDeleteQueriesARB
- glDeleteTextures
- glDeleteTexturesEXT
- glDepthBoundsEXT
- glDepthFunc
- glDepthMask
- glDepthRange
- glDetailTexFuncSGIS
- glDisable
- glDisableClientState
- glDisableVertexAttribArrayARB
- glDrawArrays
- glDrawArraysEXT
- glDrawBuffer
- glDrawElements
- glDrawPixels
- glDrawRangeElements
- glDrawRangeElementsEXT
- glEdgeFlag
- glEdgeFlagPointer
- glEdgeFlagPointerEXT
- glEdgeFlagv
- glEnable
- glEnableClientState
- glEnableVertexAttribArrayARB
- glEnd
- glEndList
- glEndQueryARB
- glEvalCoord1d
- glEvalCoord1dv
- glEvalCoord1f
- glEvalCoord1fv
- glEvalCoord2d
- glEvalCoord2dv
- glEvalCoord2f
- glEvalCoord2fv
- glEvalMesh1
- glEvalMesh2
- glEvalPoint1
- glEvalPoint2
- glExecuteProgramNV
- glFeedbackBuffer
- glFinalCombinerInputNV
- glFinish
- glFinishFenceNV
- glFlush
- glFlushRasterSGIX
- glFlushVertexArrayRangeNV
- glFogCoordd
- glFogCoorddEXT
- glFogCoorddv
- glFogCoorddvEXT
- glFogCoordf
- glFogCoordfEXT
- glFogCoordfv
- glFogCoordfvEXT
- glFogCoordPointer
- glFogCoordPointerEXT
- glFogf
- glFogfv
- glFogi
- glFogiv
- glFragmentColorMaterialSGIX
- glFragmentLightfSGIX
- glFragmentLightfvSGIX
- glFragmentLightiSGIX
- glFragmentLightivSGIX
- glFragmentLightModelfSGIX
- glFragmentLightModelfvSGIX
- glFragmentLightModeliSGIX
- glFragmentLightModelivSGIX
- glFragmentMaterialfSGIX
- glFragmentMaterialfvSGIX
- glFragmentMaterialiSGIX
- glFragmentMaterialivSGIX
- glFrameZoomSGIX
- glFrontFace
- glFrustum
- glGenBuffersARB
- glGenFencesNV
- glGenLists
- glGenProgramsARB
- glGenProgramsNV
- glGenQueriesARB
- glGenTextures
- glGenTexturesEXT
- glGetBooleanv
- glGetBufferParameterivARB
- glGetBufferPointervARB
- glGetBufferSubDataARB
- glGetClipPlane
- glGetColorTable
- glGetColorTableEXT
- glGetColorTableParameterfv
- glGetColorTableParameterfvEXT
- glGetColorTableParameterfvSGI
- glGetColorTableParameteriv
- glGetColorTableParameterivEXT
- glGetColorTableParameterivSGI
- glGetColorTableSGI
- glGetCombinerInputParameterfvNV
- glGetCombinerInputParameterivNV
- glGetCombinerOutputParameterfvNV
- glGetCombinerOutputParameterivNV
- glGetCompressedTexImage
- glGetCompressedTexImageARB
- glGetConvolutionFilter
- glGetConvolutionFilterEXT
- glGetConvolutionParameterfv
- glGetConvolutionParameterfvEXT
- glGetConvolutionParameteriv
- glGetConvolutionParameterivEXT
- glGetDetailTexFuncSGIS
- glGetDoublev
- glGetError
- glGetFenceivNV
- glGetFinalCombinerInputParameterfvNV
- glGetFinalCombinerInputParameterivNV
- glGetFloatv
- glGetFragmentLightfvSGIX
- glGetFragmentLightivSGIX
- glGetFragmentMaterialfvSGIX
- glGetFragmentMaterialivSGIX
- glGetHistogram
- glGetHistogramEXT
- glGetHistogramParameterfv
- glGetHistogramParameterfvEXT
- glGetHistogramParameteriv
- glGetHistogramParameterivEXT
- glGetInstrumentsSGIX
- glGetIntegerv
- glGetLightfv
- glGetLightiv
- glGetListParameterfvSGIX
- glGetListParameterivSGIX
- glGetMapdv
- glGetMapfv
- glGetMapiv
- glGetMaterialfv
- glGetMaterialiv
- glGetMinmax
- glGetMinmaxEXT
- glGetMinmaxParameterfv
- glGetMinmaxParameterfvEXT
- glGetMinmaxParameteriv
- glGetMinmaxParameterivEXT
- glGetPixelMapfv
- glGetPixelMapuiv
- glGetPixelMapusv
- glGetPixelTexGenParameterfvSGIS
- glGetPixelTexGenParameterivSGIS
- glGetPointerv
- glGetPointervEXT
- glGetPolygonStipple
- glGetProgramEnvParameterdvARB
- glGetProgramEnvParameterfvARB
- glGetProgramivARB
- glGetProgramivNV
- glGetProgramLocalParameterdvARB
- glGetProgramLocalParameterfvARB
- glGetProgramNamedParameterdvNV
- glGetProgramNamedParameterfvNV
- glGetProgramParameterdvNV
- glGetProgramParameterfvNV
- glGetProgramStringARB
- glGetProgramStringNV
- glGetQueryivARB
- glGetQueryObjectivARB
- glGetQueryObjectuivARB
- glGetSeparableFilter
- glGetSeparableFilterEXT
- glGetSharpenTexFuncSGIS
- glGetString
- glGetTexEnvfv
- glGetTexEnviv
- glGetTexFilterFuncSGIS
- glGetTexGendv
- glGetTexGenfv
- glGetTexGeniv
- glGetTexImage
- glGetTexLevelParameterfv
- glGetTexLevelParameteriv
- glGetTexParameterfv
- glGetTexParameteriv
- glGetTrackMatrixivNV
- glGetVertexAttribdvARB
- glGetVertexAttribdvNV
- glGetVertexAttribfvARB
- glGetVertexAttribfvNV
- glGetVertexAttribivARB
- glGetVertexAttribivNV
- glGetVertexAttribPointervARB
- glGetVertexAttribPointervNV
- glHint
- glHintPGI
- glHistogram
- glHistogramEXT
- glIndexd
- glIndexdv
- glIndexf
- glIndexFuncEXT
- glIndexfv
- glIndexi
- glIndexiv
- glIndexMask
- glIndexMaterialEXT
- glIndexPointer
- glIndexPointerEXT
- glIndexs
- glIndexsv
- glIndexub
- glIndexubv
- glInitNames
- glInstrumentsBufferSGIX
- glInterleavedArrays
- glIsBufferARB
- glIsEnabled
- glIsFenceNV
- glIsList
- glIsProgramARB
- glIsProgramNV
- glIsQueryARB
- glIsTexture
- glIsTextureEXT
- glLightEnviSGIX
- glLightf
- glLightfv
- glLighti
- glLightiv
- glLightModelf
- glLightModelfv
- glLightModeli
- glLightModeliv
- glLineStipple
- glLineWidth
- glListBase
- glListParameterfSGIX
- glListParameterfvSGIX
- glListParameteriSGIX
- glListParameterivSGIX
- glLoadIdentity
- glLoadMatrixd
- glLoadMatrixf
- glLoadName
- glLoadProgramNV
- glLoadTransposeMatrixd
- glLoadTransposeMatrixdARB
- glLoadTransposeMatrixf
- glLoadTransposeMatrixfARB
- glLockArraysEXT
- glLogicOp
- glMapBufferARB
- glMapGrid1d
- glMapGrid1f
- glMapGrid2d
- glMapGrid2f
- glMap1d
- glMap1f
- glMap2d
- glMap2f
- glMaterialf
- glMaterialfv
- glMateriali
- glMaterialiv
- glMatrixMode
- glMinmax
- glMinmaxEXT
- glMultiDrawArrays
- glMultiDrawArraysEXT
- glMultiDrawElements
- glMultiDrawElementsEXT
- glMultiModeDrawArraysIBM
- glMultiModeDrawElementsIBM
- glMultiTexCoord1d
- glMultiTexCoord1dARB
- glMultiTexCoord1dv
- glMultiTexCoord1dvARB
- glMultiTexCoord1f
- glMultiTexCoord1fARB
- glMultiTexCoord1fv
- glMultiTexCoord1fvARB
- glMultiTexCoord1i
- glMultiTexCoord1iARB
- glMultiTexCoord1iv
- glMultiTexCoord1ivARB
- glMultiTexCoord1s
- glMultiTexCoord1sARB
- glMultiTexCoord1sv
- glMultiTexCoord1svARB
- glMultiTexCoord2d
- glMultiTexCoord2dARB
- glMultiTexCoord2dv
- glMultiTexCoord2dvARB
- glMultiTexCoord2f
- glMultiTexCoord2fARB
- glMultiTexCoord2fv
- glMultiTexCoord2fvARB
- glMultiTexCoord2i
- glMultiTexCoord2iARB
- glMultiTexCoord2iv
- glMultiTexCoord2ivARB
- glMultiTexCoord2s
- glMultiTexCoord2sARB
- glMultiTexCoord2sv
- glMultiTexCoord2svARB
- glMultiTexCoord3d
- glMultiTexCoord3dARB
- glMultiTexCoord3dv
- glMultiTexCoord3dvARB
- glMultiTexCoord3f
- glMultiTexCoord3fARB
- glMultiTexCoord3fv
- glMultiTexCoord3fvARB
- glMultiTexCoord3i
- glMultiTexCoord3iARB
- glMultiTexCoord3iv
- glMultiTexCoord3ivARB
- glMultiTexCoord3s
- glMultiTexCoord3sARB
- glMultiTexCoord3sv
- glMultiTexCoord3svARB
- glMultiTexCoord4d
- glMultiTexCoord4dARB
- glMultiTexCoord4dv
- glMultiTexCoord4dvARB
- glMultiTexCoord4f
- glMultiTexCoord4fARB
- glMultiTexCoord4fv
- glMultiTexCoord4fvARB
- glMultiTexCoord4i
- glMultiTexCoord4iARB
- glMultiTexCoord4iv
- glMultiTexCoord4ivARB
- glMultiTexCoord4s
- glMultiTexCoord4sARB
- glMultiTexCoord4sv
- glMultiTexCoord4svARB
- glMultMatrixd
- glMultMatrixf
- glMultTransposeMatrixd
- glMultTransposeMatrixdARB
- glMultTransposeMatrixf
- glMultTransposeMatrixfARB
- glNewList
- glNormalPointer
- glNormalPointerEXT
- glNormal3b
- glNormal3bv
- glNormal3d
- glNormal3dv
- glNormal3f
- glNormal3fv
- glNormal3i
- glNormal3iv
- glNormal3s
- glNormal3sv
- glOrtho
- glPassThrough
- glPixelMapfv
- glPixelMapuiv
- glPixelMapusv
- glPixelStoref
- glPixelStorei
- glPixelTexGenParameterfSGIS
- glPixelTexGenParameterfvSGIS
- glPixelTexGenParameteriSGIS
- glPixelTexGenParameterivSGIS
- glPixelTexGenSGIX
- glPixelTransferf
- glPixelTransferi
- glPixelZoom
- glPointParameterf
- glPointParameterfARB
- glPointParameterfEXT
- glPointParameterfSGIS
- glPointParameterfv
- glPointParameterfvARB
- glPointParameterfvEXT
- glPointParameterfvSGIS
- glPointParameteri
- glPointParameteriNV
- glPointParameteriv
- glPointParameterivNV
- glPointSize
- glPollInstrumentsSGIX
- glPolygonMode
- glPolygonOffset
- glPolygonOffsetEXT
- glPolygonStipple
- glPopAttrib
- glPopClientAttrib
- glPopMatrix
- glPopName
- glPrioritizeTextures
- glPrioritizeTexturesEXT
- glProgramEnvParameter4dARB
- glProgramEnvParameter4dvARB
- glProgramEnvParameter4fARB
- glProgramEnvParameter4fvARB
- glProgramLocalParameter4dARB
- glProgramLocalParameter4dvARB
- glProgramLocalParameter4fARB
- glProgramLocalParameter4fvARB
- glProgramNamedParameter4dNV
- glProgramNamedParameter4dvNV
- glProgramNamedParameter4fNV
- glProgramNamedParameter4fvNV
- glProgramParameters4dvNV
- glProgramParameters4fvNV
- glProgramParameter4dNV
- glProgramParameter4dvNV
- glProgramParameter4fNV
- glProgramParameter4fvNV
- glProgramStringARB
- glPushAttrib
- glPushClientAttrib
- glPushMatrix
- glPushName
- glRasterPos2d
- glRasterPos2dv
- glRasterPos2f
- glRasterPos2fv
- glRasterPos2i
- glRasterPos2iv
- glRasterPos2s
- glRasterPos2sv
- glRasterPos3d
- glRasterPos3dv
- glRasterPos3f
- glRasterPos3fv
- glRasterPos3i
- glRasterPos3iv
- glRasterPos3s
- glRasterPos3sv
- glRasterPos4d
- glRasterPos4dv
- glRasterPos4f
- glRasterPos4fv
- glRasterPos4i
- glRasterPos4iv
- glRasterPos4s
- glRasterPos4sv
- glReadBuffer
- glReadInstrumentsSGIX
- glReadPixels
- glRectd
- glRectdv
- glRectf
- glRectfv
- glRecti
- glRectiv
- glRects
- glRectsv
- glReferencePlaneSGIX
- glRenderMode
- glRequestResidentProgramsNV
- glResetHistogram
- glResetHistogramEXT
- glResetMinmax
- glResetMinmaxEXT
- glResizeBuffersMESA
- glRotated
- glRotatef
- glSampleCoverage
- glSampleCoverageARB
- glSampleMaskEXT
- glSampleMaskSGIS
- glSamplePatternEXT
- glSamplePatternSGIS
- glScaled
- glScalef
- glScissor
- glSecondaryColorPointer
- glSecondaryColorPointerEXT
- glSecondaryColor3b
- glSecondaryColor3bEXT
- glSecondaryColor3bv
- glSecondaryColor3bvEXT
- glSecondaryColor3d
- glSecondaryColor3dEXT
- glSecondaryColor3dv
- glSecondaryColor3dvEXT
- glSecondaryColor3f
- glSecondaryColor3fEXT
- glSecondaryColor3fv
- glSecondaryColor3fvEXT
- glSecondaryColor3i
- glSecondaryColor3iEXT
- glSecondaryColor3iv
- glSecondaryColor3ivEXT
- glSecondaryColor3s
- glSecondaryColor3sEXT
- glSecondaryColor3sv
- glSecondaryColor3svEXT
- glSecondaryColor3ub
- glSecondaryColor3ubEXT
- glSecondaryColor3ubv
- glSecondaryColor3ubvEXT
- glSecondaryColor3ui
- glSecondaryColor3uiEXT
- glSecondaryColor3uiv
- glSecondaryColor3uivEXT
- glSecondaryColor3us
- glSecondaryColor3usEXT
- glSecondaryColor3usv
- glSecondaryColor3usvEXT
- glSelectBuffer
- glSeparableFilter2D
- glSeparableFilter2DEXT
- glSetFenceNV
- glShadeModel
- glSharpenTexFuncSGIS
- glSpriteParameterfSGIX
- glSpriteParameterfvSGIX
- glSpriteParameteriSGIX
- glSpriteParameterivSGIX
- glStartInstrumentsSGIX
- glStencilFunc
- glStencilMask
- glStencilOp
- glStopInstrumentsSGIX
- glTagSampleBufferSGIX
- glTbufferMask3DFX
- glTestFenceNV
- glTexCoordPointer
- glTexCoordPointerEXT
- glTexCoord1d
- glTexCoord1dv
- glTexCoord1f
- glTexCoord1fv
- glTexCoord1i
- glTexCoord1iv
- glTexCoord1s
- glTexCoord1sv
- glTexCoord2d
- glTexCoord2dv
- glTexCoord2f
- glTexCoord2fv
- glTexCoord2i
- glTexCoord2iv
- glTexCoord2s
- glTexCoord2sv
- glTexCoord3d
- glTexCoord3dv
- glTexCoord3f
- glTexCoord3fv
- glTexCoord3i
- glTexCoord3iv
- glTexCoord3s
- glTexCoord3sv
- glTexCoord4d
- glTexCoord4dv
- glTexCoord4f
- glTexCoord4fv
- glTexCoord4i
- glTexCoord4iv
- glTexCoord4s
- glTexCoord4sv
- glTexEnvf
- glTexEnvfv
- glTexEnvi
- glTexEnviv
- glTexFilterFuncSGIS
- glTexGend
- glTexGendv
- glTexGenf
- glTexGenfv
- glTexGeni
- glTexGeniv
- glTexImage1D
- glTexImage2D
- glTexImage3D
- glTexImage3DEXT
- glTexImage4DSGIS
- glTexParameterf
- glTexParameterfv
- glTexParameteri
- glTexParameteriv
- glTexSubImage1D
- glTexSubImage1DEXT
- glTexSubImage2D
- glTexSubImage2DEXT
- glTexSubImage3D
- glTexSubImage3DEXT
- glTexSubImage4DSGIS
- glTrackMatrixNV
- glTranslated
- glTranslatef
- glUnlockArraysEXT
- glUnmapBufferARB
- glVertexArrayRangeNV
- glVertexAttribPointerARB
- glVertexAttribPointerNV
- glVertexAttribs1dvNV
- glVertexAttribs1fvNV
- glVertexAttribs1svNV
- glVertexAttribs2dvNV
- glVertexAttribs2fvNV
- glVertexAttribs2svNV
- glVertexAttribs3dvNV
- glVertexAttribs3fvNV
- glVertexAttribs3svNV
- glVertexAttribs4dvNV
- glVertexAttribs4fvNV
- glVertexAttribs4svNV
- glVertexAttribs4ubvNV
- glVertexAttrib1dARB
- glVertexAttrib1dNV
- glVertexAttrib1dvARB
- glVertexAttrib1dvNV
- glVertexAttrib1fARB
- glVertexAttrib1fNV
- glVertexAttrib1fvARB
- glVertexAttrib1fvNV
- glVertexAttrib1sARB
- glVertexAttrib1sNV
- glVertexAttrib1svARB
- glVertexAttrib1svNV
- glVertexAttrib2dARB
- glVertexAttrib2dNV
- glVertexAttrib2dvARB
- glVertexAttrib2dvNV
- glVertexAttrib2fARB
- glVertexAttrib2fNV
- glVertexAttrib2fvARB
- glVertexAttrib2fvNV
- glVertexAttrib2sARB
- glVertexAttrib2sNV
- glVertexAttrib2svARB
- glVertexAttrib2svNV
- glVertexAttrib3dARB
- glVertexAttrib3dNV
- glVertexAttrib3dvARB
- glVertexAttrib3dvNV
- glVertexAttrib3fARB
- glVertexAttrib3fNV
- glVertexAttrib3fvARB
- glVertexAttrib3fvNV
- glVertexAttrib3sARB
- glVertexAttrib3sNV
- glVertexAttrib3svARB
- glVertexAttrib3svNV
- glVertexAttrib4bvARB
- glVertexAttrib4dARB
- glVertexAttrib4dNV
- glVertexAttrib4dvARB
- glVertexAttrib4dvNV
- glVertexAttrib4fARB
- glVertexAttrib4fNV
- glVertexAttrib4fvARB
- glVertexAttrib4fvNV
- glVertexAttrib4ivARB
- glVertexAttrib4NbvARB
- glVertexAttrib4NivARB
- glVertexAttrib4NsvARB
- glVertexAttrib4NubARB
- glVertexAttrib4NubvARB
- glVertexAttrib4NuivARB
- glVertexAttrib4NusvARB
- glVertexAttrib4sARB
- glVertexAttrib4sNV
- glVertexAttrib4svARB
- glVertexAttrib4svNV
- glVertexAttrib4ubNV
- glVertexAttrib4ubvARB
- glVertexAttrib4ubvNV
- glVertexAttrib4uivARB
- glVertexAttrib4usvARB
- glVertexPointer
- glVertexPointerEXT
- glVertexWeightfEXT
- glVertexWeightfvEXT
- glVertexWeightPointerEXT
- glVertex2d
- glVertex2dv
- glVertex2f
- glVertex2fv
- glVertex2i
- glVertex2iv
- glVertex2s
- glVertex2sv
- glVertex3d
- glVertex3dv
- glVertex3f
- glVertex3fv
- glVertex3i
- glVertex3iv
- glVertex3s
- glVertex3sv
- glVertex4d
- glVertex4dv
- glVertex4f
- glVertex4fv
- glVertex4i
- glVertex4iv
- glVertex4s
- glVertex4sv
- glViewport
- glWindowPos2d
- glWindowPos2dARB
- glWindowPos2dMESA
- glWindowPos2dv
- glWindowPos2dvARB
- glWindowPos2dvMESA
- glWindowPos2f
- glWindowPos2fARB
- glWindowPos2fMESA
- glWindowPos2fv
- glWindowPos2fvARB
- glWindowPos2fvMESA
- glWindowPos2i
- glWindowPos2iARB
- glWindowPos2iMESA
- glWindowPos2iv
- glWindowPos2ivARB
- glWindowPos2ivMESA
- glWindowPos2s
- glWindowPos2sARB
- glWindowPos2sMESA
- glWindowPos2sv
- glWindowPos2svARB
- glWindowPos2svMESA
- glWindowPos3d
- glWindowPos3dARB
- glWindowPos3dMESA
- glWindowPos3dv
- glWindowPos3dvARB
- glWindowPos3dvMESA
- glWindowPos3f
- glWindowPos3fARB
- glWindowPos3fMESA
- glWindowPos3fv
- glWindowPos3fvARB
- glWindowPos3fvMESA
- glWindowPos3i
- glWindowPos3iARB
- glWindowPos3iMESA
- glWindowPos3iv
- glWindowPos3ivARB
- glWindowPos3ivMESA
- glWindowPos3s
- glWindowPos3sARB
- glWindowPos3sMESA
- glWindowPos3sv
- glWindowPos3svARB
- glWindowPos3svMESA
- glWindowPos4dMESA
- glWindowPos4dvMESA
- glWindowPos4fMESA
- glWindowPos4fvMESA
- glWindowPos4iMESA
- glWindowPos4ivMESA
- glWindowPos4sMESA
- glWindowPos4svMESA
- fxCloseHardware
-;fxGetScreenGeometry
- fxMesaCreateBestContext
- fxMesaCreateContext
- fxMesaDestroyContext
- fxMesaGetCurrentContext
- fxMesaMakeCurrent
- fxMesaSelectCurrentBoard
-;fxMesaSetNearFar
- fxMesaSwapBuffers
- fxMesaUpdateScreenSize
- wglChoosePixelFormat
- wglCopyContext
- wglCreateContext
- wglCreateLayerContext
- wglDeleteContext
- wglDescribeLayerPlane
- wglDescribePixelFormat
- wglGetCurrentContext
- wglGetCurrentDC
- wglGetDefaultProcAddress
- wglGetLayerPaletteEntries
- wglGetPixelFormat
- wglGetProcAddress
- wglMakeCurrent
- wglRealizeLayerPalette
- wglSetLayerPaletteEntries
- wglSetPixelFormat
- wglShareLists
- wglSwapBuffers
- wglSwapLayerBuffers
- wglUseFontBitmapsA
- wglUseFontBitmapsW
- wglUseFontOutlinesA
- wglUseFontOutlinesW
- ChoosePixelFormat
- DescribePixelFormat
- GetPixelFormat
- SetPixelFormat
- SwapBuffers
- DrvCopyContext
- DrvCreateContext
- DrvCreateLayerContext
- DrvDeleteContext
- DrvDescribeLayerPlane
- DrvDescribePixelFormat
- DrvGetLayerPaletteEntries
- DrvGetProcAddress
- DrvReleaseContext
- DrvRealizeLayerPalette
- DrvSetContext
- DrvSetLayerPaletteEntries
- DrvSetPixelFormat
- DrvShareLists
- DrvSwapBuffers
- DrvSwapLayerBuffers
- DrvValidateVersion
diff --git a/src/mesa/drivers/windows/fx/fxwgl.c b/src/mesa/drivers/windows/fx/fxwgl.c
deleted file mode 100644
index ce76ecd1568..00000000000
--- a/src/mesa/drivers/windows/fx/fxwgl.c
+++ /dev/null
@@ -1,1307 +0,0 @@
-/*
- * Mesa 3-D graphics library
- * Version: 4.0
- *
- * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* Authors:
- * David Bucciarelli
- * Brian Paul
- * Keith Whitwell
- * Hiroshi Morii
- * Daniel Borca
- */
-
-/* fxwgl.c - Microsoft wgl functions emulation for
- * 3Dfx VooDoo/Mesa interface
- */
-
-
-#ifdef _WIN32
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <windows.h>
-#define GL_GLEXT_PROTOTYPES
-#include "GL/gl.h"
-#include "GL/glext.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-#include "GL/fxmesa.h"
-#include "glheader.h"
-#include "glapi.h"
-#include "imports.h"
-#include "../../glide/fxdrv.h"
-
-#define MAX_MESA_ATTRS 20
-
-#if (_MSC_VER >= 1200)
-#pragma warning( push )
-#pragma warning( disable : 4273 )
-#endif
-
-struct __extensions__ {
- PROC proc;
- char *name;
-};
-
-struct __pixelformat__ {
- PIXELFORMATDESCRIPTOR pfd;
- GLint mesaAttr[MAX_MESA_ATTRS];
-};
-
-WINGDIAPI void GLAPIENTRY gl3DfxSetPaletteEXT(GLuint *);
-static GLushort gammaTable[3 * 256];
-
-struct __pixelformat__ pix[] = {
- /* 16bit RGB565 single buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
- PFD_TYPE_RGBA,
- 16,
- 5, 0, 6, 5, 5, 11, 0, 0,
- 0, 0, 0, 0, 0,
- 16,
- 0,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 16,
- FXMESA_ALPHA_SIZE, 0,
- FXMESA_DEPTH_SIZE, 16,
- FXMESA_STENCIL_SIZE, 0,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
- ,
- /* 16bit RGB565 double buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
- PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
- PFD_TYPE_RGBA,
- 16,
- 5, 0, 6, 5, 5, 11, 0, 0,
- 0, 0, 0, 0, 0,
- 16,
- 0,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 16,
- FXMESA_DOUBLEBUFFER,
- FXMESA_ALPHA_SIZE, 0,
- FXMESA_DEPTH_SIZE, 16,
- FXMESA_STENCIL_SIZE, 0,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
- ,
- /* 16bit ARGB1555 single buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
- PFD_TYPE_RGBA,
- 16,
- 5, 0, 5, 5, 5, 10, 1, 15,
- 0, 0, 0, 0, 0,
- 16,
- 0,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 15,
- FXMESA_ALPHA_SIZE, 1,
- FXMESA_DEPTH_SIZE, 16,
- FXMESA_STENCIL_SIZE, 0,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
- ,
- /* 16bit ARGB1555 double buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
- PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
- PFD_TYPE_RGBA,
- 16,
- 5, 0, 5, 5, 5, 10, 1, 15,
- 0, 0, 0, 0, 0,
- 16,
- 0,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 15,
- FXMESA_DOUBLEBUFFER,
- FXMESA_ALPHA_SIZE, 1,
- FXMESA_DEPTH_SIZE, 16,
- FXMESA_STENCIL_SIZE, 0,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
- ,
- /* 32bit ARGB8888 single buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
- PFD_TYPE_RGBA,
- 32,
- 8, 0, 8, 8, 8, 16, 8, 24,
- 0, 0, 0, 0, 0,
- 24,
- 8,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 32,
- FXMESA_ALPHA_SIZE, 8,
- FXMESA_DEPTH_SIZE, 24,
- FXMESA_STENCIL_SIZE, 8,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
- ,
- /* 32bit ARGB8888 double buffer with depth */
- {
- {sizeof(PIXELFORMATDESCRIPTOR), 1,
- PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
- PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
- PFD_TYPE_RGBA,
- 32,
- 8, 0, 8, 8, 8, 16, 8, 24,
- 0, 0, 0, 0, 0,
- 24,
- 8,
- 0,
- PFD_MAIN_PLANE,
- 0, 0, 0, 0}
- ,
- {FXMESA_COLORDEPTH, 32,
- FXMESA_DOUBLEBUFFER,
- FXMESA_ALPHA_SIZE, 8,
- FXMESA_DEPTH_SIZE, 24,
- FXMESA_STENCIL_SIZE, 8,
- FXMESA_ACCUM_SIZE, 0,
- FXMESA_NONE}
- }
-};
-
-static fxMesaContext ctx = NULL;
-static WNDPROC hWNDOldProc;
-static int curPFD = 0;
-static HDC hDC;
-static HWND hWND;
-
-static GLboolean haveDualHead;
-
-/* For the in-window-rendering hack */
-
-#ifndef GR_CONTROL_RESIZE
-/* Apparently GR_CONTROL_RESIZE can be ignored. OK? */
-#define GR_CONTROL_RESIZE -1
-#endif
-
-static GLboolean gdiWindowHack;
-static void *dibSurfacePtr;
-static BITMAPINFO *dibBMI;
-static HBITMAP dibHBM;
-static HWND dibWnd;
-
-static int
-env_check (const char *var, int val)
-{
- const char *env = getenv(var);
- return (env && (env[0] == val));
-}
-
-static LRESULT APIENTRY
-__wglMonitor (HWND hwnd, UINT message, UINT wParam, LONG lParam)
-{
- long ret; /* Now gives the resized window at the end to hWNDOldProc */
-
- if (ctx && hwnd == hWND) {
- switch (message) {
- case WM_PAINT:
- case WM_MOVE:
- break;
- case WM_DISPLAYCHANGE:
- case WM_SIZE:
-#if 0
- if (wParam != SIZE_MINIMIZED) {
- static int moving = 0;
- if (!moving) {
- if (!FX_grSstControl(GR_CONTROL_RESIZE)) {
- moving = 1;
- SetWindowPos(hwnd, 0, 0, 0, 300, 300, SWP_NOMOVE | SWP_NOZORDER);
- moving = 0;
- if (!FX_grSstControl(GR_CONTROL_RESIZE)) {
- /*MessageBox(0,_T("Error changing windowsize"),_T("fxMESA"),MB_OK);*/
- PostMessage(hWND, WM_CLOSE, 0, 0);
- }
- }
- /* Do the clipping in the glide library */
- grClipWindow(0, 0, FX_grSstScreenWidth(), FX_grSstScreenHeight());
- /* And let the new size set in the context */
- fxMesaUpdateScreenSize(ctx);
- }
- }
-#endif
- break;
- case WM_ACTIVATE:
- break;
- case WM_SHOWWINDOW:
- break;
- case WM_SYSKEYDOWN:
- case WM_SYSCHAR:
- break;
- }
- }
-
- /* Finally call the hWNDOldProc, which handles the resize with the
- * now changed window sizes */
- ret = CallWindowProc(hWNDOldProc, hwnd, message, wParam, lParam);
-
- return ret;
-}
-
-static void
-wgl_error (long error)
-{
-#define WGL_INVALID_PIXELFORMAT ERROR_INVALID_PIXEL_FORMAT
- SetLastError(0xC0000000 /* error severity */
- |0x00070000 /* error facility (who we are) */
- |error);
-}
-
-GLAPI BOOL GLAPIENTRY
-wglCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask)
-{
- return FALSE;
-}
-
-GLAPI HGLRC GLAPIENTRY
-wglCreateContext (HDC hdc)
-{
- HWND hWnd;
- WNDPROC oldProc;
- int error;
-
- if (ctx) {
- SetLastError(0);
- return NULL;
- }
-
- if (!(hWnd = WindowFromDC(hdc))) {
- SetLastError(0);
- return NULL;
- }
-
- if (curPFD == 0) {
- wgl_error(WGL_INVALID_PIXELFORMAT);
- return NULL;
- }
-
- if ((oldProc = (WNDPROC)GetWindowLong(hWnd, GWL_WNDPROC)) != __wglMonitor) {
- hWNDOldProc = oldProc;
- SetWindowLong(hWnd, GWL_WNDPROC, (LONG)__wglMonitor);
- }
-
- /* always log when debugging, or if user demands */
- if (TDFX_DEBUG || env_check("MESA_FX_INFO", 'r')) {
- freopen("MESA.LOG", "w", stderr);
- }
-
- {
- RECT cliRect;
- ShowWindow(hWnd, SW_SHOWNORMAL);
- SetForegroundWindow(hWnd);
- Sleep(100); /* a hack for win95 */
- if (env_check("MESA_GLX_FX", 'w') && !(GetWindowLong(hWnd, GWL_STYLE) & WS_POPUP)) {
- /* XXX todo - windowed modes */
- error = !(ctx = fxMesaCreateContext((GLuint) hWnd, GR_RESOLUTION_NONE, GR_REFRESH_NONE, pix[curPFD - 1].mesaAttr));
- } else {
- GetClientRect(hWnd, &cliRect);
- error = !(ctx = fxMesaCreateBestContext((GLuint) hWnd, cliRect.right, cliRect.bottom, pix[curPFD - 1].mesaAttr));
- }
- }
-
- /*if (getenv("SST_DUALHEAD"))
- haveDualHead =
- ((atoi(getenv("SST_DUALHEAD")) == 1) ? GL_TRUE : GL_FALSE);
- else
- haveDualHead = GL_FALSE;*/
-
- if (error) {
- SetLastError(0);
- return NULL;
- }
-
- hDC = hdc;
- hWND = hWnd;
-
- /* Required by the OpenGL Optimizer 1.1 (is it a Optimizer bug ?) */
- wglMakeCurrent(hdc, (HGLRC)1);
-
- return (HGLRC)1;
-}
-
-GLAPI HGLRC GLAPIENTRY
-wglCreateLayerContext (HDC hdc, int iLayerPlane)
-{
- SetLastError(0);
- return NULL;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglDeleteContext (HGLRC hglrc)
-{
- if (ctx && hglrc == (HGLRC)1) {
-
- fxMesaDestroyContext(ctx);
-
- SetWindowLong(WindowFromDC(hDC), GWL_WNDPROC, (LONG) hWNDOldProc);
-
- ctx = NULL;
- hDC = 0;
- return TRUE;
- }
-
- SetLastError(0);
-
- return FALSE;
-}
-
-GLAPI HGLRC GLAPIENTRY
-wglGetCurrentContext (VOID)
-{
- if (ctx)
- return (HGLRC)1;
-
- SetLastError(0);
- return NULL;
-}
-
-GLAPI HDC GLAPIENTRY
-wglGetCurrentDC (VOID)
-{
- if (ctx)
- return hDC;
-
- SetLastError(0);
- return NULL;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglSwapIntervalEXT (int interval)
-{
- if (ctx == NULL) {
- return FALSE;
- }
- if (interval < 0) {
- interval = 0;
- } else if (interval > 3) {
- interval = 3;
- }
- ctx->swapInterval = interval;
- return TRUE;
-}
-
-GLAPI int GLAPIENTRY
-wglGetSwapIntervalEXT (void)
-{
- return (ctx == NULL) ? -1 : ctx->swapInterval;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglGetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays)
-{
- /* gammaTable should be per-context */
- memcpy(arrays, gammaTable, 3 * 256 * sizeof(GLushort));
- return TRUE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglSetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays)
-{
- GLint i, tableSize, inc, index;
- GLushort *red, *green, *blue;
- FxU32 gammaTableR[256], gammaTableG[256], gammaTableB[256];
-
- /* gammaTable should be per-context */
- memcpy(gammaTable, arrays, 3 * 256 * sizeof(GLushort));
-
- tableSize = FX_grGetInteger(GR_GAMMA_TABLE_ENTRIES);
- inc = 256 / tableSize;
- red = (GLushort *)arrays;
- green = (GLushort *)arrays + 256;
- blue = (GLushort *)arrays + 512;
- for (i = 0, index = 0; i < tableSize; i++, index += inc) {
- gammaTableR[i] = red[index] >> 8;
- gammaTableG[i] = green[index] >> 8;
- gammaTableB[i] = blue[index] >> 8;
- }
-
- grLoadGammaTable(tableSize, gammaTableR, gammaTableG, gammaTableB);
-
- return TRUE;
-}
-
-typedef void *HPBUFFERARB;
-
-/* WGL_ARB_pixel_format */
-GLAPI BOOL GLAPIENTRY
-wglGetPixelFormatAttribivARB (HDC hdc,
- int iPixelFormat,
- int iLayerPlane,
- UINT nAttributes,
- const int *piAttributes,
- int *piValues)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglGetPixelFormatAttribfvARB (HDC hdc,
- int iPixelFormat,
- int iLayerPlane,
- UINT nAttributes,
- const int *piAttributes,
- FLOAT *pfValues)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglChoosePixelFormatARB (HDC hdc,
- const int *piAttribIList,
- const FLOAT *pfAttribFList,
- UINT nMaxFormats,
- int *piFormats,
- UINT *nNumFormats)
-{
- SetLastError(0);
- return FALSE;
-}
-
-/* WGL_ARB_render_texture */
-GLAPI BOOL GLAPIENTRY
-wglBindTexImageARB (HPBUFFERARB hPbuffer, int iBuffer)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglReleaseTexImageARB (HPBUFFERARB hPbuffer, int iBuffer)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglSetPbufferAttribARB (HPBUFFERARB hPbuffer,
- const int *piAttribList)
-{
- SetLastError(0);
- return FALSE;
-}
-
-/* WGL_ARB_pbuffer */
-GLAPI HPBUFFERARB GLAPIENTRY
-wglCreatePbufferARB (HDC hDC,
- int iPixelFormat,
- int iWidth,
- int iHeight,
- const int *piAttribList)
-{
- SetLastError(0);
- return NULL;
-}
-
-GLAPI HDC GLAPIENTRY
-wglGetPbufferDCARB (HPBUFFERARB hPbuffer)
-{
- SetLastError(0);
- return NULL;
-}
-
-GLAPI int GLAPIENTRY
-wglReleasePbufferDCARB (HPBUFFERARB hPbuffer, HDC hDC)
-{
- SetLastError(0);
- return -1;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglDestroyPbufferARB (HPBUFFERARB hPbuffer)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglQueryPbufferARB (HPBUFFERARB hPbuffer,
- int iAttribute,
- int *piValue)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI const char * GLAPIENTRY
-wglGetExtensionsStringEXT (void)
-{
- return "WGL_3DFX_gamma_control "
- "WGL_EXT_swap_control "
- "WGL_EXT_extensions_string WGL_ARB_extensions_string"
- /*WGL_ARB_pixel_format WGL_ARB_render_texture WGL_ARB_pbuffer*/;
-}
-
-GLAPI const char * GLAPIENTRY
-wglGetExtensionsStringARB (HDC hdc)
-{
- return wglGetExtensionsStringEXT();
-}
-
-static struct {
- const char *name;
- PROC func;
-} wgl_ext[] = {
- {"wglGetExtensionsStringARB", (PROC)wglGetExtensionsStringARB},
- {"wglGetExtensionsStringEXT", (PROC)wglGetExtensionsStringEXT},
- {"wglSwapIntervalEXT", (PROC)wglSwapIntervalEXT},
- {"wglGetSwapIntervalEXT", (PROC)wglGetSwapIntervalEXT},
- {"wglGetDeviceGammaRamp3DFX", (PROC)wglGetDeviceGammaRamp3DFX},
- {"wglSetDeviceGammaRamp3DFX", (PROC)wglSetDeviceGammaRamp3DFX},
- /* WGL_ARB_pixel_format */
- {"wglGetPixelFormatAttribivARB", (PROC)wglGetPixelFormatAttribivARB},
- {"wglGetPixelFormatAttribfvARB", (PROC)wglGetPixelFormatAttribfvARB},
- {"wglChoosePixelFormatARB", (PROC)wglChoosePixelFormatARB},
- /* WGL_ARB_render_texture */
- {"wglBindTexImageARB", (PROC)wglBindTexImageARB},
- {"wglReleaseTexImageARB", (PROC)wglReleaseTexImageARB},
- {"wglSetPbufferAttribARB", (PROC)wglSetPbufferAttribARB},
- /* WGL_ARB_pbuffer */
- {"wglCreatePbufferARB", (PROC)wglCreatePbufferARB},
- {"wglGetPbufferDCARB", (PROC)wglGetPbufferDCARB},
- {"wglReleasePbufferDCARB", (PROC)wglReleasePbufferDCARB},
- {"wglDestroyPbufferARB", (PROC)wglDestroyPbufferARB},
- {"wglQueryPbufferARB", (PROC)wglQueryPbufferARB},
- {NULL, NULL}
-};
-
-GLAPI PROC GLAPIENTRY
-wglGetProcAddress (LPCSTR lpszProc)
-{
- int i;
- PROC p = (PROC)_glapi_get_proc_address((const char *)lpszProc);
-
- /* we can't BlendColor. work around buggy applications */
- if (p && strcmp(lpszProc, "glBlendColor")
- && strcmp(lpszProc, "glBlendColorEXT"))
- return p;
-
- for (i = 0; wgl_ext[i].name; i++) {
- if (!strcmp(lpszProc, wgl_ext[i].name)) {
- return wgl_ext[i].func;
- }
- }
-
- SetLastError(0);
- return NULL;
-}
-
-GLAPI PROC GLAPIENTRY
-wglGetDefaultProcAddress (LPCSTR lpszProc)
-{
- SetLastError(0);
- return NULL;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglMakeCurrent (HDC hdc, HGLRC hglrc)
-{
- if ((hdc == NULL) && (hglrc == NULL))
- return TRUE;
-
- if (!ctx || hglrc != (HGLRC)1 || WindowFromDC(hdc) != hWND) {
- SetLastError(0);
- return FALSE;
- }
-
- hDC = hdc;
-
- fxMesaMakeCurrent(ctx);
-
- return TRUE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglShareLists (HGLRC hglrc1, HGLRC hglrc2)
-{
- if (!ctx || hglrc1 != (HGLRC)1 || hglrc1 != hglrc2) {
- SetLastError(0);
- return FALSE;
- }
-
- return TRUE;
-}
-
-static BOOL
-wglUseFontBitmaps_FX (HDC fontDevice, DWORD firstChar, DWORD numChars,
- DWORD listBase)
-{
- TEXTMETRIC metric;
- BITMAPINFO *dibInfo;
- HDC bitDevice;
- COLORREF tempColor;
- int i;
-
- GetTextMetrics(fontDevice, &metric);
-
- dibInfo = (BITMAPINFO *)calloc(sizeof(BITMAPINFO) + sizeof(RGBQUAD), 1);
- dibInfo->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
- dibInfo->bmiHeader.biPlanes = 1;
- dibInfo->bmiHeader.biBitCount = 1;
- dibInfo->bmiHeader.biCompression = BI_RGB;
-
- bitDevice = CreateCompatibleDC(fontDevice);
-
- /* Swap fore and back colors so the bitmap has the right polarity */
- tempColor = GetBkColor(bitDevice);
- SetBkColor(bitDevice, GetTextColor(bitDevice));
- SetTextColor(bitDevice, tempColor);
-
- /* Place chars based on base line */
- SetTextAlign(bitDevice, TA_BASELINE);
-
- for (i = 0; i < (int)numChars; i++) {
- SIZE size;
- char curChar;
- int charWidth, charHeight, bmapWidth, bmapHeight, numBytes, res;
- HBITMAP bitObject;
- HGDIOBJ origBmap;
- unsigned char *bmap;
-
- curChar = (char)(i + firstChar); /* [koolsmoky] explicit cast */
-
- /* Find how high/wide this character is */
- GetTextExtentPoint32(bitDevice, &curChar, 1, &size);
-
- /* Create the output bitmap */
- charWidth = size.cx;
- charHeight = size.cy;
- bmapWidth = ((charWidth + 31) / 32) * 32; /* Round up to the next multiple of 32 bits */
- bmapHeight = charHeight;
- bitObject = CreateCompatibleBitmap(bitDevice, bmapWidth, bmapHeight);
- /*VERIFY(bitObject);*/
-
- /* Assign the output bitmap to the device */
- origBmap = SelectObject(bitDevice, bitObject);
-
- PatBlt(bitDevice, 0, 0, bmapWidth, bmapHeight, BLACKNESS);
-
- /* Use our source font on the device */
- SelectObject(bitDevice, GetCurrentObject(fontDevice, OBJ_FONT));
-
- /* Draw the character */
- TextOut(bitDevice, 0, metric.tmAscent, &curChar, 1);
-
- /* Unselect our bmap object */
- SelectObject(bitDevice, origBmap);
-
- /* Convert the display dependant representation to a 1 bit deep DIB */
- numBytes = (bmapWidth * bmapHeight) / 8;
- bmap = MALLOC(numBytes);
- dibInfo->bmiHeader.biWidth = bmapWidth;
- dibInfo->bmiHeader.biHeight = bmapHeight;
- res = GetDIBits(bitDevice, bitObject, 0, bmapHeight, bmap,
- dibInfo, DIB_RGB_COLORS);
-
- /* Create the GL object */
- glNewList(i + listBase, GL_COMPILE);
- glBitmap(bmapWidth, bmapHeight, 0.0, metric.tmDescent,
- charWidth, 0.0, bmap);
- glEndList();
- /* CheckGL(); */
-
- /* Destroy the bmap object */
- DeleteObject(bitObject);
-
- /* Deallocate the bitmap data */
- FREE(bmap);
- }
-
- /* Destroy the DC */
- DeleteDC(bitDevice);
-
- FREE(dibInfo);
-
- return TRUE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglUseFontBitmapsW (HDC hdc, DWORD first, DWORD count, DWORD listBase)
-{
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglUseFontOutlinesA (HDC hdc, DWORD first, DWORD count,
- DWORD listBase, FLOAT deviation,
- FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglUseFontOutlinesW (HDC hdc, DWORD first, DWORD count,
- DWORD listBase, FLOAT deviation,
- FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf)
-{
- SetLastError(0);
- return FALSE;
-}
-
-
-GLAPI BOOL GLAPIENTRY
-wglSwapLayerBuffers (HDC hdc, UINT fuPlanes)
-{
- if (ctx && WindowFromDC(hdc) == hWND) {
- fxMesaSwapBuffers();
-
- return TRUE;
- }
-
- SetLastError(0);
- return FALSE;
-}
-
-static int
-pfd_tablen (void)
-{
- /* we should take an envvar for `fxMesaSelectCurrentBoard' */
- return (fxMesaSelectCurrentBoard(0) < GR_SSTTYPE_Voodoo4)
- ? 2 /* only 16bit entries */
- : sizeof(pix) / sizeof(pix[0]); /* full table */
-}
-
-GLAPI int GLAPIENTRY
-wglChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd)
-{
- int i, best = -1, qt_valid_pix;
- PIXELFORMATDESCRIPTOR pfd = *ppfd;
-
- qt_valid_pix = pfd_tablen();
-
-#if 1 || QUAKE2 || GORE
- /* QUAKE2: 24+32 */
- /* GORE : 24+16 */
- if ((pfd.cColorBits == 24) || (pfd.cColorBits == 32)) {
- /* the first 2 entries are 16bit */
- pfd.cColorBits = (qt_valid_pix > 2) ? 32 : 16;
- }
- if (pfd.cColorBits == 32) {
- pfd.cDepthBits = 24;
- } else if (pfd.cColorBits == 16) {
- pfd.cDepthBits = 16;
- }
-#endif
-
- if (pfd.nSize != sizeof(PIXELFORMATDESCRIPTOR) || pfd.nVersion != 1) {
- SetLastError(0);
- return 0;
- }
-
- for (i = 0; i < qt_valid_pix; i++) {
- if (pfd.cColorBits > 0 && pix[i].pfd.cColorBits != pfd.cColorBits)
- continue;
-
- if ((pfd.dwFlags & PFD_DRAW_TO_WINDOW)
- && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_WINDOW)) continue;
- if ((pfd.dwFlags & PFD_DRAW_TO_BITMAP)
- && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_BITMAP)) continue;
- if ((pfd.dwFlags & PFD_SUPPORT_GDI)
- && !(pix[i].pfd.dwFlags & PFD_SUPPORT_GDI)) continue;
- if ((pfd.dwFlags & PFD_SUPPORT_OPENGL)
- && !(pix[i].pfd.dwFlags & PFD_SUPPORT_OPENGL)) continue;
- if (!(pfd.dwFlags & PFD_DOUBLEBUFFER_DONTCARE)
- && ((pfd.dwFlags & PFD_DOUBLEBUFFER) !=
- (pix[i].pfd.dwFlags & PFD_DOUBLEBUFFER))) continue;
-#if 1 /* Doom3 fails here! */
- if (!(pfd.dwFlags & PFD_STEREO_DONTCARE)
- && ((pfd.dwFlags & PFD_STEREO) !=
- (pix[i].pfd.dwFlags & PFD_STEREO))) continue;
-#endif
-
- if (pfd.cDepthBits > 0 && pix[i].pfd.cDepthBits == 0)
- continue; /* need depth buffer */
-
- if (pfd.cAlphaBits > 0 && pix[i].pfd.cAlphaBits == 0)
- continue; /* need alpha buffer */
-
-#if 0 /* regression bug? */
- if (pfd.cStencilBits > 0 && pix[i].pfd.cStencilBits == 0)
- continue; /* need stencil buffer */
-#endif
-
- if (pfd.iPixelType == pix[i].pfd.iPixelType) {
- best = i + 1;
- break;
- }
- }
-
- if (best == -1) {
- FILE *err = fopen("MESA.LOG", "w");
- if (err != NULL) {
- fprintf(err, "wglChoosePixelFormat failed\n");
- fprintf(err, "\tnSize = %d\n", ppfd->nSize);
- fprintf(err, "\tnVersion = %d\n", ppfd->nVersion);
- fprintf(err, "\tdwFlags = %lu\n", ppfd->dwFlags);
- fprintf(err, "\tiPixelType = %d\n", ppfd->iPixelType);
- fprintf(err, "\tcColorBits = %d\n", ppfd->cColorBits);
- fprintf(err, "\tcRedBits = %d\n", ppfd->cRedBits);
- fprintf(err, "\tcRedShift = %d\n", ppfd->cRedShift);
- fprintf(err, "\tcGreenBits = %d\n", ppfd->cGreenBits);
- fprintf(err, "\tcGreenShift = %d\n", ppfd->cGreenShift);
- fprintf(err, "\tcBlueBits = %d\n", ppfd->cBlueBits);
- fprintf(err, "\tcBlueShift = %d\n", ppfd->cBlueShift);
- fprintf(err, "\tcAlphaBits = %d\n", ppfd->cAlphaBits);
- fprintf(err, "\tcAlphaShift = %d\n", ppfd->cAlphaShift);
- fprintf(err, "\tcAccumBits = %d\n", ppfd->cAccumBits);
- fprintf(err, "\tcAccumRedBits = %d\n", ppfd->cAccumRedBits);
- fprintf(err, "\tcAccumGreenBits = %d\n", ppfd->cAccumGreenBits);
- fprintf(err, "\tcAccumBlueBits = %d\n", ppfd->cAccumBlueBits);
- fprintf(err, "\tcAccumAlphaBits = %d\n", ppfd->cAccumAlphaBits);
- fprintf(err, "\tcDepthBits = %d\n", ppfd->cDepthBits);
- fprintf(err, "\tcStencilBits = %d\n", ppfd->cStencilBits);
- fprintf(err, "\tcAuxBuffers = %d\n", ppfd->cAuxBuffers);
- fprintf(err, "\tiLayerType = %d\n", ppfd->iLayerType);
- fprintf(err, "\tbReserved = %d\n", ppfd->bReserved);
- fprintf(err, "\tdwLayerMask = %lu\n", ppfd->dwLayerMask);
- fprintf(err, "\tdwVisibleMask = %lu\n", ppfd->dwVisibleMask);
- fprintf(err, "\tdwDamageMask = %lu\n", ppfd->dwDamageMask);
- fclose(err);
- }
-
- SetLastError(0);
- return 0;
- }
-
- return best;
-}
-
-GLAPI int GLAPIENTRY
-ChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd)
-{
-
- return wglChoosePixelFormat(hdc, ppfd);
-}
-
-GLAPI int GLAPIENTRY
-wglDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
- LPPIXELFORMATDESCRIPTOR ppfd)
-{
- int qt_valid_pix;
-
- qt_valid_pix = pfd_tablen();
-
- if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix ||
- ((nBytes != sizeof(PIXELFORMATDESCRIPTOR)) && (nBytes != 0))) {
- SetLastError(0);
- return qt_valid_pix;
- }
-
- if (nBytes != 0)
- *ppfd = pix[iPixelFormat - 1].pfd;
-
- return qt_valid_pix;
-}
-
-GLAPI int GLAPIENTRY
-DescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
- LPPIXELFORMATDESCRIPTOR ppfd)
-{
- return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd);
-}
-
-GLAPI int GLAPIENTRY
-wglGetPixelFormat (HDC hdc)
-{
- if (curPFD == 0) {
- SetLastError(0);
- return 0;
- }
-
- return curPFD;
-}
-
-GLAPI int GLAPIENTRY
-GetPixelFormat (HDC hdc)
-{
- return wglGetPixelFormat(hdc);
-}
-
-GLAPI BOOL GLAPIENTRY
-wglSetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd)
-{
- int qt_valid_pix;
-
- qt_valid_pix = pfd_tablen();
-
- if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix) {
- if (ppfd == NULL) {
- PIXELFORMATDESCRIPTOR my_pfd;
- if (!wglDescribePixelFormat(hdc, iPixelFormat, sizeof(PIXELFORMATDESCRIPTOR), &my_pfd)) {
- SetLastError(0);
- return FALSE;
- }
- } else if (ppfd->nSize != sizeof(PIXELFORMATDESCRIPTOR)) {
- SetLastError(0);
- return FALSE;
- }
- }
- curPFD = iPixelFormat;
-
- return TRUE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglSwapBuffers (HDC hdc)
-{
- if (!ctx) {
- SetLastError(0);
- return FALSE;
- }
-
- fxMesaSwapBuffers();
-
- return TRUE;
-}
-
-GLAPI BOOL GLAPIENTRY
-SetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd)
-{
- return wglSetPixelFormat(hdc, iPixelFormat, ppfd);
-}
-
-GLAPI BOOL GLAPIENTRY
-SwapBuffers(HDC hdc)
-{
- return wglSwapBuffers(hdc);
-}
-
-static FIXED
-FixedFromDouble (double d)
-{
- struct {
- FIXED f;
- long l;
- } pun;
- pun.l = (long)(d * 65536L);
- return pun.f;
-}
-
-/*
-** This was yanked from windows/gdi/wgl.c
-*/
-GLAPI BOOL GLAPIENTRY
-wglUseFontBitmapsA (HDC hdc, DWORD first, DWORD count, DWORD listBase)
-{
- int i;
- GLuint font_list;
- DWORD size;
- GLYPHMETRICS gm;
- HANDLE hBits;
- LPSTR lpBits;
- MAT2 mat;
- int success = TRUE;
-
- font_list = listBase;
-
- mat.eM11 = FixedFromDouble(1);
- mat.eM12 = FixedFromDouble(0);
- mat.eM21 = FixedFromDouble(0);
- mat.eM22 = FixedFromDouble(-1);
-
- memset(&gm, 0, sizeof(gm));
-
- /*
- ** If we can't get the glyph outline, it may be because this is a fixed
- ** font. Try processing it that way.
- */
- if (GetGlyphOutline(hdc, first, GGO_BITMAP, &gm, 0, NULL, &mat) == GDI_ERROR) {
- return wglUseFontBitmaps_FX(hdc, first, count, listBase);
- }
-
- /*
- ** Otherwise process all desired characters.
- */
- for (i = 0; i < count; i++) {
- DWORD err;
-
- glNewList(font_list + i, GL_COMPILE);
-
- /* allocate space for the bitmap/outline */
- size = GetGlyphOutline(hdc, first + i, GGO_BITMAP, &gm, 0, NULL, &mat);
- if (size == GDI_ERROR) {
- glEndList();
- err = GetLastError();
- success = FALSE;
- continue;
- }
-
- hBits = GlobalAlloc(GHND, size + 1);
- lpBits = GlobalLock(hBits);
-
- err = GetGlyphOutline(hdc, /* handle to device context */
- first + i, /* character to query */
- GGO_BITMAP, /* format of data to return */
- &gm, /* pointer to structure for metrics */
- size, /* size of buffer for data */
- lpBits, /* pointer to buffer for data */
- &mat /* pointer to transformation */
- /* matrix structure */
- );
-
- if (err == GDI_ERROR) {
- GlobalUnlock(hBits);
- GlobalFree(hBits);
-
- glEndList();
- err = GetLastError();
- success = FALSE;
- continue;
- }
-
- glBitmap(gm.gmBlackBoxX, gm.gmBlackBoxY,
- -gm.gmptGlyphOrigin.x,
- gm.gmptGlyphOrigin.y,
- gm.gmCellIncX, gm.gmCellIncY,
- (const GLubyte *)lpBits);
-
- GlobalUnlock(hBits);
- GlobalFree(hBits);
-
- glEndList();
- }
-
- return success;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglDescribeLayerPlane (HDC hdc, int iPixelFormat, int iLayerPlane,
- UINT nBytes, LPLAYERPLANEDESCRIPTOR ppfd)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI int GLAPIENTRY
-wglGetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart,
- int cEntries, COLORREF *pcr)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI BOOL GLAPIENTRY
-wglRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize)
-{
- SetLastError(0);
- return FALSE;
-}
-
-GLAPI int GLAPIENTRY
-wglSetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart,
- int cEntries, CONST COLORREF *pcr)
-{
- SetLastError(0);
- return FALSE;
-}
-
-
-/***************************************************************************
- * [dBorca] simplistic ICD implementation, based on ICD code by Gregor Anich
- */
-
-typedef struct _icdTable {
- DWORD size;
- PROC table[336];
-} ICDTABLE, *PICDTABLE;
-
-#ifdef USE_MGL_NAMESPACE
-#define GL_FUNC(func) mgl##func
-#else
-#define GL_FUNC(func) gl##func
-#endif
-
-static ICDTABLE icdTable = { 336, {
-#define ICD_ENTRY(func) (PROC)GL_FUNC(func),
-#include "../icd/icdlist.h"
-#undef ICD_ENTRY
-} };
-
-
-GLAPI BOOL GLAPIENTRY
-DrvCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask)
-{
- return wglCopyContext(hglrcSrc, hglrcDst, mask);
-}
-
-
-GLAPI HGLRC GLAPIENTRY
-DrvCreateContext (HDC hdc)
-{
- return wglCreateContext(hdc);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvDeleteContext (HGLRC hglrc)
-{
- return wglDeleteContext(hglrc);
-}
-
-
-GLAPI HGLRC GLAPIENTRY
-DrvCreateLayerContext (HDC hdc, int iLayerPlane)
-{
- return wglCreateContext(hdc);
-}
-
-
-GLAPI PICDTABLE GLAPIENTRY
-DrvSetContext (HDC hdc, HGLRC hglrc, void *callback)
-{
- return wglMakeCurrent(hdc, hglrc) ? &icdTable : NULL;
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvReleaseContext (HGLRC hglrc)
-{
- return TRUE;
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvShareLists (HGLRC hglrc1, HGLRC hglrc2)
-{
- return wglShareLists(hglrc1, hglrc2);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvDescribeLayerPlane (HDC hdc, int iPixelFormat,
- int iLayerPlane, UINT nBytes,
- LPLAYERPLANEDESCRIPTOR plpd)
-{
- return wglDescribeLayerPlane(hdc, iPixelFormat, iLayerPlane, nBytes, plpd);
-}
-
-
-GLAPI int GLAPIENTRY
-DrvSetLayerPaletteEntries (HDC hdc, int iLayerPlane,
- int iStart, int cEntries, CONST COLORREF *pcr)
-{
- return wglSetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);
-}
-
-
-GLAPI int GLAPIENTRY
-DrvGetLayerPaletteEntries (HDC hdc, int iLayerPlane,
- int iStart, int cEntries, COLORREF *pcr)
-{
- return wglGetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize)
-{
- return wglRealizeLayerPalette(hdc, iLayerPlane, bRealize);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvSwapLayerBuffers (HDC hdc, UINT fuPlanes)
-{
- return wglSwapLayerBuffers(hdc, fuPlanes);
-}
-
-GLAPI int GLAPIENTRY
-DrvDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
- LPPIXELFORMATDESCRIPTOR ppfd)
-{
- return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd);
-}
-
-
-GLAPI PROC GLAPIENTRY
-DrvGetProcAddress (LPCSTR lpszProc)
-{
- return wglGetProcAddress(lpszProc);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvSetPixelFormat (HDC hdc, int iPixelFormat)
-{
- return wglSetPixelFormat(hdc, iPixelFormat, NULL);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvSwapBuffers (HDC hdc)
-{
- return wglSwapBuffers(hdc);
-}
-
-
-GLAPI BOOL GLAPIENTRY
-DrvValidateVersion (DWORD version)
-{
- (void)version;
- return TRUE;
-}
-
-
-#if (_MSC_VER >= 1200)
-#pragma warning( pop )
-#endif
-
-#endif /* FX */
diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp
deleted file mode 100644
index 69f03b8e47c..00000000000
--- a/src/mesa/drivers/windows/gdi/InitCritSections.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "glapi.h"
-#include "glThread.h"
-
-#ifdef WIN32
-
-extern "C" _glthread_Mutex OneTimeLock;
-extern "C" _glthread_Mutex GenTexturesLock;
-
-extern "C" void FreeAllTSD(void);
-
-class _CriticalSectionInit
-{
-public:
- static _CriticalSectionInit m_inst;
-
- _CriticalSectionInit()
- {
- _glthread_INIT_MUTEX(OneTimeLock);
- _glthread_INIT_MUTEX(GenTexturesLock);
- }
-
- ~_CriticalSectionInit()
- {
- _glthread_DESTROY_MUTEX(OneTimeLock);
- _glthread_DESTROY_MUTEX(GenTexturesLock);
- FreeAllTSD();
- }
-};
-
-_CriticalSectionInit _CriticalSectionInit::m_inst;
-
-
-#endif /* WIN32 */
diff --git a/src/mesa/drivers/windows/gdi/wgl.c b/src/mesa/drivers/windows/gdi/wgl.c
index 1dafe6e2952..bf4ca9c908f 100644
--- a/src/mesa/drivers/windows/gdi/wgl.c
+++ b/src/mesa/drivers/windows/gdi/wgl.c
@@ -390,7 +390,7 @@ static FIXED FixedFromDouble(double d)
static BOOL wglUseFontBitmaps_FX(HDC fontDevice, DWORD firstChar,
DWORD numChars, DWORD listBase)
{
-#define VERIFY(a) a
+#define VERIFY(a) (void)(a)
TEXTMETRIC metric;
BITMAPINFO *dibInfo;
diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
index e3a37eb1ace..35a150d0687 100644
--- a/src/mesa/drivers/windows/gdi/wmesa.c
+++ b/src/mesa/drivers/windows/gdi/wmesa.c
@@ -5,7 +5,7 @@
#include "wmesadef.h"
#include "colors.h"
-#include <GL/wmesa.h>
+#include "GL/wmesa.h"
#include <winuser.h>
#include "main/context.h"
#include "main/extensions.h"
@@ -30,7 +30,7 @@ static WMesaFramebuffer FirstFramebuffer = NULL;
* Create a new WMesaFramebuffer object which will correspond to the
* given HDC (Window handle).
*/
-WMesaFramebuffer
+static WMesaFramebuffer
wmesa_new_framebuffer(HDC hdc, struct gl_config *visual)
{
WMesaFramebuffer pwfb
@@ -48,7 +48,7 @@ wmesa_new_framebuffer(HDC hdc, struct gl_config *visual)
/**
* Given an hdc, free the corresponding WMesaFramebuffer
*/
-void
+static void
wmesa_free_framebuffer(HDC hdc)
{
WMesaFramebuffer pwfb, prev;
@@ -71,7 +71,7 @@ wmesa_free_framebuffer(HDC hdc)
/**
* Given an hdc, return the corresponding WMesaFramebuffer
*/
-WMesaFramebuffer
+static WMesaFramebuffer
wmesa_lookup_framebuffer(HDC hdc)
{
WMesaFramebuffer pwfb;
@@ -147,9 +147,8 @@ static void wmSetPixelFormat(WMesaFramebuffer pwfb, HDC hDC)
* We write into this memory with the span routines and then blit it
* to the window on a buffer swap.
*/
-BOOL wmCreateBackingStore(WMesaFramebuffer pwfb, long lxSize, long lySize)
+static BOOL wmCreateBackingStore(WMesaFramebuffer pwfb, long lxSize, long lySize)
{
- HDC hdc = pwfb->hDC;
LPBITMAPINFO pbmi = &(pwfb->bmi);
HDC hic;
@@ -227,7 +226,6 @@ wmesa_get_buffer_size(struct gl_framebuffer *buffer, GLuint *width, GLuint *heig
static void wmesa_flush(struct gl_context *ctx)
{
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->WinSysDrawBuffer);
if (ctx->Visual.doubleBufferMode == 1) {
@@ -254,9 +252,7 @@ static void wmesa_flush(struct gl_context *ctx)
static void clear_color(struct gl_context *ctx, const GLfloat color[4])
{
WMesaContext pwc = wmesa_context(ctx);
- WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLubyte col[3];
- UINT bytesPerPixel = pwfb->cColorBits / 8;
CLAMPED_FLOAT_TO_UBYTE(col[0], color[0]);
CLAMPED_FLOAT_TO_UBYTE(col[1], color[1]);
@@ -448,21 +444,15 @@ static void clear(struct gl_context *ctx, GLbitfield mask)
**/
/* Write a horizontal span of RGBA color pixels with a boolean mask. */
-static void write_rgba_span_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n, GLint x, GLint y,
- const GLubyte rgba[][4],
- const GLubyte mask[] )
+static void write_rgba_span_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n, GLint x, GLint y,
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(pwc->hDC);
- CONST BITMAPINFO bmi=
- {
- {
- sizeof(BITMAPINFOHEADER),
- n, 1, 1, 32, BI_RGB, 0, 1, 1, 0, 0
- }
- };
HBITMAP bmp=0;
HDC mdc=0;
typedef union
@@ -535,12 +525,13 @@ static void write_rgba_span_front(const struct gl_context *ctx,
}
/* Write a horizontal span of RGB color pixels with a boolean mask. */
-static void write_rgb_span_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n, GLint x, GLint y,
- const GLubyte rgb[][3],
- const GLubyte mask[] )
+static void write_rgb_span_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n, GLint x, GLint y,
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values;
WMesaContext pwc = wmesa_context(ctx);
GLuint i;
@@ -564,12 +555,13 @@ static void write_rgb_span_front(const struct gl_context *ctx,
* Write a horizontal span of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_span_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n, GLint x, GLint y,
- const GLchan color[4],
- const GLubyte mask[])
+static void write_mono_rgba_span_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n, GLint x, GLint y,
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
GLuint i;
WMesaContext pwc = wmesa_context(ctx);
COLORREF colorref;
@@ -589,13 +581,14 @@ static void write_mono_rgba_span_front(const struct gl_context *ctx,
}
/* Write an array of RGBA pixels with a boolean mask. */
-static void write_rgba_pixels_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n,
- const GLint x[], const GLint y[],
- const GLubyte rgba[][4],
- const GLubyte mask[] )
+static void write_rgba_pixels_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n,
+ const GLint x[], const GLint y[],
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
GLuint i;
WMesaContext pwc = wmesa_context(ctx);
(void) ctx;
@@ -612,13 +605,14 @@ static void write_rgba_pixels_front(const struct gl_context *ctx,
* Write an array of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_pixels_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n,
- const GLint x[], const GLint y[],
- const GLchan color[4],
- const GLubyte mask[] )
+static void write_mono_rgba_pixels_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n,
+ const GLint x[], const GLint y[],
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
GLuint i;
WMesaContext pwc = wmesa_context(ctx);
COLORREF colorref;
@@ -630,11 +624,12 @@ static void write_mono_rgba_pixels_front(const struct gl_context *ctx,
}
/* Read a horizontal span of color pixels. */
-static void read_rgba_span_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n, GLint x, GLint y,
- GLubyte rgba[][4] )
+static void read_rgba_span_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n, GLint x, GLint y,
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
WMesaContext pwc = wmesa_context(ctx);
GLuint i;
COLORREF Color;
@@ -650,11 +645,12 @@ static void read_rgba_span_front(const struct gl_context *ctx,
/* Read an array of color pixels. */
-static void read_rgba_pixels_front(const struct gl_context *ctx,
- struct gl_renderbuffer *rb,
- GLuint n, const GLint x[], const GLint y[],
- GLubyte rgba[][4])
+static void read_rgba_pixels_front(struct gl_context *ctx,
+ struct gl_renderbuffer *rb,
+ GLuint n, const GLint x[], const GLint y[],
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
WMesaContext pwc = wmesa_context(ctx);
GLuint i;
COLORREF Color;
@@ -679,13 +675,13 @@ LPDWORD lpdw = ((LPDWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \
/* Write a horizontal span of RGBA color pixels with a boolean mask. */
-static void write_rgba_span_32(const struct gl_context *ctx,
+static void write_rgba_span_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgba[][4],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPDWORD lpdw;
@@ -709,13 +705,13 @@ static void write_rgba_span_32(const struct gl_context *ctx,
/* Write a horizontal span of RGB color pixels with a boolean mask. */
-static void write_rgb_span_32(const struct gl_context *ctx,
+static void write_rgb_span_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgb[][3],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPDWORD lpdw;
@@ -741,16 +737,16 @@ static void write_rgb_span_32(const struct gl_context *ctx,
* Write a horizontal span of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_span_32(const struct gl_context *ctx,
+static void write_mono_rgba_span_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
LPDWORD lpdw;
DWORD pixel;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
lpdw = ((LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
y=FLIP(y);
@@ -767,14 +763,14 @@ static void write_mono_rgba_span_32(const struct gl_context *ctx,
}
/* Write an array of RGBA pixels with a boolean mask. */
-static void write_rgba_pixels_32(const struct gl_context *ctx,
+static void write_rgba_pixels_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- const GLubyte rgba[][4],
- const GLubyte mask[])
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++)
if (mask[i])
@@ -786,15 +782,15 @@ static void write_rgba_pixels_32(const struct gl_context *ctx,
* Write an array of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_pixels_32(const struct gl_context *ctx,
+static void write_mono_rgba_pixels_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n,
const GLint x[], const GLint y[],
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++)
if (mask[i])
@@ -803,15 +799,15 @@ static void write_mono_rgba_pixels_32(const struct gl_context *ctx,
}
/* Read a horizontal span of color pixels. */
-static void read_rgba_span_32(const struct gl_context *ctx,
+static void read_rgba_span_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- GLubyte rgba[][4] )
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i;
DWORD pixel;
LPDWORD lpdw;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
y = FLIP(y);
@@ -827,15 +823,15 @@ static void read_rgba_span_32(const struct gl_context *ctx,
/* Read an array of color pixels. */
-static void read_rgba_pixels_32(const struct gl_context *ctx,
+static void read_rgba_pixels_32(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- GLubyte rgba[][4])
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i;
DWORD pixel;
LPDWORD lpdw;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++) {
@@ -861,13 +857,13 @@ lpb[1] = (g); \
lpb[2] = (r); }
/* Write a horizontal span of RGBA color pixels with a boolean mask. */
-static void write_rgba_span_24(const struct gl_context *ctx,
+static void write_rgba_span_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgba[][4],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPBYTE lpb;
@@ -895,13 +891,13 @@ static void write_rgba_span_24(const struct gl_context *ctx,
/* Write a horizontal span of RGB color pixels with a boolean mask. */
-static void write_rgb_span_24(const struct gl_context *ctx,
+static void write_rgb_span_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgb[][3],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPBYTE lpb;
@@ -931,15 +927,15 @@ static void write_rgb_span_24(const struct gl_context *ctx,
* Write a horizontal span of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_span_24(const struct gl_context *ctx,
+static void write_mono_rgba_span_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
LPBYTE lpb;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
lpb = ((LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * y)) + (3 * x);
y=FLIP(y);
@@ -960,14 +956,14 @@ static void write_mono_rgba_span_24(const struct gl_context *ctx,
}
/* Write an array of RGBA pixels with a boolean mask. */
-static void write_rgba_pixels_24(const struct gl_context *ctx,
+static void write_rgba_pixels_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- const GLubyte rgba[][4],
- const GLubyte mask[])
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++)
if (mask[i])
@@ -979,15 +975,15 @@ static void write_rgba_pixels_24(const struct gl_context *ctx,
* Write an array of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_pixels_24(const struct gl_context *ctx,
+static void write_mono_rgba_pixels_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n,
const GLint x[], const GLint y[],
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++)
if (mask[i])
@@ -996,14 +992,14 @@ static void write_mono_rgba_pixels_24(const struct gl_context *ctx,
}
/* Read a horizontal span of color pixels. */
-static void read_rgba_span_24(const struct gl_context *ctx,
+static void read_rgba_span_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- GLubyte rgba[][4] )
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i;
LPBYTE lpb;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
y = FLIP(y);
@@ -1018,14 +1014,14 @@ static void read_rgba_span_24(const struct gl_context *ctx,
/* Read an array of color pixels. */
-static void read_rgba_pixels_24(const struct gl_context *ctx,
+static void read_rgba_pixels_24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- GLubyte rgba[][4])
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i;
LPBYTE lpb;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++) {
@@ -1050,13 +1046,13 @@ LPWORD lpw = ((LPWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \
/* Write a horizontal span of RGBA color pixels with a boolean mask. */
-static void write_rgba_span_16(const struct gl_context *ctx,
+static void write_rgba_span_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgba[][4],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPWORD lpw;
@@ -1080,13 +1076,13 @@ static void write_rgba_span_16(const struct gl_context *ctx,
/* Write a horizontal span of RGB color pixels with a boolean mask. */
-static void write_rgb_span_16(const struct gl_context *ctx,
+static void write_rgb_span_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLubyte rgb[][3],
- const GLubyte mask[] )
+ const void *values,
+ const GLubyte *mask)
{
- WMesaContext pwc = wmesa_context(ctx);
+ const GLubyte (*rgb)[3] = (const GLubyte (*)[3])values;
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
GLuint i;
LPWORD lpw;
@@ -1112,16 +1108,16 @@ static void write_rgb_span_16(const struct gl_context *ctx,
* Write a horizontal span of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_span_16(const struct gl_context *ctx,
+static void write_mono_rgba_span_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
LPWORD lpw;
WORD pixel;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
(void) ctx;
lpw = ((LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
@@ -1139,14 +1135,14 @@ static void write_mono_rgba_span_16(const struct gl_context *ctx,
}
/* Write an array of RGBA pixels with a boolean mask. */
-static void write_rgba_pixels_16(const struct gl_context *ctx,
+static void write_rgba_pixels_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- const GLubyte rgba[][4],
- const GLubyte mask[])
+ const void *values,
+ const GLubyte *mask)
{
+ const GLubyte (*rgba)[4] = (const GLubyte (*)[4])values;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
(void) ctx;
for (i=0; i<n; i++)
@@ -1159,15 +1155,15 @@ static void write_rgba_pixels_16(const struct gl_context *ctx,
* Write an array of pixels with a boolean mask. The current color
* is used for all pixels.
*/
-static void write_mono_rgba_pixels_16(const struct gl_context *ctx,
+static void write_mono_rgba_pixels_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n,
const GLint x[], const GLint y[],
- const GLchan color[4],
- const GLubyte mask[])
+ const void *value,
+ const GLubyte *mask)
{
+ const GLchan *color = (const GLchan *)value;
GLuint i;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
(void) ctx;
for (i=0; i<n; i++)
@@ -1177,14 +1173,14 @@ static void write_mono_rgba_pixels_16(const struct gl_context *ctx,
}
/* Read a horizontal span of color pixels. */
-static void read_rgba_span_16(const struct gl_context *ctx,
+static void read_rgba_span_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, GLint x, GLint y,
- GLubyte rgba[][4] )
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i, pixel;
LPWORD lpw;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
y = FLIP(y);
@@ -1201,14 +1197,14 @@ static void read_rgba_span_16(const struct gl_context *ctx,
/* Read an array of color pixels. */
-static void read_rgba_pixels_16(const struct gl_context *ctx,
+static void read_rgba_pixels_16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint n, const GLint x[], const GLint y[],
- GLubyte rgba[][4])
+ void *values)
{
+ GLubyte (*rgba)[4] = (GLubyte (*)[4])values;
GLuint i, pixel;
LPWORD lpw;
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
for (i=0; i<n; i++) {
@@ -1261,8 +1257,9 @@ wmesa_renderbuffer_storage(struct gl_context *ctx,
* Plug in the Get/PutRow/Values functions for a renderbuffer depending
* on if we're drawing to the front or back color buffer.
*/
-void wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
- int cColorBits, int double_buffer)
+static void
+wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
+ int cColorBits, int double_buffer)
{
if (double_buffer) {
/* back buffer */
@@ -1324,7 +1321,6 @@ static void
wmesa_resize_buffers(struct gl_context *ctx, struct gl_framebuffer *buffer,
GLuint width, GLuint height)
{
- WMesaContext pwc = wmesa_context(ctx);
WMesaFramebuffer pwfb = wmesa_framebuffer(buffer);
if (pwfb->Base.Width != width || pwfb->Base.Height != height) {
@@ -1353,7 +1349,6 @@ static void wmesa_viewport(struct gl_context *ctx,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
- WMesaContext pwc = wmesa_context(ctx);
GLuint new_width, new_height;
wmesa_get_buffer_size(ctx->WinSysDrawBuffer, &new_width, &new_height);
@@ -1553,7 +1548,7 @@ void WMesaDestroyContext( WMesaContext pwc )
/**
* Create a new color renderbuffer.
*/
-struct gl_renderbuffer *
+static struct gl_renderbuffer *
wmesa_new_renderbuffer(void)
{
struct gl_renderbuffer *rb = CALLOC_STRUCT(gl_renderbuffer);
diff --git a/src/mesa/drivers/windows/gdi/wmesadef.h b/src/mesa/drivers/windows/gdi/wmesadef.h
index 32289ebc700..9fda8839014 100644
--- a/src/mesa/drivers/windows/gdi/wmesadef.h
+++ b/src/mesa/drivers/windows/gdi/wmesadef.h
@@ -1,8 +1,8 @@
#ifndef WMESADEF_H
#define WMESADEF_H
-#ifdef __MINGW32__
+
#include <windows.h>
-#endif
+
#include "main/context.h"
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 3031b7b3273..81f48f9d95a 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -445,11 +445,11 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
if (swrast->NewState)
_swrast_validate_derived( ctx );
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* unpack from PBO */
GLubyte *buf;
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
- format, type, pixels)) {
+ format, type, INT_MAX, pixels)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glDrawPixels(invalid PBO access)");
return;
@@ -507,7 +507,7 @@ xmesa_DrawPixels_8R8G8B( struct gl_context *ctx,
XPutImage(dpy, xrb->pixmap, gc, &ximage, 0, 0, dstX, dstY, w, h);
}
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
unpack->BufferObj);
}
@@ -580,11 +580,11 @@ xmesa_DrawPixels_5R6G5B( struct gl_context *ctx,
if (swrast->NewState)
_swrast_validate_derived( ctx );
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* unpack from PBO */
GLubyte *buf;
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
- format, type, pixels)) {
+ format, type, INT_MAX, pixels)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glDrawPixels(invalid PBO access)");
return;