summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorChristian König <[email protected]>2010-12-11 13:43:44 +0100
committerChristian König <[email protected]>2010-12-11 13:43:44 +0100
commit772b25e1f366edc857e77b8c1ccdc5297d82cc41 (patch)
tree88d38b52e80319dbd4a0c5b5e038d92f1105110a /src/mesa/drivers
parentab130400cf91ab471e265e58193c95f04c7aeeda (diff)
parentb3d2ec9942303d1d03e28a25b030eb060415abfb (diff)
Merge remote branch 'origin/master' into pipe-video
Conflicts: src/gallium/drivers/r600/r600_pipe.c src/gallium/drivers/r600/r600_texture.c
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/common/meta.c19
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c1
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c6
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c25
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_curbe.c22
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h14
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c35
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c314
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp170
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c17
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c50
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h31
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c351
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c131
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h30
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c81
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c1035
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass2.c45
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_sampler_state.c41
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c8
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c92
-rw-r--r--src/mesa/drivers/dri/i965/gen6_clip_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/gen6_sf_state.c89
-rw-r--r--src/mesa/drivers/dri/i965/gen6_urb.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_vs_state.c24
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c28
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c11
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c8
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c16
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_maos_arrays.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c25
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c103
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c207
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c381
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c73
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c364
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c115
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h53
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c162
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c103
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_chip.c6
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_state.c8
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_tex.c1
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c11
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c64
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c1
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c8
-rw-r--r--src/mesa/drivers/dri/sis/server/sis_dri.h9
-rw-r--r--src/mesa/drivers/dri/tdfx/tdfx_context.h4
-rw-r--r--src/mesa/drivers/dri/unichrome/server/via_dri.h2
-rw-r--r--src/mesa/drivers/windows/gdi/InitCritSections.cpp5
-rw-r--r--src/mesa/drivers/x11/glxheader.h8
-rw-r--r--src/mesa/drivers/x11/xm_api.c176
-rw-r--r--src/mesa/drivers/x11/xm_buffer.c19
-rw-r--r--src/mesa/drivers/x11/xm_dd.c55
-rw-r--r--src/mesa/drivers/x11/xm_glide.c8
-rw-r--r--src/mesa/drivers/x11/xm_image.c94
-rw-r--r--src/mesa/drivers/x11/xm_line.c4
-rw-r--r--src/mesa/drivers/x11/xm_span.c23
-rw-r--r--src/mesa/drivers/x11/xmesa.h17
-rw-r--r--src/mesa/drivers/x11/xmesaP.h18
100 files changed, 2601 insertions, 2588 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index ba8be125718..cdb2500f7c2 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -266,13 +266,16 @@ struct gen_mipmap_state
GLuint FBO;
};
-
+#define MAX_META_OPS_DEPTH 2
/**
* All per-context meta state.
*/
struct gl_meta_state
{
- struct save_state Save; /**< state saved during meta-ops */
+ /** Stack of state saved during meta-ops */
+ struct save_state Save[MAX_META_OPS_DEPTH];
+ /** Save stack depth */
+ GLuint SaveStackDepth;
struct temp_texture TempTex;
@@ -324,8 +327,13 @@ _mesa_meta_free(struct gl_context *ctx)
static void
_mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
{
- struct save_state *save = &ctx->Meta->Save;
+ struct save_state *save;
+
+ /* hope MAX_META_OPS_DEPTH is large enough */
+ assert(ctx->Meta->SaveStackDepth < MAX_META_OPS_DEPTH);
+ save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth++];
+ memset(save, 0, sizeof(*save));
save->SavedState = state;
if (state & META_ALPHA_TEST) {
@@ -575,7 +583,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
static void
_mesa_meta_end(struct gl_context *ctx)
{
- struct save_state *save = &ctx->Meta->Save;
+ struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth];
const GLbitfield state = save->SavedState;
if (state & META_ALPHA_TEST) {
@@ -1398,6 +1406,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
struct vertex verts[4];
/* save all state but scissor, pixel pack/unpack */
GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE;
+ const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
if (buffers & BUFFER_BITS_COLOR) {
/* if clearing color buffers, don't save/restore colormask */
@@ -1453,7 +1462,7 @@ _mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
_mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
GL_REPLACE, GL_REPLACE, GL_REPLACE);
_mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
- ctx->Stencil.Clear & 0x7fffffff,
+ ctx->Stencil.Clear & stencilMax,
ctx->Stencil.WriteMask[0]);
}
else {
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index f943f81dd05..f32f3cf6020 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -176,6 +176,7 @@ i915CreateContext(int api,
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE;
ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE;
+ ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE;
ctx->Const.MaxDrawBuffers = 1;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index c00ee415b6b..7a9fb7f088b 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -569,10 +569,14 @@ upload_program(struct i915_fragment_program *p)
if (inst->DstReg.CondMask == COND_TR) {
tmp = i915_get_utemp(p);
+ /* The KIL instruction discards the fragment if any component of
+ * the source is < 0. Emit an immediate operand of {-1}.xywz.
+ */
i915_emit_texld(p, get_live_regs(p, inst),
tmp, A0_DEST_CHANNEL_ALL,
0, /* use a dummy dest reg */
- swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+ negate(swizzle(tmp, ONE, ONE, ONE, ONE),
+ 1, 1, 1, 1),
T0_TEXKILL);
} else {
p->error = 1;
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index e3ca863fe51..7c3ac0c14ef 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -81,7 +81,6 @@ DRIVER_SOURCES = \
brw_wm_emit.c \
brw_wm_fp.c \
brw_wm_iz.c \
- brw_wm_glsl.c \
brw_wm_pass0.c \
brw_wm_pass1.c \
brw_wm_pass2.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index a8369b07c35..d3a1233aac0 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -232,3 +232,28 @@ const struct brw_tracked_state brw_cc_unit = {
.prepare = prepare_cc_unit,
.emit = upload_cc_unit,
};
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ struct brw_blend_constant_color bcc;
+
+ memset(&bcc, 0, sizeof(bcc));
+ bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+ bcc.header.length = sizeof(bcc)/4-2;
+ bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
+ bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
+ bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
+ bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
+
+ BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+const struct brw_tracked_state brw_blend_constant_color = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = BRW_NEW_CONTEXT,
+ .cache = 0
+ },
+ .emit = upload_blend_constant_color
+};
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cb0a8b96c9c..28549f2574a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api,
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
-
- if (intel->gen == 6)
- ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX);
}
ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 335339515a2..7069724466a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -171,7 +171,6 @@ struct brw_vertex_program {
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
- GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
@@ -211,6 +210,7 @@ struct brw_wm_prog_data {
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
GLboolean error;
+ int dispatch_width;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 7b823eb201b..877b22fec19 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
GLuint nr = brw->vs.prog_data->nr_params / 4;
- if (vp->use_const_buffer) {
- /* Load the subset of push constants that will get used when
- * we also have a pull constant buffer.
- */
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- assert(brw->vs.constant_map[i] <= nr);
- memcpy(buf + offset + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- }
- }
- } else {
- for (i = 0; i < nr; i++) {
- memcpy(buf + offset + i * 4,
+ /* Load the subset of push constants that will get used when
+ * we also have a pull constant buffer.
+ */
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ assert(brw->vs.constant_map[i] <= nr);
+ memcpy(buf + offset + brw->vs.constant_map[i] * 4,
vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 239586a0366..7f3e4986808 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -462,6 +462,13 @@
#define BRW_COMPRESSION_2NDHALF 1
#define BRW_COMPRESSION_COMPRESSED 2
+#define GEN6_COMPRESSION_1Q 0
+#define GEN6_COMPRESSION_2Q 1
+#define GEN6_COMPRESSION_3Q 2
+#define GEN6_COMPRESSION_4Q 3
+#define GEN6_COMPRESSION_1H 0
+#define GEN6_COMPRESSION_2H 2
+
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
@@ -1022,6 +1029,13 @@
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
+
+# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
+# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
+# define ATTRIBUTE_SWIZZLE_SHIFT 6
+
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 962c04128b8..6b61f7af15d 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= dest (file, inst);
} else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF ||
inst->header.opcode == BRW_OPCODE_ELSE ||
- inst->header.opcode == BRW_OPCODE_ENDIF)) {
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits1.branch_gen6.jump_count);
}
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 2ff39e8e64a..3b5c4c071e3 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
{
- p->current->header.compression_control = compression_control;
+ p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+
+ if (p->brw->intel.gen >= 6) {
+ /* Since we don't use the 32-wide support in gen6, we translate
+ * the pre-gen6 compression control here.
+ */
+ switch (compression_control) {
+ case BRW_COMPRESSION_NONE:
+ /* This is the "use the first set of bits of dmask/vmask/arf
+ * according to execsize" option.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1Q;
+ break;
+ case BRW_COMPRESSION_2NDHALF:
+ /* For 8-wide, this is "use the second set of 8 bits." */
+ p->current->header.compression_control = GEN6_COMPRESSION_2Q;
+ break;
+ case BRW_COMPRESSION_COMPRESSED:
+ /* For 16-wide instruction compression, use the first set of 16 bits
+ * since we don't do 32-wide dispatch.
+ */
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ default:
+ assert(!"not reached");
+ p->current->header.compression_control = GEN6_COMPRESSION_1H;
+ break;
+ }
+ } else {
+ p->current->header.compression_control = compression_control;
+ }
}
void brw_set_mask_control( struct brw_compile *p, GLuint value )
@@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+ p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
@@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
+ p->compressed = p->compressed_stack[p->current - p->stack];
}
@@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
p->brw = brw;
p->nr_insn = 0;
p->current = p->stack;
+ p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
/* Some defaults?
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index b4538e6e8a7..4dbdc522100 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -33,6 +33,7 @@
#ifndef BRW_EU_H
#define BRW_EU_H
+#include <stdbool.h>
#include "brw_structs.h"
#include "brw_defines.h"
#include "program/prog_instruction.h"
@@ -106,10 +107,12 @@ struct brw_compile {
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
GLuint flag_value;
GLboolean single_program_flow;
+ bool compressed;
struct brw_context *brw;
struct brw_glsl_label *first_label; /**< linked list of labels */
@@ -954,6 +957,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
/* Forward jumps:
*/
@@ -1009,6 +1014,7 @@ void brw_math_invert( struct brw_compile *p,
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
+void brw_set_uip_jip(struct brw_compile *p);
/* brw_optimize.c */
void brw_optimize(struct brw_compile *p);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 9cb941dacfd..9c764fe779d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -41,19 +41,20 @@
* Internal helper for constructing instructions
*/
-static void guess_execution_size( struct brw_instruction *insn,
- struct brw_reg reg )
+static void guess_execution_size(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
- if (reg.width == BRW_WIDTH_8 &&
- insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
-static void brw_set_dest( struct brw_instruction *insn,
- struct brw_reg dest )
+static void brw_set_dest(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
@@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn,
/* NEW: Set the execution size based on dest.width and
* insn->compression_control:
*/
- guess_execution_size(insn, dest);
+ guess_execution_size(p, insn, dest);
}
extern int reg_type_size[];
@@ -629,7 +630,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
return insn;
}
@@ -641,7 +642,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p,
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
return insn;
@@ -680,7 +681,7 @@ void brw_##OP(struct brw_compile *p, \
{ \
struct brw_instruction *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(rnd, dest); \
+ brw_set_dest(p, rnd, dest); \
brw_set_src0(rnd, src); \
rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
\
@@ -779,7 +780,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p,
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_ud(0x0));
}
@@ -840,11 +841,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
/* Override the defaults for this instruction:
*/
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -870,7 +871,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
insn = next_insn(p, BRW_OPCODE_IF);
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->header.execution_size = BRW_EXECUTE_8;
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, src0);
@@ -905,11 +906,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p,
}
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
@@ -965,11 +966,11 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
if (intel->gen < 6) {
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(insn, brw_imm_d(0x0));
} else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
@@ -1029,16 +1030,44 @@ void brw_ENDIF(struct brw_compile *p,
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
+
insn = next_insn(p, BRW_OPCODE_BREAK);
- brw_set_dest(insn, brw_ip_reg());
+ if (intel->gen >= 6) {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, brw_imm_d(0x0));
+ } else {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0x0));
+ insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+
+ return insn;
+}
+
+struct brw_instruction *brw_CONT_gen6(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{
+ struct brw_instruction *insn;
+ int br = 2;
+
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
+
+ insn->bits3.break_cont.uip = br * (do_insn - insn);
+
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = pop_count;
return insn;
}
@@ -1046,7 +1075,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(insn, brw_ip_reg());
+ brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(insn, brw_ip_reg());
brw_set_src1(insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
@@ -1058,17 +1087,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
}
/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- if (p->single_program_flow) {
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
/* Override the defaults for this instruction:
*/
- brw_set_dest(insn, brw_null_reg());
+ brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(insn, brw_null_reg());
brw_set_src1(insn, brw_null_reg());
@@ -1094,34 +1139,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p,
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow)
- insn = next_insn(p, BRW_OPCODE_ADD);
- else
+ if (intel->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = do_insn->header.execution_size;
+ assert(insn->header.execution_size == BRW_EXECUTE_8);
+ } else {
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
- if (p->single_program_flow) {
- insn->header.execution_size = BRW_EXECUTE_1;
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
- insn->bits3.d = (do_insn - insn) * 16;
- } else {
- insn->header.execution_size = do_insn->header.execution_size;
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
- }
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(insn, brw_ip_reg());
+ brw_set_src1(insn, brw_imm_d(0));
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
}
@@ -1159,7 +1212,7 @@ void brw_CMP(struct brw_compile *p,
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
insn->header.destreg__conditionalmod = conditional;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
@@ -1184,7 +1237,7 @@ void brw_WAIT (struct brw_compile *p)
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
- brw_set_dest(insn, src);
+ brw_set_dest(p, insn, src);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
@@ -1219,6 +1272,10 @@ void brw_math( struct brw_compile *p,
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
+
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
@@ -1228,8 +1285,9 @@ void brw_math( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
} else {
@@ -1242,7 +1300,7 @@ void brw_math( struct brw_compile *p,
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1284,12 +1342,18 @@ void brw_math2(struct brw_compile *p,
assert(src1.type == BRW_REGISTER_TYPE_F);
}
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src0.negate);
+ assert(!src0.abs);
+ assert(!src1.negate);
+ assert(!src1.abs);
+
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, src1);
}
@@ -1318,8 +1382,13 @@ void brw_math_16( struct brw_compile *p,
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
+
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_src1(insn, brw_null_reg());
return;
@@ -1334,7 +1403,7 @@ void brw_math_16( struct brw_compile *p,
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1351,7 +1420,7 @@ void brw_math_16( struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
- brw_set_dest(insn, offset(dest,1));
+ brw_set_dest(p, insn, offset(dest,1));
brw_set_src0(insn, src);
brw_set_math_message(p->brw,
insn,
@@ -1446,7 +1515,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
send_commit_msg = 1;
}
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_write_message(p->brw,
@@ -1516,7 +1585,7 @@ brw_oword_block_read_scratch(struct brw_compile *p,
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
- brw_set_dest(insn, dest); /* UW? */
+ brw_set_dest(p, insn, dest); /* UW? */
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1569,7 +1638,7 @@ void brw_oword_block_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
if (intel->gen >= 6) {
brw_set_src0(insn, mrf);
} else {
@@ -1614,7 +1683,7 @@ void brw_dword_scattered_read(struct brw_compile *p,
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_null_reg());
brw_set_dp_read_message(p->brw,
@@ -1639,29 +1708,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
GLuint location,
GLuint bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
- struct brw_reg b;
- /*
- printf("vs const read msg, location %u, msg_reg_nr %d\n",
- location, msg_reg_nr);
- */
+ if (intel->gen >= 6)
+ location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
-
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1671,8 +1732,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p,
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(insn, brw_message_reg(msg_reg_nr));
+ } else {
+ brw_set_src0(insn, brw_null_reg());
+ }
brw_set_dp_read_message(p->brw,
insn,
@@ -1706,7 +1771,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
- brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
@@ -1717,7 +1782,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, brw_vec8_grf(0, 0));
if (intel->gen == 6)
@@ -1782,7 +1847,7 @@ void brw_fb_WRITE(struct brw_compile *p,
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_dp_write_message(p->brw,
insn,
@@ -1860,7 +1925,7 @@ void brw_SAMPLE(struct brw_compile *p,
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
- guess_execution_size(p->current, dest);
+ guess_execution_size(p, p->current, dest);
if (p->current->header.execution_size == BRW_EXECUTE_16)
dispatch_16 = GL_TRUE;
@@ -1895,12 +1960,15 @@ void brw_SAMPLE(struct brw_compile *p,
* and the first message register index comes from src0.
*/
if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- /* m1 contains header? */
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
+ if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ src0.nr != BRW_ARF_NULL) {
+ brw_push_insn_state(p);
+ brw_set_mask_control( p, BRW_MASK_DISABLE );
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0);
+ brw_pop_insn_state(p);
+ }
+ src0 = brw_message_reg(msg_reg_nr);
}
insn = next_insn(p, BRW_OPCODE_SEND);
@@ -1909,7 +1977,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_sampler_message(p->brw, insn,
binding_table_index,
@@ -1970,7 +2038,7 @@ void brw_urb_WRITE(struct brw_compile *p,
assert(msg_length < BRW_MAX_MRF);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
@@ -1989,6 +2057,80 @@ void brw_urb_WRITE(struct brw_compile *p,
swizzle);
}
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+ int ip;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ return ip;
+ }
+ }
+ assert(!"not reached");
+ return start + 1;
+}
+
+/* Returns the ip of the WHILE closing the loop that contains "start".
+ * There is no DO instruction on gen6, so that is the first WHILE jumping
+ * back to or before start ("<=": start may be the loop's first insn).
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+   int ip;
+   int br = 2;
+
+   for (ip = start + 1; ip < p->nr_insn; ip++) {
+      struct brw_instruction *insn = &p->store[ip];
+
+      if (insn->header.opcode == BRW_OPCODE_WHILE) {
+         if (ip + insn->bits1.branch_gen6.jump_count / br <= start)
+            return ip;
+      }
+   }
+   assert(!"not reached");
+   return start + 1;
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK and CONT instructions to their correct locations.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2;
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_BREAK:
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* UIP is set at CONTINUE emit time, since that's when we
+ * know where the start of the loop is.
+ */
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
@@ -2013,7 +2155,7 @@ void brw_ff_sync(struct brw_compile *p,
}
insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(insn, dest);
+ brw_set_dest(p, insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index edb02fabb23..c3cbe0df618 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -600,8 +600,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
* might be able to do better by doing execsize = 1 math and then
* expanding that result out, but we would need to be careful with
* masking.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
- if (intel->gen >= 6 && src.file == UNIFORM) {
+ if (intel->gen >= 6 && (src.file == UNIFORM ||
+ src.abs ||
+ src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
emit(fs_inst(BRW_OPCODE_MOV, expanded, src));
src = expanded;
@@ -933,6 +938,10 @@ fs_visitor::visit(ir_expression *ir)
assert(!"not reached: should be handled by lower_noise");
break;
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+
case ir_unop_sqrt:
emit_math(FS_OPCODE_SQRT, this->result, op[0]);
break;
@@ -1423,28 +1432,70 @@ fs_visitor::visit(ir_discard *ir)
void
fs_visitor::visit(ir_constant *ir)
{
- fs_reg reg(this, ir->type);
- this->result = reg;
+ /* Set this->result to reg at the bottom of the function because some code
+ * paths will cause this visitor to be applied to other fields. This will
+ * cause the value stored in this->result to be modified.
+ *
+ * Make reg constant so that it doesn't get accidentally modified along the
+ * way. Yes, I actually had this problem. :(
+ */
+ const fs_reg reg(this, ir->type);
+ fs_reg dst_reg = reg;
- for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_INT:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
- break;
- default:
- assert(!"Non-float/uint/int/bool constant");
+ if (ir->type->is_array()) {
+ const unsigned size = type_size(ir->type->fields.array);
+
+ for (unsigned i = 0; i < ir->type->length; i++) {
+ ir->array_elements[i]->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else if (ir->type->is_record()) {
+ foreach_list(node, &ir->components) {
+ ir_instruction *const field = (ir_instruction *) node;
+ const unsigned size = type_size(field->type);
+
+ field->accept(this);
+ fs_reg src_reg = this->result;
+
+ dst_reg.type = src_reg.type;
+ for (unsigned j = 0; j < size; j++) {
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg));
+ src_reg.reg_offset++;
+ dst_reg.reg_offset++;
+ }
+ }
+ } else {
+ const unsigned size = type_size(ir->type);
+
+ for (unsigned i = 0; i < size; i++) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i])));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i])));
+ break;
+ case GLSL_TYPE_INT:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i])));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i])));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ }
+ dst_reg.reg_offset++;
}
- reg.reg_offset++;
}
+
+ this->result = reg;
}
void
@@ -1574,7 +1625,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
switch (expr->operation) {
case ir_unop_logic_not:
- inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1)));
+ inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
return;
@@ -1951,7 +2002,7 @@ fs_visitor::emit_interpolation_setup_gen6()
emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y));
this->current_annotation = "compute 1/pos.w";
- this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
+ this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
@@ -1979,17 +2030,17 @@ fs_visitor::emit_fb_writes()
nr += 2;
}
- if (c->key.aa_dest_stencil_reg) {
+ if (c->aa_dest_stencil_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
+ fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
}
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
nr += 4;
- if (c->key.source_depth_to_render_target) {
- if (c->key.computes_depth) {
+ if (c->source_depth_to_render_target) {
+ if (c->computes_depth) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth);
fs_reg depth = *(variable_storage(this->frag_depth));
@@ -1998,20 +2049,22 @@ fs_visitor::emit_fb_writes()
} else {
/* Pass through the payload depth. */
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
}
}
- if (c->key.dest_depth_reg) {
+ if (c->dest_depth_reg) {
emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
- fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
+ fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
}
fs_reg color = reg_undef;
if (this->frag_color)
color = *(variable_storage(this->frag_color));
- else if (this->frag_data)
+ else if (this->frag_data) {
color = *(variable_storage(this->frag_data));
+ color.type = BRW_REGISTER_TYPE_F;
+ }
for (int target = 0; target < c->key.nr_color_regions; target++) {
this->current_annotation = talloc_asprintf(this->mem_ctx,
@@ -2452,7 +2505,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
void
fs_visitor::assign_curb_setup()
{
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+ c->prog_data.first_curbe_grf = c->nr_payload_regs;
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
/* Map the offsets in the UNIFORM file to fixed HW regs. */
@@ -3227,6 +3280,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
break;
default:
assert(!"not reached");
+ brw_reg = brw_null_reg();
break;
}
break;
@@ -3241,6 +3295,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
assert(!"not reached");
brw_reg = brw_null_reg();
break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
}
if (reg->abs)
brw_reg = brw_abs(brw_reg);
@@ -3373,10 +3431,6 @@ fs_visitor::generate_code()
break;
case BRW_OPCODE_DO:
- /* FINISHME: We need to write the loop instruction support still. */
- if (intel->gen >= 6)
- this->fail = true;
-
loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
if_depth_in_loop[loop_stack_depth] = 0;
break;
@@ -3386,7 +3440,11 @@ fs_visitor::generate_code()
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
@@ -3400,16 +3458,18 @@ fs_visitor::generate_code()
assert(loop_stack_depth > 0);
loop_stack_depth--;
inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
}
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
}
}
}
@@ -3486,6 +3546,26 @@ fs_visitor::generate_code()
last_native_inst = p->nr_insn;
}
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
}
GLboolean
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 3b7b03a05b8..20bfa4c3ea3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_round_even:
case ir_unop_sin:
case ir_unop_cos:
+ case ir_unop_sin_reduced:
+ case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
for (i = 0; i < vector_elements; i++) {
@@ -328,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_noise:
assert(!"noise should have been broken down to function call");
break;
+ case ir_quadop_vector:
+ assert(!"should have been lowered");
+ break;
}
ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index b0c76f4094d..73b41fdbcef 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -166,6 +166,9 @@ static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_context *intel = &brw->intel;
+ int prim_gs_always;
+
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG */
@@ -185,10 +188,14 @@ static void populate_key( struct brw_context *brw,
key->pv_first = GL_TRUE;
}
- key->need_gs_prog = (key->hint_gs_always ||
- brw->primitive == GL_QUADS ||
+ if (intel->gen == 6)
+ prim_gs_always = brw->primitive == GL_LINE_LOOP;
+ else
+ prim_gs_always = brw->primitive == GL_QUADS ||
brw->primitive == GL_QUAD_STRIP ||
- brw->primitive == GL_LINE_LOOP);
+ brw->primitive == GL_LINE_LOOP;
+
+ key->need_gs_prog = (key->hint_gs_always || prim_gs_always);
}
/* Calculate interpolants for triangle and line rasterization.
@@ -205,8 +212,10 @@ static void prepare_gs_prog(struct brw_context *brw)
brw->gs.prog_active = key.need_gs_prog;
}
+ drm_intel_bo_unreference(brw->gs.prog_bo);
+ brw->gs.prog_bo = NULL;
+
if (brw->gs.prog_active) {
- drm_intel_bo_unreference(brw->gs.prog_bo);
brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
NULL, 0,
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1d350bc0413..a91b0528fac 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -38,40 +38,6 @@
#include "brw_state.h"
#include "brw_defines.h"
-
-
-
-
-/***********************************************************************
- * Blend color
- */
-
-static void upload_blend_constant_color(struct brw_context *brw)
-{
- struct gl_context *ctx = &brw->intel.ctx;
- struct brw_blend_constant_color bcc;
-
- memset(&bcc, 0, sizeof(bcc));
- bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
- bcc.header.length = sizeof(bcc)/4-2;
- bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
- bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
- bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
- bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
- BRW_CACHED_BATCH_STRUCT(brw, &bcc);
-}
-
-
-const struct brw_tracked_state brw_blend_constant_color = {
- .dirty = {
- .mesa = _NEW_COLOR,
- .brw = BRW_NEW_CONTEXT,
- .cache = 0
- },
- .emit = upload_blend_constant_color
-};
-
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
@@ -339,6 +305,9 @@ static void upload_polygon_stipple(struct brw_context *brw)
struct brw_polygon_stipple bps;
GLuint i;
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
memset(&bps, 0, sizeof(bps));
bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
bps.header.length = sizeof(bps)/4-2;
@@ -381,6 +350,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
struct gl_context *ctx = &brw->intel.ctx;
struct brw_polygon_stipple_offset bpso;
+ if (!ctx->Polygon.StippleFlag)
+ return;
+
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
@@ -409,7 +381,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
- .mesa = _NEW_WINDOW_POS,
+ .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
@@ -421,9 +393,10 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
+ struct gl_context *ctx = &brw->intel.ctx;
struct brw_aa_line_parameters balp;
- if (!brw->has_aa_line_parameters)
+ if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
/* use legacy aa line coverage computation */
@@ -436,7 +409,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
const struct brw_tracked_state brw_aa_line_parameters = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
@@ -454,6 +427,9 @@ static void upload_line_stipple(struct brw_context *brw)
GLfloat tmp;
GLint tmpi;
+ if (!ctx->Line.StippleFlag)
+ return;
+
memset(&bls, 0, sizeof(bls));
bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
bls.header.length = sizeof(bls)/4 - 2;
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 1367d814696..94efa791091 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = brw->program_id++;
- newFP->isGLSL = brw_wm_is_glsl(fprog);
/* Don't reject fragment shaders for their Mesa IR state when we're
* using the new FS backend.
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 338f3876b31..eba4411ca70 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
- &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 8ce9af9c4fe..461f27048cc 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -1064,6 +1064,15 @@ struct brw_sampler_default_color {
GLfloat color[4];
};
+struct gen5_sampler_default_color {
+ uint8_t ub[4];
+ float f[4];
+ uint16_t hf[4];
+ uint16_t us[4];
+ int16_t s[4];
+ uint8_t b[4];
+};
+
struct brw_sampler_state
{
@@ -1169,7 +1178,12 @@ struct brw_surface_state
GLuint cube_neg_y:1;
GLuint cube_pos_x:1;
GLuint cube_neg_x:1;
- GLuint pad:4;
+ GLuint pad:2;
+ /* Required on gen6 for surfaces accessed through render cache messages.
+ */
+ GLuint render_cache_read_write:1;
+ /* Ironlake and newer: instead of replicating one of the texels */
+ GLuint cube_corner_average:1;
GLuint mipmap_layout_mode:1;
GLuint vert_line_stride_ofs:1;
GLuint vert_line_stride:1;
@@ -1539,6 +1553,21 @@ struct brw_instruction
GLuint pad0:12;
} if_else;
+ struct
+ {
+ /* Signed jump distance to the ip to jump to if all channels
+ * are disabled after the break or continue. It should point
+ * to the end of the innermost control flow block, as that's
+ * where some channel could get re-enabled.
+ */
+ int jip:16;
+
+ /* Signed jump distance to the location to resume execution
+ * of this channel if it's enabled for the break or continue.
+ */
+ int uip:16;
+ } break_cont;
+
struct {
GLuint function:4;
GLuint int_type:1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 4a41c7a5176..6ae75d22c14 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw,
(void) ctx;
aux_size = sizeof(c.prog_data);
- if (c.vp->use_const_buffer)
- aux_size += c.vp->program.Base.Parameters->NumParameters;
+ /* constant_map */
+ aux_size += c.vp->program.Base.Parameters->NumParameters;
drm_intel_bo_unreference(brw->vs.prog_bo);
brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
@@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_POINT */
if (ctx->Point.PointSprite) {
@@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 9338a6b7dbf..0b88cc1ec76 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -44,6 +44,7 @@ struct brw_vs_prog_key {
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint point_coord_replace:8;
+ GLuint two_side_color: 1;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 7e43324a1f9..09887dae95d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c)
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
struct intel_context *intel = &c->func.brw->intel;
- GLuint i, reg = 0, mrf;
+ GLuint i, reg = 0, mrf, j;
int attributes_in_vue;
int first_reladdr_output;
+ int max_constant;
+ int constant = 0;
+ int vert_result_reoder[VERT_RESULT_MAX];
+ int bfc = 0;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
- /* Vertex program parameters from curbe:
+ /* Assign some (probably all) of the vertex program constants to
+ * the push constant buffer/CURBE.
+ *
+ * There's an obvious limit to the number of push constants equal to
+ * the number of registers available, and that number is smaller
+ * than the minimum maximum number of vertex program parameters, so
+ * support for pull constants is required if we overflow.
+ * Additionally, on gen6 the number of push constants is even
+ * lower.
+ *
+ * When there's relative addressing, we don't know what range of
+ * Mesa IR registers can be accessed. And generally, when relative
+ * addressing is used we also have too many constants to load them
+ * all as push constants. So, we'll just support relative
+ * addressing out of the pull constant buffers, and try to load as
+ * many statically-accessed constants into the push constant buffer
+ * as we can.
*/
- if (c->vp->use_const_buffer) {
- int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
- int constant = 0;
-
- /* We've got more constants than we can load with the push
- * mechanism. This is often correlated with reladdr loads where
- * we should probably be using a pull mechanism anyway to avoid
- * excessive reading. However, the pull mechanism is slow in
- * general. So, we try to allocate as many non-reladdr-loaded
- * constants through the push buffer as we can before giving up.
- */
- memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
- for (i = 0;
- i < c->vp->program.Base.NumInstructions && constant < max_constant;
- i++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
- int arg;
-
- for (arg = 0; arg < 3 && constant < max_constant; arg++) {
- if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
- inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
- inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
- inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
- inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
- inst->SrcReg[arg].RelAddr)
- continue;
-
- if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
- c->constant_map[inst->SrcReg[arg].Index] = constant++;
- }
+ if (intel->gen >= 6) {
+ /* We can only load 32 regs of push constants. */
+ max_constant = 32 * 2 - c->key.nr_userclip;
+ } else {
+ max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+ }
+
+ /* constant_map maps from ParameterValues[] index to index in the
+ * push constant buffer, or -1 if it's only in the pull constant
+ * buffer.
+ */
+ memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);
+ for (i = 0;
+ i < c->vp->program.Base.NumInstructions && constant < max_constant;
+ i++) {
+ struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+ int arg;
+
+ for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+ if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+ inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+ inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+ inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+ inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) {
+ continue;
}
- }
- for (i = 0; i < constant; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
- (i%2) * 4),
- 0, 4, 1);
+ if (inst->SrcReg[arg].RelAddr) {
+ c->vp->use_const_buffer = GL_TRUE;
+ continue;
+ }
+
+ if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+ c->constant_map[inst->SrcReg[arg].Index] = constant++;
+ }
}
- reg += (constant + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- /* XXX 0 causes a bug elsewhere... */
- c->prog_data.nr_params = MAX2(constant * 4, 4);
}
- else {
- /* use a section of the GRF for constants */
- GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
- c->prog_data.curb_read_length = reg - 1;
- c->prog_data.nr_params = nr_params * 4;
+ /* If we ran out of push constant space, then we'll also upload all
+ * constants through the pull constant buffer so that they can be
+ * accessed no matter what. For relative addressing (the common
+ * case) we need them all in place anyway.
+ */
+ if (constant == max_constant)
+ c->vp->use_const_buffer = GL_TRUE;
+
+ for (i = 0; i < constant; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4),
+ 0, 4, 1);
}
+ reg += (constant + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = constant * 4;
+ /* XXX 0 causes a bug elsewhere... */
+ if (intel->gen < 6 && c->prog_data.nr_params == 0)
+ c->prog_data.nr_params = 4;
/* Allocate input regs:
*/
@@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
mrf = 4;
first_reladdr_output = get_first_reladdr_output(&c->vp->program);
- for (i = 0; i < VERT_RESULT_MAX; i++) {
+
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ vert_result_reoder[i] = i;
+
+ /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */
+ if (intel->gen >= 6 && c->key.two_side_color) {
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+
+ if (bfc) {
+ for (i = 0; i < bfc; i++) {
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i;
+ vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i;
+ }
+
+ for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) {
+ vert_result_reoder[i] = i - bfc;
+ }
+ }
+ }
+
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ i = vert_result_reoder[j];
+
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
@@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else if (i == VERT_RESULT_PSIZ) {
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
else {
/* Two restrictions on our compute-to-MRF here. The
@@ -574,9 +625,18 @@ static void emit_max( struct brw_compile *p,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
- brw_SEL(p, dst, arg0, arg1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ } else {
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
}
static void emit_min( struct brw_compile *p,
@@ -584,9 +644,18 @@ static void emit_min( struct brw_compile *p,
struct brw_reg arg0,
struct brw_reg arg1 )
{
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
- brw_SEL(p, dst, arg0, arg1);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6) {
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ } else {
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
+ brw_SEL(p, dst, arg0, arg1);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
}
static void emit_math1_gen4(struct brw_vs_compile *c,
@@ -680,7 +749,7 @@ emit_math1(struct brw_vs_compile *c,
emit_math1_gen4(c, function, dst, arg0, precision);
}
-static void emit_math2( struct brw_vs_compile *c,
+static void emit_math2_gen4( struct brw_vs_compile *c,
GLuint function,
struct brw_reg dst,
struct brw_reg arg0,
@@ -688,14 +757,11 @@ static void emit_math2( struct brw_vs_compile *c,
GLuint precision)
{
struct brw_compile *p = &c->func;
- struct intel_context *intel = &p->brw->intel;
struct brw_reg tmp = dst;
GLboolean need_tmp = GL_FALSE;
- if (dst.file != BRW_GENERAL_REGISTER_FILE)
- need_tmp = GL_TRUE;
-
- if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf)
+ if (dst.file != BRW_GENERAL_REGISTER_FILE ||
+ dst.dw1.bits.writemask != 0xf)
need_tmp = GL_TRUE;
if (need_tmp)
@@ -718,6 +784,53 @@ static void emit_math2( struct brw_vs_compile *c,
}
}
+static void emit_math2_gen6( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg tmp_src0, tmp_src1, tmp_dst;
+
+ tmp_src0 = get_tmp(c);
+ tmp_src1 = get_tmp(c);
+ tmp_dst = get_tmp(c);
+
+ brw_MOV(p, tmp_src0, arg0);
+ brw_MOV(p, tmp_src1, arg1);
+
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_math2(p,
+ tmp_dst,
+ function,
+ tmp_src0,
+ tmp_src1);
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ brw_MOV(p, dst, tmp_dst);
+
+ release_tmp(c, tmp_src0);
+ release_tmp(c, tmp_src1);
+ release_tmp(c, tmp_dst);
+}
+
+static void emit_math2( struct brw_vs_compile *c,
+ GLuint function,
+ struct brw_reg dst,
+ struct brw_reg arg0,
+ struct brw_reg arg1,
+ GLuint precision)
+{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6)
+ emit_math2_gen6(c, function, dst, arg0, arg1, precision);
+ else
+ emit_math2_gen4(c, function, dst, arg0, arg1, precision);
+}
static void emit_exp_noalias( struct brw_vs_compile *c,
struct brw_reg dst,
@@ -990,8 +1103,6 @@ get_constant(struct brw_vs_compile *c,
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
if (c->current_const[argIndex].index != src->Index) {
/* Keep track of the last constant loaded in this slot, for reuse. */
c->current_const[argIndex].index = src->Index;
@@ -1022,14 +1133,14 @@ get_reladdr_constant(struct brw_vs_compile *c,
{
const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
struct brw_reg const_reg = c->current_const[argIndex].reg;
- struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
- struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ uint32_t offset;
assert(argIndex < 3);
- assert(c->func.brw->intel.gen < 6); /* FINISHME */
-
/* Can't reuse a reladdr constant load. */
c->current_const[argIndex].index = -1;
@@ -1038,15 +1149,21 @@ get_reladdr_constant(struct brw_vs_compile *c,
src->Index, argIndex, c->current_const[argIndex].reg.nr);
#endif
- brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16));
+ if (intel->gen >= 6) {
+ offset = src->Index;
+ } else {
+ struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D);
+ brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16));
+ addr_reg = byte_addr_reg;
+ offset = 16 * src->Index;
+ }
/* fetch the first vec4 */
brw_dp_READ_4_vs_relative(p,
- const_reg, /* writeback dest */
- byte_addr_reg, /* address register */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
- );
+ const_reg,
+ addr_reg,
+ offset,
+ SURF_INDEX_VERT_CONST_BUFFER);
return const_reg;
}
@@ -1241,22 +1358,18 @@ get_src_reg( struct brw_vs_compile *c,
case PROGRAM_UNIFORM:
case PROGRAM_ENV_PARAM:
case PROGRAM_LOCAL_PARAM:
- if (c->vp->use_const_buffer) {
- if (!relAddr && c->constant_map[index] != -1) {
- assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
- } else if (relAddr)
+ if (!relAddr && c->constant_map[index] != -1) {
+ /* Take from the push constant buffer if possible. */
+ assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+ } else {
+ /* Must be in the pull constant buffer then. */
+ assert(c->vp->use_const_buffer);
+ if (relAddr)
return get_reladdr_constant(c, inst, argIndex);
else
return get_constant(c, inst, argIndex);
}
- else if (relAddr) {
- return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16);
- }
- else {
- assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
- return c->regs[PROGRAM_STATE_VAR][index];
- }
case PROGRAM_ADDRESS:
assert(index == 0);
return c->regs[file][index];
@@ -1585,6 +1698,8 @@ static void emit_vertex_write( struct brw_vs_compile *c)
break;
if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i)))
continue;
+ if (i == VERT_RESULT_PSIZ)
+ continue;
if (i >= VERT_RESULT_TEX0 &&
c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) {
@@ -1895,7 +2010,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+ emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL);
break;
case OPCODE_SEQ:
@@ -1969,35 +2084,42 @@ void brw_vs_emit(struct brw_vs_compile *c )
break;
case OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p, if_depth_in_loop[loop_depth]);
+ if (intel->gen >= 6) {
+ brw_CONT_gen6(p, loop_inst[loop_depth - 1]);
+ } else {
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
+ }
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case OPCODE_ENDLOOP:
- {
- clear_current_const(c);
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- loop_depth--;
-
- if (intel->gen == 5)
- br = 2;
-
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BEGINLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+
+ case OPCODE_ENDLOOP: {
+ clear_current_const(c);
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ loop_depth--;
+
+ if (intel->gen == 5)
+ br = 2;
+
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
break;
+
case OPCODE_BRA:
brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
@@ -2088,6 +2210,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
brw_resolve_cals(p);
+ brw_set_uip_jip(p);
brw_optimize(p);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index ccdc18e0b8d..656501b4f79 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_emit(c);
}
+static void
+brw_wm_payload_setup(struct brw_context *brw,
+ struct brw_wm_compile *c)
+{
+ struct intel_context *intel = &brw->intel;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+ if (intel->gen >= 6) {
+ /* R0-1: masks, pixel X/Y coordinates. */
+ c->nr_payload_regs = 2;
+ /* R2: only for 32-pixel dispatch.*/
+ /* R3-4: perspective pixel location barycentric */
+ c->nr_payload_regs += 2;
+ /* R5-6: perspective pixel location bary for dispatch width != 8 */
+ if (c->dispatch_width == 16) {
+ c->nr_payload_regs += 2;
+ }
+ /* R7-10: perspective centroid barycentric */
+ /* R11-14: perspective sample barycentric */
+ /* R15-18: linear pixel location barycentric */
+ /* R19-22: linear centroid barycentric */
+ /* R23-26: linear sample barycentric */
+
+ /* R27: interpolated depth if uses source depth */
+ if (uses_depth) {
+ c->source_depth_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R28: interpolated depth if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
+ */
+ if (uses_depth) {
+ c->source_w_reg = c->nr_payload_regs;
+ c->nr_payload_regs++;
+ if (c->dispatch_width == 16) {
+ /* R30: interpolated W if not 8-wide. */
+ c->nr_payload_regs++;
+ }
+ }
+ /* R31: MSAA position offsets. */
+ /* R32-: bary for 32-pixel. */
+ /* R58-59: interp W for 32-pixel. */
+
+ if (c->fp->program.Base.OutputsWritten &
+ BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ c->source_depth_to_render_target = GL_TRUE;
+ c->computes_depth = GL_TRUE;
+ }
+ } else {
+ brw_wm_lookup_iz(intel, c);
+ }
+}
/**
* All Mesa program -> GPU code generation goes through this function.
@@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw,
brw_init_compile(brw, &c->func);
- /* temporary sanity check assertion */
- ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+ brw_wm_payload_setup(brw, c);
if (!brw_wm_fs_emit(brw, c)) {
/*
* Shaders which use GLSL features such as flow control are handled
* differently from "simple" shaders.
*/
- if (fp->isGLSL) {
- c->dispatch_width = 8;
- brw_wm_glsl_emit(brw, c);
- }
- else {
- c->dispatch_width = 16;
- brw_wm_non_glsl_emit(brw, c);
- }
+ c->dispatch_width = 16;
+ brw_wm_payload_setup(brw, c);
+ brw_wm_non_glsl_emit(brw, c);
}
+ c->prog_data.dispatch_width = c->dispatch_width;
/* Scratch space is used for register spilling */
if (c->last_scratch) {
@@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw,
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
- struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &brw->intel.ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
- GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
GLuint lookup = 0;
GLuint line_aa;
GLuint i;
@@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
}
}
- if (intel->gen >= 6) {
- /* R0-1: masks, pixel X/Y coordinates. */
- key->nr_payload_regs = 2;
- /* R2: only for 32-pixel dispatch.*/
- /* R3-4: perspective pixel location barycentric */
- key->nr_payload_regs += 2;
- /* R5-6: perspective pixel location bary for dispatch width != 8 */
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- key->nr_payload_regs += 2;
- }
- /* R7-10: perspective centroid barycentric */
- /* R11-14: perspective sample barycentric */
- /* R15-18: linear pixel location barycentric */
- /* R19-22: linear centroid barycentric */
- /* R23-26: linear sample barycentric */
-
- /* R27: interpolated depth if uses source depth */
- if (uses_depth) {
- key->source_depth_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R28: interpolated depth if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W.
- */
- if (uses_depth) {
- key->source_w_reg = key->nr_payload_regs;
- key->nr_payload_regs++;
- if (!fp->isGLSL) { /* dispatch_width != 8 */
- /* R30: interpolated W if not 8-wide. */
- key->nr_payload_regs++;
- }
- }
- /* R31: MSAA position offsets. */
- /* R32-: bary for 32-pixel. */
- /* R58-59: interp W for 32-pixel. */
-
- if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
- key->source_depth_to_render_target = GL_TRUE;
- key->computes_depth = GL_TRUE;
- }
-
- } else {
- brw_wm_lookup_iz(intel,
- line_aa,
- lookup,
- uses_depth,
- key);
- }
+ key->iz_lookup = lookup;
+ key->line_aa = line_aa;
+ key->stats_wm = brw->intel.stats_wm;
/* BRW_NEW_WM_INPUT_DIMENSIONS */
key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
@@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw,
swizzles[2] = SWIZZLE_ZERO;
} else if (t->DepthMode == GL_LUMINANCE) {
swizzles[3] = SWIZZLE_ONE;
+ } else if (t->DepthMode == GL_RED) {
+ swizzles[1] = SWIZZLE_ZERO;
+ swizzles[2] = SWIZZLE_ZERO;
+ swizzles[3] = SWIZZLE_ZERO;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 2ca685784fc..e7f3cfbb75f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -59,16 +59,9 @@
#define AA_ALWAYS 2
struct brw_wm_prog_key {
- GLuint source_depth_reg:3;
- GLuint source_w_reg:3;
- GLuint aa_dest_stencil_reg:3;
- GLuint dest_depth_reg:3;
- GLuint nr_payload_regs:4;
- GLuint computes_depth:1; /* could be derived from program string */
- GLuint source_depth_to_render_target:1;
+ GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint linear_color:1; /**< linear interpolation vs perspective interp */
- GLuint runtime_check_aads_emit:1;
GLuint nr_color_regions:5;
GLuint render_to_fbo:1;
@@ -81,6 +74,8 @@ struct brw_wm_prog_key {
GLushort drawable_height;
GLbitfield64 vp_outputs_written;
+ GLuint iz_lookup;
+ GLuint line_aa;
GLuint program_string_id:32;
};
@@ -204,6 +199,15 @@ struct brw_wm_compile {
PASS2_DONE
} state;
+ GLuint source_depth_reg:3;
+ GLuint source_w_reg:3;
+ GLuint aa_dest_stencil_reg:3;
+ GLuint dest_depth_reg:3;
+ GLuint nr_payload_regs:4;
+ GLuint computes_depth:1; /* could be derived from program string */
+ GLuint source_depth_to_render_target:1;
+ GLuint runtime_check_aads_emit:1;
+
/* Initial pass - translate fp instructions to fp instructions,
* simplifying and adding instructions for interpolation and
* framebuffer writes.
@@ -306,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key );
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c);
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
/* brw_wm_emit.c */
@@ -381,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c,
void emit_frontfacing(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask);
-void emit_kil_nv(struct brw_wm_compile *c);
void emit_linterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 96fecc97ee2..a0e86034e1e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -896,10 +896,14 @@ void emit_math1(struct brw_wm_compile *c,
BRW_MATH_SATURATE_NONE);
struct brw_reg src;
- if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
- arg0[0].file != BRW_GENERAL_REGISTER_FILE)) {
+ if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
+ arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
+ arg0[0].negate || arg0[0].abs)) {
/* Gen6 math requires that source and dst horizontal stride be 1,
* and that the argument be in the GRF.
+ *
+ * The hardware ignores source modifiers (negate and abs) on math
+ * instructions, so we also move to a temp to set those up.
*/
src = dst[dst_chan];
brw_MOV(p, src, arg0[0]);
@@ -1301,9 +1305,15 @@ static void emit_kil( struct brw_wm_compile *c,
struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_reg pixelmask;
GLuint i, j;
+ if (intel->gen >= 6)
+ pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+ else
+ pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
for (i = 0; i < 4; i++) {
/* Check if we've already done the comparison for this reg
* -- common when someone does KIL TEMP.wwww.
@@ -1319,26 +1329,11 @@ static void emit_kil( struct brw_wm_compile *c,
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+ brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
brw_pop_insn_state(p);
}
}
-/* KIL_NV kills the pixels that are currently executing, not based on a test
- * of the arguments.
- */
-void emit_kil_nv( struct brw_wm_compile *c )
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
- brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
- brw_pop_insn_state(p);
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -1387,8 +1382,8 @@ static void emit_aa( struct brw_wm_compile *c,
GLuint reg )
{
struct brw_compile *p = &c->func;
- GLuint comp = c->key.aa_dest_stencil_reg / 2;
- GLuint off = c->key.aa_dest_stencil_reg % 2;
+ GLuint comp = c->aa_dest_stencil_reg / 2;
+ GLuint off = c->aa_dest_stencil_reg % 2;
struct brw_reg aa = offset(arg1[comp], off);
brw_push_insn_state(p);
@@ -1416,11 +1411,10 @@ void emit_fb_write(struct brw_wm_compile *c,
struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
- int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
- if (c->key.aa_dest_stencil_reg)
+ if (c->aa_dest_stencil_reg)
nr += 1;
/* I don't really understand how this achieves the color interleave
@@ -1428,11 +1422,6 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
- if (intel->gen >= 6)
- base_reg = nr;
- else
- base_reg = 0;
-
for (channel = 0; channel < 4; channel++) {
if (intel->gen >= 6) {
/* gen6 SIMD16 single source DP write looks like:
@@ -1493,9 +1482,9 @@ void emit_fb_write(struct brw_wm_compile *c,
brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
+ if (c->source_depth_to_render_target)
{
- if (c->key.computes_depth)
+ if (c->computes_depth)
brw_MOV(p, brw_message_reg(nr), arg2[2]);
else
brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
@@ -1503,10 +1492,10 @@ void emit_fb_write(struct brw_wm_compile *c,
nr += 2;
}
- if (c->key.dest_depth_reg)
+ if (c->dest_depth_reg)
{
- GLuint comp = c->key.dest_depth_reg / 2;
- GLuint off = c->key.dest_depth_reg % 2;
+ GLuint comp = c->dest_depth_reg / 2;
+ GLuint off = c->dest_depth_reg % 2;
if (off != 0) {
brw_push_insn_state(p);
@@ -1524,15 +1513,27 @@ void emit_fb_write(struct brw_wm_compile *c,
}
if (intel->gen >= 6) {
- /* Subtract off the message header, since we send headerless. */
- nr -= 2;
+ /* Load the message header. There's no implied move from src0
+ * to the base mrf on gen6.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0));
+ brw_pop_insn_state(p);
+
+ if (target != 0) {
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ 0,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(target));
+ }
}
- if (!c->key.runtime_check_aads_emit) {
- if (c->key.aa_dest_stencil_reg)
+ if (!c->runtime_check_aads_emit) {
+ if (c->aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, base_reg, nr, target, eot);
+ fire_fb_write(c, 0, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -1897,10 +1898,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
-
default:
printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 2cae6988804..4759b289a0c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
{
- /* This is only called for producing 1/w in pre-gen6 interp. for
- * gen6, the interp opcodes don't use this argument.
+ /* This is called for producing 1/w in pre-gen6 interp. For gen6,
+ * the interp opcodes don't use this argument. But to keep the
+ * nr_args = 3 expectations of pinterp happy, just stuff delta_xy
+ * into the slot.
*/
if (c->func.brw->intel.gen >= 6)
- return src_undef();
+ return c->delta_xy;
if (src_is_undef(c->pixel_w)) {
struct prog_dst_register pixel_w = get_temp(c);
@@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c,
struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
struct prog_src_register deltas;
- if (c->func.brw->intel.gen < 6) {
- deltas = get_delta_xy(c);
- } else {
- deltas = src_undef();
- }
+ deltas = get_delta_xy(c);
/* Need to use PINTERP on attributes which have been
* multiplied by 1/W in the SF program, and LINTERP on those
@@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
precalc_lit(c, inst);
break;
+ case OPCODE_RSQ:
+ out = emit_scalar_insn(c, inst);
+ out->SrcReg[0].Abs = GL_TRUE;
+ break;
+
case OPCODE_TEX:
precalc_tex(c, inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
deleted file mode 100644
index 7fe8ab1f334..00000000000
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ /dev/null
@@ -1,1035 +0,0 @@
-#include "main/macros.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component);
-
-/**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
- */
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
-{
- int i;
-
- if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE))
- return GL_TRUE;
-
- for (i = 0; i < fp->Base.NumInstructions; i++) {
- const struct prog_instruction *inst = &fp->Base.Instructions[i];
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_IF:
- case OPCODE_ENDIF:
- case OPCODE_CAL:
- case OPCODE_BRK:
- case OPCODE_RET:
- case OPCODE_BGNLOOP:
- return GL_TRUE;
- default:
- break;
- }
- }
- return GL_FALSE;
-}
-
-
-
-static void
-reclaim_temps(struct brw_wm_compile *c);
-
-
-/** Mark GRF register as used. */
-static void
-prealloc_grf(struct brw_wm_compile *c, int r)
-{
- c->used_grf[r] = GL_TRUE;
-}
-
-
-/** Mark given GRF register as not in use. */
-static void
-release_grf(struct brw_wm_compile *c, int r)
-{
- /*assert(c->used_grf[r]);*/
- c->used_grf[r] = GL_FALSE;
- c->first_free_grf = MIN2(c->first_free_grf, r);
-}
-
-
-/** Return index of a free GRF, mark it as used. */
-static int
-alloc_grf(struct brw_wm_compile *c)
-{
- GLuint r;
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- /* no free temps, try to reclaim some */
- reclaim_temps(c);
- c->first_free_grf = 0;
-
- /* try alloc again */
- for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
- if (!c->used_grf[r]) {
- c->used_grf[r] = GL_TRUE;
- c->first_free_grf = r + 1; /* a guess */
- return r;
- }
- }
-
- for (r = 0; r < BRW_WM_MAX_GRF; r++) {
- assert(c->used_grf[r]);
- }
-
- /* really, no free GRF regs found */
- if (!c->out_of_regs) {
- /* print warning once per compilation */
- _mesa_warning(NULL, "i965: ran out of registers for fragment program");
- c->out_of_regs = GL_TRUE;
- }
-
- return -1;
-}
-
-
-/** Return number of GRF registers used */
-static int
-num_grf_used(const struct brw_wm_compile *c)
-{
- int r;
- for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
- if (c->used_grf[r])
- return r + 1;
- return 0;
-}
-
-
-
-/**
- * Record the mapping of a Mesa register to a hardware register.
- */
-static void set_reg(struct brw_wm_compile *c, int file, int index,
- int component, struct brw_reg reg)
-{
- c->wm_regs[file][index][component].reg = reg;
- c->wm_regs[file][index][component].inited = GL_TRUE;
-}
-
-static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
-{
- struct brw_reg reg;
-
- /* if we need to allocate another temp, grow the tmp_regs[] array */
- if (c->tmp_index == c->tmp_max) {
- int r = alloc_grf(c);
- if (r < 0) {
- /*printf("Out of temps in %s\n", __FUNCTION__);*/
- r = 50; /* XXX random register! */
- }
- c->tmp_regs[ c->tmp_max++ ] = r;
- }
-
- /* form the GRF register */
- reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
- /*printf("alloc_temp %d\n", reg.nr);*/
- assert(reg.nr < BRW_WM_MAX_GRF);
- return reg;
-
-}
-
-/**
- * Save current temp register info.
- * There must be a matching call to release_tmps().
- */
-static int mark_tmps(struct brw_wm_compile *c)
-{
- return c->tmp_index;
-}
-
-static void release_tmps(struct brw_wm_compile *c, int mark)
-{
- c->tmp_index = mark;
-}
-
-/**
- * Convert Mesa src register to brw register.
- *
- * Since we're running in SOA mode each Mesa register corresponds to four
- * hardware registers. We allocate the hardware registers as needed here.
- *
- * \param file register file, one of PROGRAM_x
- * \param index register number
- * \param component src component (X=0, Y=1, Z=2, W=3)
- * \param nr not used?!?
- * \param neg negate value?
- * \param abs take absolute value?
- */
-static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component,
- int nr, GLuint neg, GLuint abs)
-{
- struct brw_reg reg;
- switch (file) {
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- case PROGRAM_UNIFORM:
- file = PROGRAM_STATE_VAR;
- break;
- case PROGRAM_UNDEFINED:
- return brw_null_reg();
- case PROGRAM_TEMPORARY:
- case PROGRAM_INPUT:
- case PROGRAM_OUTPUT:
- case PROGRAM_PAYLOAD:
- break;
- default:
- _mesa_problem(NULL, "Unexpected file in get_reg()");
- return brw_null_reg();
- }
-
- assert(index < 256);
- assert(component < 4);
-
- /* see if we've already allocated a HW register for this Mesa register */
- if (c->wm_regs[file][index][component].inited) {
- /* yes, re-use */
- reg = c->wm_regs[file][index][component].reg;
- }
- else {
- /* no, allocate new register */
- int grf = alloc_grf(c);
- /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
- if (grf < 0) {
- /* totally out of temps */
- grf = 51; /* XXX random register! */
- }
-
- reg = brw_vec8_grf(grf, 0);
- /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
-
- set_reg(c, file, index, component, reg);
- }
-
- if (neg & (1 << component)) {
- reg = negate(reg);
- }
- if (abs)
- reg = brw_abs(reg);
- return reg;
-}
-
-
-
-/**
- * This is called if we run out of GRF registers. Examine the live intervals
- * of temp regs in the program and free those which won't be used again.
- */
-static void
-reclaim_temps(struct brw_wm_compile *c)
-{
- GLint intBegin[MAX_PROGRAM_TEMPS];
- GLint intEnd[MAX_PROGRAM_TEMPS];
- int index;
-
- /*printf("Reclaim temps:\n");*/
-
- _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
- intBegin, intEnd);
-
- for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
- if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
- /* program temp[i] can be freed */
- int component;
- /*printf(" temp[%d] is dead\n", index);*/
- for (component = 0; component < 4; component++) {
- if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
- int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
- release_grf(c, r);
- /*
- printf(" Reclaim temp %d, reg %d at inst %d\n",
- index, r, c->cur_inst);
- */
- c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
- }
- }
- }
- }
-}
-
-
-
-
-/**
- * Preallocate registers. This sets up the Mesa to hardware register
- * mapping for certain registers, such as constants (uniforms/state vars)
- * and shader inputs.
- */
-static void prealloc_reg(struct brw_wm_compile *c)
-{
- struct intel_context *intel = &c->func.brw->intel;
- int i, j;
- struct brw_reg reg;
- int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
- GLuint reg_index = 0;
-
- memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
- c->first_free_grf = 0;
-
- for (i = 0; i < 4; i++) {
- if (i < (c->key.nr_payload_regs + 1) / 2)
- reg = brw_vec8_grf(i * 2, 0);
- else
- reg = brw_vec8_grf(0, 0);
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
- }
- set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0,
- brw_vec8_grf(c->key.source_w_reg, 0));
- reg_index += c->key.nr_payload_regs;
-
- /* constants */
- {
- const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
- const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
-
- /* use a real constant buffer, or just use a section of the GRF? */
- /* XXX this heuristic may need adjustment... */
- if ((nr_params + nr_temps) * 4 + reg_index > 80) {
- for (i = 0; i < nr_params; i++) {
- float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
- for (j = 0; j < 4; j++) {
- c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
- c->prog_data.nr_pull_params++;
- }
- }
-
- c->prog_data.nr_params = 0;
- }
- /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
-
- if (!c->prog_data.nr_pull_params) {
- const struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
-
- /* number of float constants in CURBE */
- c->prog_data.nr_params = 4 * nr_params;
-
- /* loop over program constants (float[4]) */
- for (i = 0; i < nr_params; i++) {
- /* loop over XYZW channels */
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(reg_index + index / 8, index % 8);
- /* Save pointer to parameter/constant value.
- * Constants will be copied in prepare_constant_buffer()
- */
- c->prog_data.param[index] = &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- /* number of constant regs used (each reg is float[8]) */
- c->nr_creg = ALIGN(nr_params, 2) / 2;
- reg_index += c->nr_creg;
- }
- }
-
- /* fragment shader inputs: One 2-reg pair of interpolation
- * coefficients for each vec4 to be set up.
- */
- if (intel->gen >= 6) {
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i)))
- continue;
-
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++) {
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- }
- reg_index += 2;
- }
- urb_read_length = reg_index;
- } else {
- for (i = 0; i < VERT_RESULT_MAX; i++) {
- int fp_input;
-
- if (i >= VERT_RESULT_VAR0)
- fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
- else if (i <= VERT_RESULT_TEX7)
- fp_input = i;
- else
- fp_input = -1;
-
- if (fp_input >= 0 && inputs & (1 << fp_input)) {
- urb_read_length = reg_index;
- reg = brw_vec8_grf(reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
- }
- if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
- reg_index += 2;
- }
- }
- }
-
- c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
- c->prog_data.urb_read_length = urb_read_length;
- c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
- reg_index += 2;
-
- /* mark GRF regs [0..reg_index-1] as in-use */
- for (i = 0; i < reg_index; i++)
- prealloc_grf(c, i);
-
- /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
- prealloc_grf(c, 126);
- prealloc_grf(c, 127);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- struct brw_reg dst[4];
-
- switch (inst->Opcode) {
- case OPCODE_TEX:
- case OPCODE_TXB:
- /* Allocate the channels of texture results contiguously,
- * since they are written out that way by the sampler unit.
- */
- for (j = 0; j < 4; j++) {
- dst[j] = get_dst_reg(c, inst, j);
- if (j != 0)
- assert(dst[j].nr == dst[j - 1].nr + 1);
- }
- break;
- default:
- break;
- }
- }
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
-
- switch (inst->Opcode) {
- case WM_DELTAXY:
- /* Allocate WM_DELTAXY destination on G45/GM45 to an
- * even-numbered GRF if possible so that we can use the PLN
- * instruction.
- */
- if (inst->DstReg.WriteMask == WRITEMASK_XY &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
- !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
- (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
- int grf;
-
- for (grf = c->first_free_grf & ~1;
- grf < BRW_WM_MAX_GRF;
- grf += 2)
- {
- if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
- c->used_grf[grf] = GL_TRUE;
- c->used_grf[grf + 1] = GL_TRUE;
- c->first_free_grf = grf + 2; /* a guess */
-
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
- brw_vec8_grf(grf, 0));
- set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
- brw_vec8_grf(grf + 1, 0));
- break;
- }
- }
- }
- default:
- break;
- }
- }
-
- /* An instruction may reference up to three constants.
- * They'll be found in these registers.
- * XXX alloc these on demand!
- */
- if (c->prog_data.nr_pull_params) {
- for (i = 0; i < 3; i++) {
- c->current_const[i].index = -1;
- c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
- }
- }
-#if 0
- printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
- printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
-#endif
-}
-
-
-/**
- * Check if any of the instruction's src registers are constants, uniforms,
- * or statevars. If so, fetch any constants that we don't already have in
- * the three GRF slots.
- */
-static void fetch_constants(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint i;
-
- /* loop over instruction src regs */
- for (i = 0; i < 3; i++) {
- const struct prog_src_register *src = &inst->SrcReg[i];
- if (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM) {
- c->current_const[i].index = src->Index;
-
-#if 0
- printf(" fetch const[%d] for arg %d into reg %d\n",
- src->Index, i, c->current_const[i].reg.nr);
-#endif
-
- /* need to fetch the constant now */
- brw_oword_block_read(p,
- c->current_const[i].reg,
- brw_message_reg(1),
- 16 * src->Index,
- SURF_INDEX_FRAG_CONST_BUFFER);
- }
- }
-}
-
-
-/**
- * Convert Mesa dst register to brw register.
- */
-static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint component)
-{
- const int nr = 1;
- return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
- 0, 0);
-}
-
-
-static struct brw_reg
-get_src_reg_const(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint component)
-{
- /* We should have already fetched the constant from the constant
- * buffer in fetch_constants(). Now we just have to return a
- * register description that extracts the needed component and
- * smears it across all eight vector components.
- */
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- struct brw_reg const_reg;
-
- assert(component < 4);
- assert(srcRegIndex < 3);
- assert(c->current_const[srcRegIndex].index != -1);
- const_reg = c->current_const[srcRegIndex].reg;
-
- /* extract desired float from the const_reg, and smear */
- const_reg = stride(const_reg, 0, 1, 0);
- const_reg.subnr = component * 4;
-
- if (src->Negate & (1 << component))
- const_reg = negate(const_reg);
- if (src->Abs)
- const_reg = brw_abs(const_reg);
-
-#if 0
- printf(" form const[%d].%d for arg %d, reg %d\n",
- c->current_const[srcRegIndex].index,
- component,
- srcRegIndex,
- const_reg.nr);
-#endif
-
- return const_reg;
-}
-
-
-/**
- * Convert Mesa src register to brw register.
- */
-static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- GLuint srcRegIndex, GLuint channel)
-{
- const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
- const GLuint nr = 1;
- const GLuint component = GET_SWZ(src->Swizzle, channel);
-
- /* Only one immediate value can be used per native opcode, and it
- * has be in the src1 slot, so not all Mesa instructions will get
- * to take advantage of immediate constants.
- */
- if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
- const struct gl_program_parameter_list *params;
-
- params = c->fp->program.Base.Parameters;
-
- /* Extended swizzle terms */
- if (component == SWIZZLE_ZERO) {
- return brw_imm_f(0.0F);
- } else if (component == SWIZZLE_ONE) {
- if (src->Negate)
- return brw_imm_f(-1.0F);
- else
- return brw_imm_f(1.0F);
- }
-
- if (src->File == PROGRAM_CONSTANT) {
- float f = params->ParameterValues[src->Index][component];
-
- if (src->Abs)
- f = fabs(f);
- if (src->Negate)
- f = -f;
-
- return brw_imm_f(f);
- }
- }
-
- if (c->prog_data.nr_pull_params &&
- (src->File == PROGRAM_STATE_VAR ||
- src->File == PROGRAM_CONSTANT ||
- src->File == PROGRAM_UNIFORM)) {
- return get_src_reg_const(c, inst, srcRegIndex, component);
- }
- else {
- /* other type of source register */
- return get_reg(c, src->File, src->Index, component, nr,
- src->Negate, src->Abs);
- }
-}
-
-static void emit_arl(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg src0, addr_reg;
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
- BRW_ARF_ADDRESS, 0);
- src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
- brw_MOV(p, addr_reg, src0);
- brw_set_saturate(p, 0);
-}
-
-static INLINE struct brw_reg high_words( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
- 0, 8, 2 );
-}
-
-static INLINE struct brw_reg low_words( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
-}
-
-static INLINE struct brw_reg even_bytes( struct brw_reg reg )
-{
- return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
-}
-
-static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
-{
- return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
- 0, 16, 2 );
-}
-
-/**
- * Resolve subroutine calls after code emit is done.
- */
-static void post_wm_emit( struct brw_wm_compile *c )
-{
- brw_resolve_cals(&c->func);
-}
-
-static void
-get_argument_regs(struct brw_wm_compile *c,
- const struct prog_instruction *inst,
- int index,
- struct brw_reg *dst,
- struct brw_reg *regs,
- int mask)
-{
- struct brw_compile *p = &c->func;
- int i, j;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1 << i)) {
- regs[i] = get_src_reg(c, inst, index, i);
-
- /* Unalias destination registers from our sources. */
- if (regs[i].file == BRW_GENERAL_REGISTER_FILE) {
- for (j = 0; j < 4; j++) {
- if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) {
- struct brw_reg tmp = alloc_tmp(c);
- brw_MOV(p, tmp, regs[i]);
- regs[i] = tmp;
- break;
- }
- }
- }
- }
- }
-}
-
-static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
-{
- struct intel_context *intel = &brw->intel;
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
- int if_depth_in_loop[MAX_LOOP_DEPTH];
- GLuint i, if_depth = 0, loop_depth = 0;
- struct brw_compile *p = &c->func;
- struct brw_indirect stack_index = brw_indirect(0, 0);
-
- c->out_of_regs = GL_FALSE;
-
- if_depth_in_loop[loop_depth] = 0;
-
- prealloc_reg(c);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
-
- if (intel->gen >= 6)
- brw_set_acc_write_control(p, 1);
-
- for (i = 0; i < c->nr_fp_insns; i++) {
- const struct prog_instruction *inst = &c->prog_instructions[i];
- int dst_flags;
- struct brw_reg args[3][4], dst[4];
- int j;
- int mark = mark_tmps( c );
-
- c->cur_inst = i;
-
-#if 0
- printf("Inst %d: ", i);
- _mesa_print_instruction(inst);
-#endif
-
- /* fetch any constants that this instruction needs */
- if (c->prog_data.nr_pull_params)
- fetch_constants(c, inst);
-
- if (inst->Opcode != OPCODE_ARL) {
- for (j = 0; j < 4; j++) {
- if (inst->DstReg.WriteMask & (1 << j))
- dst[j] = get_dst_reg(c, inst, j);
- else
- dst[j] = brw_null_reg();
- }
- }
- for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
- get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
-
- dst_flags = inst->DstReg.WriteMask;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- dst_flags |= SATURATE;
-
- if (inst->CondUpdate)
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
- else
- brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
-
- switch (inst->Opcode) {
- case WM_PIXELXY:
- emit_pixel_xy(c, dst, dst_flags);
- break;
- case WM_DELTAXY:
- emit_delta_xy(p, dst, dst_flags, args[0]);
- break;
- case WM_PIXELW:
- emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
- break;
- case WM_LINTERP:
- emit_linterp(p, dst, dst_flags, args[0], args[1]);
- break;
- case WM_PINTERP:
- emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case WM_CINTERP:
- emit_cinterp(p, dst, dst_flags, args[0]);
- break;
- case WM_WPOSXY:
- emit_wpos_xy(c, dst, dst_flags, args[0]);
- break;
- case WM_FB_WRITE:
- emit_fb_write(c, args[0], args[1], args[2],
- INST_AUX_GET_TARGET(inst->Aux),
- inst->Aux & INST_AUX_EOT);
- break;
- case WM_FRONTFACING:
- emit_frontfacing(p, dst, dst_flags);
- break;
- case OPCODE_ADD:
- emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_ARL:
- emit_arl(c, inst);
- break;
- case OPCODE_FRC:
- emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
- break;
- case OPCODE_FLR:
- emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
- break;
- case OPCODE_LRP:
- emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TRUNC:
- emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
- break;
- case OPCODE_MOV:
- case OPCODE_SWZ:
- emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
- break;
- case OPCODE_DP2:
- emit_dp2(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP3:
- emit_dp3(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DP4:
- emit_dp4(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_XPD:
- emit_xpd(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DPH:
- emit_dph(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_RCP:
- emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
- break;
- case OPCODE_RSQ:
- emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
- break;
- case OPCODE_SIN:
- emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
- break;
- case OPCODE_COS:
- emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
- break;
- case OPCODE_EX2:
- emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
- break;
- case OPCODE_LG2:
- emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
- break;
- case OPCODE_CMP:
- emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_MIN:
- emit_min(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAX:
- emit_max(p, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_DDX:
- case OPCODE_DDY:
- emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
- args[0]);
- break;
- case OPCODE_SLT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_L, args[0], args[1]);
- break;
- case OPCODE_SLE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_LE, args[0], args[1]);
- break;
- case OPCODE_SGT:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_G, args[0], args[1]);
- break;
- case OPCODE_SGE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_GE, args[0], args[1]);
- break;
- case OPCODE_SEQ:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_EQ, args[0], args[1]);
- break;
- case OPCODE_SNE:
- emit_sop(p, dst, dst_flags,
- BRW_CONDITIONAL_NEQ, args[0], args[1]);
- break;
- case OPCODE_SSG:
- emit_sign(p, dst, dst_flags, args[0]);
- break;
- case OPCODE_MUL:
- emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_POW:
- emit_math2(c, BRW_MATH_FUNCTION_POW,
- dst, dst_flags, args[0], args[1]);
- break;
- case OPCODE_MAD:
- emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
- break;
- case OPCODE_TEX:
- emit_tex(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- inst->TexSrcUnit,
- (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
- break;
- case OPCODE_TXB:
- emit_txb(c, dst, dst_flags, args[0],
- get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
- 0, 1, 0, 0),
- inst->TexSrcTarget,
- c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
- break;
- case OPCODE_KIL_NV:
- emit_kil_nv(c);
- break;
- case OPCODE_IF:
- assert(if_depth < MAX_IF_DEPTH);
- if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth]++;
- break;
- case OPCODE_ELSE:
- assert(if_depth > 0);
- if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
- break;
- case OPCODE_ENDIF:
- assert(if_depth > 0);
- brw_ENDIF(p, if_inst[--if_depth]);
- if_depth_in_loop[loop_depth]--;
- break;
- case OPCODE_BGNSUB:
- brw_save_label(p, inst->Comment, p->nr_insn);
- break;
- case OPCODE_ENDSUB:
- /* no-op */
- break;
- case OPCODE_CAL:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(4));
- brw_save_call(&c->func, inst->Comment, p->nr_insn);
- brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_pop_insn_state(p);
- break;
-
- case OPCODE_RET:
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_ADD(p, get_addr_reg(stack_index),
- get_addr_reg(stack_index), brw_imm_d(-4));
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
- brw_pop_insn_state(p);
-
- break;
- case OPCODE_BGNLOOP:
- /* XXX may need to invalidate the current_constant regs */
- loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
- if_depth_in_loop[loop_depth] = 0;
- break;
- case OPCODE_BRK:
- brw_BREAK(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_CONT:
- brw_CONT(p, if_depth_in_loop[loop_depth]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- break;
- case OPCODE_ENDLOOP:
- {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- if (intel->gen == 5)
- br = 2;
-
- assert(loop_depth > 0);
- loop_depth--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_inst[loop_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
- break;
- default:
- printf("unsupported opcode %d (%s) in fragment shader\n",
- inst->Opcode, inst->Opcode < MAX_OPCODE ?
- _mesa_opcode_string(inst->Opcode) : "unknown");
- }
-
- /* Release temporaries containing any unaliased source regs. */
- release_tmps( c, mark );
-
- if (inst->CondUpdate)
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- else
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- }
- post_wm_emit(c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("wm-native:\n");
- for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stdout, &p->store[i], intel->gen);
- printf("\n");
- }
-}
-
-/**
- * Do GPU code generation for shaders that use GLSL features such as
- * flow control. Other shaders will be compiled with the
- */
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
-{
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("brw_wm_glsl_emit:\n");
- }
-
- /* initial instruction translation/simplification */
- brw_wm_pass_fp(c);
-
- /* actual code generation */
- brw_wm_emit_glsl(brw, c);
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- brw_wm_print_program(c, "brw_wm_glsl_emit done");
- }
-
- c->prog_data.total_grf = num_grf_used(c);
- c->prog_data.total_scratch = 0;
-}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index 62e556698ba..471ea1c18d6 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -120,14 +120,14 @@ const struct {
* \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES
* \param lookup bitmask of IZ_* flags
*/
-void brw_wm_lookup_iz( struct intel_context *intel,
- GLuint line_aa,
- GLuint lookup,
- GLboolean ps_uses_depth,
- struct brw_wm_prog_key *key )
+void brw_wm_lookup_iz(struct intel_context *intel,
+ struct brw_wm_compile *c)
{
GLuint reg = 2;
GLboolean kill_stats_promoted_workaround = GL_FALSE;
+ int lookup = c->key.iz_lookup;
+ bool uses_depth = (c->fp->program.Base.InputsRead &
+ (1 << FRAG_ATTRIB_WPOS)) != 0;
assert (lookup < IZ_BIT_MAX);
@@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel,
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
- if (intel->stats_wm &&
+ if (c->key.stats_wm &&
(lookup & IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = GL_TRUE;
}
if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
- key->computes_depth = 1;
+ c->computes_depth = 1;
- if (wm_iz_table[lookup].sd_present || ps_uses_depth ||
+ if (wm_iz_table[lookup].sd_present || uses_depth ||
kill_stats_promoted_workaround) {
- key->source_depth_reg = reg;
+ c->source_depth_reg = reg;
reg += 2;
}
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
- key->source_depth_to_render_target = 1;
+ c->source_depth_to_render_target = 1;
- if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
- key->aa_dest_stencil_reg = reg;
- key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
- line_aa == AA_SOMETIMES);
+ if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) {
+ c->aa_dest_stencil_reg = reg;
+ c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
+ c->key.line_aa == AA_SOMETIMES);
reg++;
}
if (wm_iz_table[lookup].dd_present) {
- key->dest_depth_reg = reg;
+ c->dest_depth_reg = reg;
reg+=2;
}
- key->nr_payload_regs = reg;
+ c->nr_payload_regs = reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 83152526b3a..f78bdc31866 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
GLuint i;
for (i = 0; i < 4; i++) {
- GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i;
+ GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 3a2874b6ddf..7d6a3fa9f12 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
if (inst->opcode == WM_FB_WRITE) {
track_arg(c, inst, 0, WRITEMASK_XYZW);
track_arg(c, inst, 1, WRITEMASK_XYZW);
- if (c->key.source_depth_to_render_target &&
- c->key.computes_depth)
+ if (c->source_depth_to_render_target && c->computes_depth)
track_arg(c, inst, 2, WRITEMASK_Z);
else
track_arg(c, inst, 2, 0);
@@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
- case OPCODE_KIL_NV:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 44e39538145..8c2b9e7020b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c,
*/
static void init_registers( struct brw_wm_compile *c )
{
+ struct brw_context *brw = c->func.brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr_interp_regs = 0;
GLuint i = 0;
GLuint j;
@@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
- for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++)
+ for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++)
prealloc_reg(c, &c->payload.depth[j], i++);
for (j = 0; j < c->nr_creg; j++)
prealloc_reg(c, &c->creg[j], i++);
- for (j = 0; j < VERT_RESULT_MAX; j++) {
- if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
- int fp_index;
-
- if (j >= VERT_RESULT_VAR0)
- fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
- else if (j <= VERT_RESULT_TEX7)
- fp_index = j;
- else
- fp_index = -1;
-
- nr_interp_regs++;
- if (fp_index >= 0)
- prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ if (intel->gen >= 6) {
+ for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) {
+ if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) {
+ nr_interp_regs++;
+ prealloc_reg(c, &c->payload.input_interp[j], i++);
+ }
+ }
+ } else {
+ for (j = 0; j < VERT_RESULT_MAX; j++) {
+ if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
+ int fp_index;
+
+ if (j >= VERT_RESULT_VAR0)
+ fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+ else if (j <= VERT_RESULT_TEX7)
+ fp_index = j;
+ else
+ fp_index = -1;
+
+ nr_interp_regs++;
+ if (fp_index >= 0)
+ prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+ }
}
+ assert(nr_interp_regs >= 1);
}
- assert(nr_interp_regs >= 1);
- c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2);
+ c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2);
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index fea96d35381..e7c97a1cb05 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap )
static drm_intel_bo *upload_default_color( struct brw_context *brw,
const GLfloat *color )
{
- struct brw_sampler_default_color sdc;
+ struct intel_context *intel = &brw->intel;
- COPY_4V(sdc.color, color);
-
- return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
- &sdc, sizeof(sdc));
+ if (intel->gen >= 5) {
+ struct gen5_sampler_default_color sdc;
+
+ memset(&sdc, 0, sizeof(sdc));
+
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]);
+ UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]);
+
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]);
+
+ /* XXX: Fill in half floats */
+ /* XXX: Fill in signed bytes */
+
+ COPY_4V(sdc.f, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ } else {
+ struct brw_sampler_default_color sdc;
+
+ COPY_4V(sdc.color, color);
+
+ return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+ &sdc, sizeof(sdc));
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 76de7b7b6f6..e9ef635bca2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
struct gl_context *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
@@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = bfp->isGLSL;
/* If using the fragment shader backend, the program is always
* 8-wide.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 76fc94df1f6..ad744044c70 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -139,6 +139,8 @@ static GLuint translate_tex_format( gl_format mesa_format,
return BRW_SURFACEFORMAT_I16_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A16_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R16_UNORM;
else
return BRW_SURFACEFORMAT_L16_UNORM;
@@ -174,6 +176,8 @@ static GLuint translate_tex_format( gl_format mesa_format,
return BRW_SURFACEFORMAT_I24X8_UNORM;
else if (depth_mode == GL_ALPHA)
return BRW_SURFACEFORMAT_A24X8_UNORM;
+ else if (depth_mode == GL_RED)
+ return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS;
else
return BRW_SURFACEFORMAT_L24X8_UNORM;
@@ -274,6 +278,7 @@ brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo **out_bo,
uint32_t *out_offset)
{
+ struct intel_context *intel = &brw->intel;
const GLint w = width - 1;
struct brw_surface_state surf;
void *map;
@@ -284,6 +289,9 @@ brw_create_constant_surface(struct brw_context *brw,
surf.ss0.surface_type = BRW_SURFACE_BUFFER;
surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+ if (intel->gen >= 6)
+ surf.ss0.render_cache_read_write = 1;
+
assert(bo);
surf.ss1.base_addr = bo->offset; /* reloc */
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 800a2555214..c2631a7b4df 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -35,6 +35,7 @@
struct gen6_blend_state_key {
GLboolean color_blend, alpha_enabled;
GLboolean dither;
+ GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4];
GLenum logic_op;
@@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw,
memset(key, 0, sizeof(*key));
/* _NEW_COLOR */
+ memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask));
+
+ /* _NEW_COLOR */
if (ctx->Color._LogicOpEnabled)
key->logic_op = ctx->Color.LogicOp;
else
@@ -87,54 +91,62 @@ static drm_intel_bo *
blend_state_create_from_key(struct brw_context *brw,
struct gen6_blend_state_key *key)
{
- struct gen6_blend_state blend;
+ struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS];
drm_intel_bo *bo;
+ int b;
memset(&blend, 0, sizeof(blend));
- if (key->logic_op != GL_COPY) {
- blend.blend1.logic_op_enable = 1;
- blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
- } else if (key->color_blend) {
- GLenum eqRGB = key->blend_eq_rgb;
- GLenum eqA = key->blend_eq_a;
- GLenum srcRGB = key->blend_src_rgb;
- GLenum dstRGB = key->blend_dst_rgb;
- GLenum srcA = key->blend_src_a;
- GLenum dstA = key->blend_dst_a;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
- srcRGB = dstRGB = GL_ONE;
- }
-
- if (eqA == GL_MIN || eqA == GL_MAX) {
- srcA = dstA = GL_ONE;
+ for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) {
+ if (key->logic_op != GL_COPY) {
+ blend[b].blend1.logic_op_enable = 1;
+ blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+ } else if (key->color_blend & (1 << b)) {
+ GLenum eqRGB = key->blend_eq_rgb;
+ GLenum eqA = key->blend_eq_a;
+ GLenum srcRGB = key->blend_src_rgb;
+ GLenum dstRGB = key->blend_dst_rgb;
+ GLenum srcA = key->blend_src_a;
+ GLenum dstA = key->blend_dst_a;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+ srcRGB = dstRGB = GL_ONE;
+ }
+
+ if (eqA == GL_MIN || eqA == GL_MAX) {
+ srcA = dstA = GL_ONE;
+ }
+
+ blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+ blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
+ blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB);
+
+ blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+ blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
+ blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+
+ blend[b].blend0.blend_enable = 1;
+ blend[b].blend0.ia_blend_enable = (srcA != srcRGB ||
+ dstA != dstRGB ||
+ eqA != eqRGB);
}
- blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
- blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
- blend.blend0.blend_func = brw_translate_blend_equation(eqRGB);
-
- blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
- blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
- blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA);
+ if (key->alpha_enabled) {
+ blend[b].blend1.alpha_test_enable = 1;
+ blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
- blend.blend0.blend_enable = 1;
- blend.blend0.ia_blend_enable = (srcA != srcRGB ||
- dstA != dstRGB ||
- eqA != eqRGB);
- }
-
- if (key->alpha_enabled) {
- blend.blend1.alpha_test_enable = 1;
- blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+ }
- }
+ if (key->dither) {
+ blend[b].blend1.dither_enable = 1;
+ blend[b].blend1.y_dither_offset = 0;
+ blend[b].blend1.x_dither_offset = 0;
+ }
- if (key->dither) {
- blend.blend1.dither_enable = 1;
- blend.blend1.y_dither_offset = 0;
- blend.blend1.x_dither_offset = 0;
+ blend[b].blend1.write_disable_r = !key->color_mask[b][0];
+ blend[b].blend1.write_disable_g = !key->color_mask[b][1];
+ blend[b].blend1.write_disable_b = !key->color_mask[b][2];
+ blend[b].blend1.write_disable_a = !key->color_mask[b][3];
}
bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
@@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = {
};
struct gen6_color_calc_state_key {
- GLubyte blend_constant_color[4];
+ float blend_constant_color[4];
GLclampf alpha_ref;
GLubyte stencil_ref[2];
};
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index c65b41e2b6b..c7c4eb1f27d 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -64,7 +64,9 @@ upload_clip_state(struct brw_context *brw)
userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
depth_clamp |
provoking);
- OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX);
+ OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+ U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+ GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 471067e8f02..45c148baedd 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -33,9 +33,10 @@
#include "intel_batchbuffer.h"
static uint32_t
-get_attr_override(struct brw_context *brw, int fs_attr)
+get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color)
{
int attr_index = 0, i, vs_attr;
+ int bfc = 0;
if (fs_attr <= FRAG_ATTRIB_TEX7)
vs_attr = fs_attr;
@@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr)
attr_index++;
}
+ assert(attr_index < 32);
+
+ if (two_side_color) {
+ if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) {
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0));
+ assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0));
+ bfc = 2;
+ } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) &&
+ (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)))
+ bfc = 1;
+ }
+
+ if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) {
+ if (fs_attr == FRAG_ATTRIB_COL0)
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) {
+ attr_index++;
+ attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
+ } else {
+ attr_index += bfc;
+ }
+ }
+
return attr_index;
}
@@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
/* CACHE_NEW_VS_PROG */
uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+ /* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
- uint32_t dw1, dw2, dw3, dw4, dw16;
+ uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
int i;
/* _NEW_BUFFER */
GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
int attr = 0;
int urb_start;
+ int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled)
@@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw)
dw3 = 0;
dw4 = 0;
dw16 = 0;
+ dw17 = 0;
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
@@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw)
if (ctx->Polygon.OffsetFill)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+ if (ctx->Polygon.OffsetLine)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+
+ if (ctx->Polygon.OffsetPoint)
+ dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+
+ switch (ctx->Polygon.FrontMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_FRONT_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_FRONT_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_FRONT_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (ctx->Polygon.BackMode) {
+ case GL_FILL:
+ dw2 |= GEN6_SF_BACK_SOLID;
+ break;
+
+ case GL_LINE:
+ dw2 |= GEN6_SF_BACK_WIREFRAME;
+ break;
+
+ case GL_POINT:
+ dw2 |= GEN6_SF_BACK_POINT;
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw3 |= GEN6_SF_SCISSOR_ENABLE;
@@ -160,6 +230,12 @@ upload_sf_state(struct brw_context *brw)
}
}
+ /* flat shading */
+ if (ctx->Light.ShadeModel == GL_FLAT) {
+ dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >>
+ ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1));
+ }
+
BEGIN_BATCH(20);
OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
OUT_BATCH(dw1);
@@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr);
+ attr_overrides |= get_attr_override(brw, attr, two_side_color);
attr++;
break;
}
@@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw)
for (; attr < 64; attr++) {
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
- attr_overrides |= get_attr_override(brw, attr) << 16;
+ attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16;
attr++;
break;
}
@@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw)
OUT_BATCH(attr_overrides);
}
OUT_BATCH(dw16); /* point sprite texcoord bitmask */
- OUT_BATCH(0); /* constant interp bitmask */
+ OUT_BATCH(dw17); /* constant interp bitmask */
OUT_BATCH(0); /* wrapshortest enables 0-7 */
OUT_BATCH(0); /* wrapshortest enables 8-15 */
ADVANCE_BATCH();
@@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = {
_NEW_BUFFERS |
_NEW_POINT |
_NEW_TRANSFORM),
- .brw = BRW_NEW_CONTEXT,
+ .brw = (BRW_NEW_CONTEXT |
+ BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index a34123478fb..de97fd3783d 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
- .cache = CACHE_NEW_VS_PROG,
+ .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
},
.prepare = prepare_urb,
.emit = upload_urb,
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e94d0c0ddbb..4ef9e2e6072 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
- int params_uploaded = 0;
+ int params_uploaded = 0, param_regs;
float *param;
if (brw->vertex_program->IsNVProgram)
@@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw)
params_uploaded++;
}
- if (vp->use_const_buffer) {
- for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
- if (brw->vs.constant_map[i] != -1) {
- memcpy(param + brw->vs.constant_map[i] * 4,
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- params_uploaded++;
- }
- }
- } else {
- for (i = 0; i < nr_params; i++) {
- memcpy(param, vp->program.Base.Parameters->ParameterValues[i],
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
4 * sizeof(float));
- param += 4;
params_uploaded++;
}
}
@@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw)
drm_intel_gem_bo_unmap_gtt(constant_bo);
+ param_regs = (params_uploaded + 1) / 2;
+ assert(param_regs <= 32);
+
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(constant_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(params_uploaded, 2) / 2 - 1);
+ param_regs - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index ea5418bacf1..d80df4e254b 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw)
constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
*brw->wm.prog_data->param[i]);
}
+
+ if (0) {
+ printf("WM constants:\n");
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+ if ((i & 7) == 0)
+ printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
+ printf("%8f ", constants[i]);
+ if ((i & 7) == 7)
+ printf("\n");
+ }
+ if ((i & 7) != 0)
+ printf("\n");
+ printf("\n");
+ }
+
drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
}
@@ -88,6 +103,7 @@ upload_wm_state(struct brw_context *brw)
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
+ /* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
@@ -104,7 +120,8 @@ upload_wm_state(struct brw_context *brw)
(5 - 2));
OUT_RELOC(brw->wm.push_const_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1);
+ ALIGN(brw->wm.prog_data->nr_params,
+ brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -126,8 +143,8 @@ upload_wm_state(struct brw_context *brw)
dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- if (fp->isGLSL)
+ /* CACHE_NEW_WM_PROG */
+ if (brw->wm.prog_data->dispatch_width == 8)
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
else
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
@@ -176,13 +193,14 @@ upload_wm_state(struct brw_context *brw)
const struct brw_tracked_state gen6_wm_state = {
.dirty = {
.mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
- _NEW_PROGRAM_CONSTANTS),
+ _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON),
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_NR_WM_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),
- .cache = CACHE_NEW_SAMPLER
+ .cache = (CACHE_NEW_SAMPLER |
+ CACHE_NEW_WM_PROG)
},
.emit = upload_wm_state,
};
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 4b498f8c5b2..21fc9ece886 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -92,7 +92,7 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
batch->ptr = NULL;
- if (!intel->no_hw) {
+ if (!intel->intelScreen->no_hw) {
drm_intel_bo_exec(batch->buf, used, NULL, 0,
(x_off & 0xffff) | (y_off << 16));
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 152cdcaf37d..9c222c7b485 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -519,7 +519,6 @@ static const struct dri_debug_control debug_control[] = {
{ "sing", DEBUG_SINGLE_THREAD },
{ "thre", DEBUG_SINGLE_THREAD },
{ "wm", DEBUG_WM },
- { "glsl_force", DEBUG_GLSL_FORCE },
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
{ "clip", DEBUG_CLIP },
@@ -800,11 +799,6 @@ intelInitContext(struct intel_context *intel,
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
- /* XXX force SIMD8 kernel for Sandybridge before we fixed
- SIMD16 interpolation. */
- if (intel->gen == 6)
- INTEL_DEBUG |= DEBUG_GLSL_FORCE;
-
intel->batch = intel_batchbuffer_alloc(intel);
intel_fbo_init(intel);
@@ -838,11 +832,6 @@ intelInitContext(struct intel_context *intel,
intel->always_flush_cache = 1;
}
- /* Disable all hardware rendering (skip emitting batches and fences/waits
- * to the kernel)
- */
- intel->no_hw = getenv("INTEL_NO_HW") != NULL;
-
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 9d5139c0000..96493c0f2bb 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -207,7 +207,6 @@ struct intel_context
GLboolean hw_stipple;
GLboolean depth_buffer_is_float;
GLboolean no_rast;
- GLboolean no_hw;
GLboolean always_flush_batch;
GLboolean always_flush_cache;
@@ -362,7 +361,6 @@ extern int INTEL_DEBUG;
#define DEBUG_WM 0x800000
#define DEBUG_URB 0x1000000
#define DEBUG_VS 0x2000000
-#define DEBUG_GLSL_FORCE 0x4000000
#define DEBUG_CLIP 0x8000000
#define DBG(...) do { \
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 862a13d2ea5..18e796a1186 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -465,10 +465,12 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to XGBA8 texture OK\n");
}
+#ifndef I915
else if (texImage->TexFormat == MESA_FORMAT_SARGB8) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to SARGB8 texture OK\n");
}
+#endif
else if (texImage->TexFormat == MESA_FORMAT_RGB565) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to RGB5 texture OK\n");
@@ -481,6 +483,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to ARGB4444 texture OK\n");
}
+#ifndef I915
else if (texImage->TexFormat == MESA_FORMAT_A8) {
irb->Base.DataType = GL_UNSIGNED_BYTE;
DBG("Render to A8 texture OK\n");
@@ -501,6 +504,7 @@ intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb,
irb->Base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to RG88 texture OK\n");
}
+#endif
else if (texImage->TexFormat == MESA_FORMAT_Z16) {
irb->Base.DataType = GL_UNSIGNED_SHORT;
DBG("Render to DEPTH16 texture OK\n");
@@ -710,15 +714,17 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
switch (irb->Base.Format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
- case MESA_FORMAT_SARGB8:
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_ARGB4444:
+#ifndef I915
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_A8:
case MESA_FORMAT_R8:
case MESA_FORMAT_R16:
case MESA_FORMAT_RG88:
case MESA_FORMAT_RG1616:
+#endif
break;
default:
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 061f0d278d6..3f13589a214 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -452,7 +452,7 @@ intelCreateContext(gl_api api,
return brwCreateContext(api, mesaVis,
driContextPriv, sharedContextPrivate);
#endif
- fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+ fprintf(stderr, "Unrecognized deviceID 0x%x\n", intelScreen->deviceID);
return GL_FALSE;
}
@@ -462,7 +462,8 @@ intel_init_bufmgr(struct intel_screen *intelScreen)
__DRIscreen *spriv = intelScreen->driScrnPriv;
int num_fences = 0;
- intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
+ intelScreen->no_hw = (getenv("INTEL_NO_HW") != NULL ||
+ getenv("INTEL_DEVID_OVERRIDE") != NULL);
intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
if (intelScreen->bufmgr == NULL) {
@@ -497,6 +498,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
GLenum fb_format[3];
GLenum fb_type[3];
unsigned int api_mask;
+ char *devid_override;
static const GLenum back_buffer_modes[] = {
GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
@@ -523,6 +525,16 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
&intelScreen->deviceID))
return GL_FALSE;
+ /* Allow an override of the device ID for the purpose of making the
+ * driver produce dumps for debugging of new chipset enablement.
+ * This implies INTEL_NO_HW, to avoid programming your actual GPU
+ * incorrectly.
+ */
+ devid_override = getenv("INTEL_DEVID_OVERRIDE");
+ if (devid_override) {
+ intelScreen->deviceID = strtod(devid_override, NULL);
+ }
+
api_mask = (1 << __DRI_API_OPENGL);
#if FEATURE_ES1
api_mask |= (1 << __DRI_API_GLES);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 9d73a2fb375..f8316ae2f8d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -204,11 +204,13 @@ intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat,
* { R, G, 1.0, 1.0 } from a red-green texture would be useful.
*/
case GL_RED:
+ case GL_COMPRESSED_RED:
case GL_R8:
return MESA_FORMAT_R8;
case GL_R16:
return MESA_FORMAT_R16;
case GL_RG:
+ case GL_COMPRESSED_RG:
case GL_RG8:
return MESA_FORMAT_RG88;
case GL_RG16:
diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
index 8a047e6419b..b62290231b9 100644
--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
@@ -200,6 +200,7 @@ void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev )
}
default:
assert(0);
+ emitsize = 0;
}
if (!rmesa->radeon.tcl.aos[nr].bo) {
rcommon_emit_vector( ctx,
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 8be32ea91fe..1db8678e890 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
} else if (src.File == RC_FILE_TEMPORARY) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 2d28b065390..05d3da8a10d 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
*/
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
+ const struct swizzle_data* sd;
unsigned int relevant;
int j;
@@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
return 0;
- if (!lookup_native_swizzle(reg.Swizzle))
+ sd = lookup_native_swizzle(reg.Swizzle);
+ if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
return 0;
return 1;
@@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
- if (!sd) {
+ if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
return 0;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 2f130198d35..e0d349b98ce 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -24,6 +24,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
@@ -54,6 +55,8 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
struct rc_sub_instruction * inst = &rci->U.I;
+ unsigned i;
+ const struct rc_opcode_info *info = rc_get_opcode_info(inst->Opcode);
if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
@@ -65,27 +68,12 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
continue;
}
- switch (inst->Opcode) {
- case RC_OPCODE_FRC:
- case RC_OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- break;
- case RC_OPCODE_ADD:
- case RC_OPCODE_MAX:
- case RC_OPCODE_MIN:
- case RC_OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- break;
- case RC_OPCODE_CMP:
- case RC_OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
- break;
- default:
- // Scalar instructions needn't be reswizzled
- break;
+ if (!info->IsComponentwise) {
+ continue;
+ }
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ inst->SrcReg[i] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[i]);
}
}
}
@@ -93,7 +81,6 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -133,11 +120,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
- {"register rename", 1, !is_r500, rc_rename_regs, NULL},
+ {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"register allocation", 1, opt, rc_pair_regalloc, NULL},
@@ -150,9 +137,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_FRAGMENT_PROGRAM;
c->Base.SwizzleCaps = c->Base.is_r500 ? &r500_swizzle_caps : &r300_swizzle_caps;
- rc_run_compiler(&c->Base, fs_list, "Fragment Program");
+ rc_run_compiler(&c->Base, fs_list);
rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index bf8341f0173..472029f63d0 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -26,6 +26,7 @@
#include "../r300_reg.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "radeon_swizzle.h"
@@ -790,19 +791,14 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
if (!hwtemps[j])
break;
}
- if (j >= c->max_temp_regs) {
- rc_error(c, "Too many temporaries\n");
- return;
+ ta[orig].Allocated = 1;
+ if (last_inst_src_reladdr &&
+ last_inst_src_reladdr->IP > inst->IP) {
+ ta[orig].HwTemp = orig;
} else {
- ta[orig].Allocated = 1;
- if (last_inst_src_reladdr &&
- last_inst_src_reladdr->IP > inst->IP) {
- ta[orig].HwTemp = orig;
- } else {
- ta[orig].HwTemp = j;
- }
- hwtemps[ta[orig].HwTemp] = 1;
+ ta[orig].HwTemp = j;
}
+ hwtemps[ta[orig].HwTemp] = 1;
}
inst->U.I.DstReg.Index = ta[orig].HwTemp;
@@ -1018,7 +1014,6 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
int is_r500 = c->Base.is_r500;
- int kill_consts = c->Base.remove_unused_constants;
int opt = !c->Base.disable_optimizations;
/* Lists of instruction transformations. */
@@ -1062,18 +1057,18 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{"dataflow optimize", 1, opt, rc_optimize, NULL},
/* This pass must be done after optimizations. */
{"source conflict resolve", 1, 1, rc_local_transform, resolve_src_conflicts},
- {"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"register allocation", 1, opt, allocate_temporary_registers, NULL},
- {"dead constants", 1, kill_consts, rc_remove_unused_constants, &c->code->constants_remap_table},
+ {"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, 1, translate_vertex_program, NULL},
{"dump machine code", 0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump, NULL},
{NULL, 0, 0, NULL, NULL}
};
+ c->Base.type = RC_VERTEX_PROGRAM;
c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- rc_run_compiler(&c->Base, vs_list, "Vertex Program");
+ rc_run_compiler(&c->Base, vs_list);
c->code->InputsRead = c->Base.Program.InputsRead;
c->code->OutputsWritten = c->Base.Program.OutputsWritten;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 289bb87ae59..ef81be48f77 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -29,6 +29,7 @@
#include <stdio.h>
+#include "radeon_compiler_util.h"
#include "../r300_reg.h"
/**
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 6f101c68eb6..5da82d90f67 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -45,9 +45,6 @@
#include "radeon_program_pair.h"
-#define MAX_BRANCH_DEPTH_FULL 32
-#define MAX_BRANCH_DEPTH_PARTIAL 4
-
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500
@@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | 0x100;
} else if (src.File == RC_FILE_TEMPORARY) {
@@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
break;
}
case RC_OPCODE_IF:
- if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
+ if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
return;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index cfb6df2cd79..b69e81698ae 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -34,6 +34,8 @@
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 4286baed0c6..65548604bcc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -29,6 +29,7 @@
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
void rc_init(struct radeon_compiler * c)
@@ -356,66 +357,92 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- unsigned int * max_reg = userdata;
+ int *max_reg = userdata;
if (file == RC_FILE_TEMPORARY)
- index > *max_reg ? *max_reg = index : 0;
+ (int)index > *max_reg ? *max_reg = index : 0;
}
-static void print_stats(struct radeon_compiler * c)
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
+ int max_reg = -1;
struct rc_instruction * tmp;
- unsigned max_reg, insts, fc, tex, alpha, rgb, presub;
- max_reg = insts = fc = tex = alpha = rgb = presub = 0;
+ memset(s, 0, sizeof(*s));
+
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
- presub++;
+ s->num_presub_ops++;
info = rc_get_opcode_info(tmp->U.I.Opcode);
} else {
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
- presub++;
+ s->num_presub_ops++;
/* Assuming alpha will never be a flow control or
* a tex instruction. */
if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
- alpha++;
+ s->num_alpha_insts++;
if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
- rgb++;
+ s->num_rgb_insts++;
info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
}
if (info->IsFlowControl)
- fc++;
+ s->num_fc_insts++;
if (info->HasTexture)
- tex++;
- insts++;
+ s->num_tex_insts++;
+ s->num_insts++;
}
- if (insts < 4)
- return;
- fprintf(stderr,"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
- "~%4u Instructions\n"
- "~%4u Vector Instructions (RGB)\n"
- "~%4u Scalar Instructions (Alpha)\n"
- "~%4u Flow Control Instructions\n"
- "~%4u Texture Instructions\n"
- "~%4u Presub Operations\n"
- "~%4u Temporary Registers\n"
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
- insts, rgb, alpha, fc, tex, presub, max_reg + 1);
+ s->num_temp_regs = max_reg + 1;
}
-/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name)
+static void print_stats(struct radeon_compiler * c)
{
- if (c->Debug & RC_DBG_LOG) {
- fprintf(stderr, "%s: before compilation\n", shader_name);
- rc_print_program(&c->Program);
+ struct rc_program_stats s;
+
+ rc_get_stats(c, &s);
+
+ if (s.num_insts < 4)
+ return;
+
+ switch (c->type) {
+ case RC_VERTEX_PROGRAM:
+ fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_fc_insts, s.num_temp_regs);
+ break;
+
+ case RC_FRAGMENT_PROGRAM:
+ fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n"
+ "~%4u Instructions\n"
+ "~%4u Vector Instructions (RGB)\n"
+ "~%4u Scalar Instructions (Alpha)\n"
+ "~%4u Flow Control Instructions\n"
+ "~%4u Texture Instructions\n"
+ "~%4u Presub Operations\n"
+ "~%4u Temporary Registers\n"
+ "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
+ s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
+ s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
+ s.num_temp_regs);
+ break;
+ default:
+ assert(0);
}
+}
+static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
+ "Vertex Program",
+ "Fragment Program"
+};
+
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
for (unsigned i = 0; list[i].name; i++) {
if (list[i].predicate) {
list[i].run(c, list[i].user);
@@ -424,11 +451,23 @@ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *lis
return;
if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
- fprintf(stderr, "%s: after '%s'\n", shader_name, list[i].name);
+ fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
rc_print_program(&c->Program);
}
}
}
+}
+
+/* Executes a list of compiler passes given in the parameter 'list'. */
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
+{
+ if (c->Debug & RC_DBG_LOG) {
+ fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
+ rc_print_program(&c->Program);
+ }
+
+ rc_run_compiler_passes(c, list);
+
if (c->Debug & RC_DBG_STATS)
print_stats(c);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 31fd469a04f..e6633395895 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -35,9 +35,16 @@
struct rc_swizzle_caps;
+enum rc_program_type {
+ RC_VERTEX_PROGRAM,
+ RC_FRAGMENT_PROGRAM,
+ RC_NUM_PROGRAM_TYPES
+};
+
struct radeon_compiler {
struct memory_pool Pool;
struct rc_program Program;
+ enum rc_program_type type;
unsigned Debug:2;
unsigned Error:1;
char * ErrorMsg;
@@ -140,9 +147,21 @@ struct radeon_compiler_pass {
void *user; /* Optional parameter which is passed to the run function. */
};
+struct rc_program_stats {
+ unsigned num_insts;
+ unsigned num_fc_insts;
+ unsigned num_tex_insts;
+ unsigned num_rgb_insts;
+ unsigned num_alpha_insts;
+ unsigned num_presub_ops;
+ unsigned num_temp_regs;
+};
+
+void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
+
/* Executes a list of compiler passes given in the parameter 'list'. */
-void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list,
- const char *shader_name);
+void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list);
+void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list);
void rc_validate_final_shader(struct radeon_compiler *c, void *user);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index 97f4c758492..bf393a9fb16 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -31,6 +31,8 @@
#include "radeon_compiler_util.h"
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
*/
unsigned int rc_swizzle_to_writemask(unsigned int swz)
@@ -46,6 +48,91 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz)
return mask;
}
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+/**
+ * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ */
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+ switch (mask) {
+ case RC_MASK_X: return RC_SWIZZLE_X;
+ case RC_MASK_Y: return RC_SWIZZLE_Y;
+ case RC_MASK_Z: return RC_SWIZZLE_Z;
+ case RC_MASK_W: return RC_SWIZZLE_W;
+ }
+ return RC_SWIZZLE_UNUSED;
+}
+
+/* Reorder mask bits according to swizzle. */
+unsigned swizzle_mask(unsigned swizzle, unsigned mask)
+{
+ unsigned ret = 0;
+ for (unsigned chan = 0; chan < 4; ++chan) {
+ unsigned swz = GET_SWZ(swizzle, chan);
+ if (swz < 4)
+ ret |= GET_BIT(mask, swz) << chan;
+ }
+ return ret;
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+void reset_srcreg(struct rc_src_register* reg)
+{
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
@@ -59,3 +146,123 @@ unsigned int rc_src_reads_dst_mask(
}
return dst_mask & rc_swizzle_to_writemask(src_swz);
}
+
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels)
+{
+ unsigned int chan;
+ unsigned int swz = RC_SWIZZLE_UNUSED;
+ unsigned int ret = RC_SOURCE_NONE;
+
+ for(chan = 0; chan < channels; chan++) {
+ swz = GET_SWZ(swizzle, chan);
+ if (swz == RC_SWIZZLE_W) {
+ ret |= RC_SOURCE_ALPHA;
+ } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+ || swz == RC_SWIZZLE_Z) {
+ ret |= RC_SOURCE_RGB;
+ }
+ }
+ return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+ unsigned int ret = RC_SOURCE_NONE;
+
+ if (mask & RC_MASK_XYZ)
+ ret |= RC_SOURCE_RGB;
+
+ if (mask & RC_MASK_W)
+ ret |= RC_SOURCE_ALPHA;
+
+ return ret;
+}
+
+struct can_use_presub_data {
+ struct rc_src_register RemoveSrcs[3];
+ unsigned int RGBCount;
+ unsigned int AlphaCount;
+};
+
+static void can_use_presub_read_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct can_use_presub_data * d = userdata;
+ unsigned int src_type = rc_source_type_mask(mask);
+ unsigned int i;
+
+ if (file == RC_FILE_NONE)
+ return;
+
+ for(i = 0; i < 3; i++) {
+ if (d->RemoveSrcs[i].File == file
+ && d->RemoveSrcs[i].Index == index) {
+ src_type &=
+ ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4);
+ }
+ }
+
+ if (src_type & RC_SOURCE_RGB)
+ d->RGBCount++;
+
+ if (src_type & RC_SOURCE_ALPHA)
+ d->AlphaCount++;
+}
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1)
+{
+ struct can_use_presub_data d;
+ unsigned int num_presub_srcs;
+ unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (presub_op == RC_PRESUB_NONE) {
+ return 1;
+ }
+
+ if (info->HasTexture) {
+ return 0;
+ }
+
+ /* We can't use more than one presubtract value in an
+ * instruction, unless the two presubtract operations
+ * are the same and read from the same registers.
+ * XXX For now we will limit instructions to only one presubtract
+ * value.*/
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ return 0;
+ }
+
+ memset(&d, 0, sizeof(d));
+ d.RemoveSrcs[0] = replace_reg;
+ d.RemoveSrcs[1] = presub_src0;
+ d.RemoveSrcs[2] = presub_src1;
+
+ rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d);
+
+ num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+ if ((presub_src_type & RC_SOURCE_RGB)
+ && d.RGBCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ if ((presub_src_type & RC_SOURCE_ALPHA)
+ && d.AlphaCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ return 1;
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index 1a14e7cb0ef..461ab9ffb10 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -3,8 +3,27 @@
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H
+struct rc_instruction;
+struct rc_src_register;
+
unsigned int rc_swizzle_to_writemask(unsigned int swz);
+rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+
+unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y,
+ rc_swizzle swz_z, rc_swizzle swz_w);
+
+unsigned int combine_swizzles(unsigned int src, unsigned int swz);
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
+
+unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+void reset_srcreg(struct rc_src_register* reg);
+
unsigned int rc_src_reads_dst_mask(
rc_register_file src_file,
unsigned int src_idx,
@@ -13,4 +32,16 @@ unsigned int rc_src_reads_dst_mask(
unsigned int dst_idx,
unsigned int dst_mask);
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1);
+
#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index fd94194dc34..d0a64d936e0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -139,7 +139,46 @@ static void pair_sub_for_all_args(
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
for(i = 0; i < info->NumSrcRegs; i++) {
- cb(userdata, fullinst, &sub->Arg[i]);
+ unsigned int src_type;
+ unsigned int channels = 0;
+ if (&fullinst->U.P.RGB == sub)
+ channels = 3;
+ else if (&fullinst->U.P.Alpha == sub)
+ channels = 1;
+
+ assert(channels > 0);
+ src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels);
+
+ if (src_type == RC_SOURCE_NONE)
+ continue;
+
+ if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
+ unsigned int presub_type;
+ unsigned int presub_src_count;
+ struct rc_pair_instruction_source * src_array;
+ unsigned int j;
+ if (src_type & RC_SOURCE_RGB) {
+ presub_type = fullinst->
+ U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.RGB.Src;
+ } else {
+ presub_type = fullinst->
+ U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.Alpha.Src;
+ }
+ presub_src_count
+ = rc_presubtract_src_reg_count(presub_type);
+ for(j = 0; j < presub_src_count; j++) {
+ cb(userdata, fullinst, &sub->Arg[i],
+ &src_array[j]);
+ }
+ } else {
+ struct rc_pair_instruction_source * src =
+ rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
+ if (src) {
+ cb(userdata, fullinst, &sub->Arg[i], src);
+ }
+ }
}
}
@@ -308,6 +347,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int remapped_presub = 0;
if (opcode->HasDstReg) {
rc_register_file file = inst->DstReg.File;
@@ -327,6 +367,12 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
unsigned int i;
unsigned int srcp_srcs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
+ /* Make sure we only remap presubtract sources once in
+ * case more than one source register reads the
+ * presubtract result. */
+ if (remapped_presub)
+ continue;
+
for(i = 0; i < srcp_srcs; i++) {
file = inst->PreSub.SrcReg[i].File;
index = inst->PreSub.SrcReg[i].Index;
@@ -334,7 +380,7 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
inst->PreSub.SrcReg[i].File = file;
inst->PreSub.SrcReg[i].Index = index;
}
-
+ remapped_presub = 1;
}
else {
cb(userdata, fullinst, &file, &index);
@@ -430,12 +476,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
}
+struct branch_write_mask {
+ unsigned int IfWriteMask:4;
+ unsigned int ElseWriteMask:4;
+ unsigned int HasElse:1;
+};
+
+union get_readers_read_cb {
+ rc_read_src_fn I;
+ rc_pair_read_arg_fn P;
+};
+
struct get_readers_callback_data {
struct radeon_compiler * C;
struct rc_reader_data * ReaderData;
- rc_read_src_fn ReadCB;
+ rc_read_src_fn ReadNormalCB;
+ rc_pair_read_arg_fn ReadPairCB;
rc_read_write_mask_fn WriteCB;
+ rc_register_file DstFile;
+ unsigned int DstIndex;
+ unsigned int DstMask;
unsigned int AliveWriteMask;
+ /* For convenience, this is indexed starting at 1 */
+ struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
static void add_reader(
@@ -443,7 +506,7 @@ static void add_reader(
struct rc_reader_data * data,
struct rc_instruction * inst,
unsigned int mask,
- struct rc_src_register * src)
+ void * arg_or_src)
{
struct rc_reader * new;
memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
@@ -451,7 +514,74 @@ static void add_reader(
new = &data->Readers[data->ReaderCount++];
new->Inst = inst;
new->WriteMask = mask;
- new->Src = src;
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ new->U.Src = arg_or_src;
+ } else {
+ new->U.Arg = arg_or_src;
+ }
+}
+
+static unsigned int get_readers_read_callback(
+ struct get_readers_callback_data * cb_data,
+ unsigned int has_rel_addr,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int swizzle)
+{
+ unsigned int shared_mask, read_mask;
+
+ if (has_rel_addr) {
+ cb_data->ReaderData->Abort = 1;
+ return RC_MASK_NONE;
+ }
+
+ shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
+ cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+
+ if (shared_mask == RC_MASK_NONE)
+ return shared_mask;
+
+ /* If we make it this far, it means that this source reads from the
+ * same register written to by cb_data->ReaderData->Writer. */
+
+ read_mask = rc_swizzle_to_writemask(swizzle);
+ if (cb_data->ReaderData->AbortOnRead & read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ /* XXX The behavior in this case should be configurable. */
+ if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ return shared_mask;
+}
+
+static void get_readers_pair_read_callback(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ unsigned int shared_mask;
+ struct get_readers_callback_data * d = userdata;
+
+ shared_mask = get_readers_read_callback(d,
+ 0 /*Pair Instructions don't use RelAddr*/,
+ src->File, src->Index, arg->Swizzle);
+
+ if (shared_mask == RC_MASK_NONE)
+ return;
+
+ if (d->ReadPairCB)
+ d->ReadPairCB(d->ReaderData, inst, arg, src);
+
+ if (d->ReaderData->Abort)
+ return;
+
+ add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg);
}
/**
@@ -464,37 +594,18 @@ static void get_readers_normal_read_callback(
struct rc_src_register * src)
{
struct get_readers_callback_data * d = userdata;
- unsigned int read_mask;
unsigned int shared_mask;
- if (src->RelAddr)
- d->ReaderData->Abort = 1;
-
- shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
- src->Swizzle,
- d->ReaderData->Writer->U.I.DstReg.File,
- d->ReaderData->Writer->U.I.DstReg.Index,
- d->AliveWriteMask);
+ shared_mask = get_readers_read_callback(d,
+ src->RelAddr, src->File, src->Index, src->Swizzle);
if (shared_mask == RC_MASK_NONE)
return;
+ /* The callback function could potentially clear d->ReaderData->Abort,
+ * so we need to call it before we return. */
+ if (d->ReadNormalCB)
+ d->ReadNormalCB(d->ReaderData, inst, src);
- /* If we make it this far, it means that this source reads from the
- * same register written to by d->ReaderData->Writer. */
-
- if (d->ReaderData->AbortOnRead) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- read_mask = rc_swizzle_to_writemask(src->Swizzle);
- /* XXX The behavior in this case should be configurable. */
- if ((read_mask & d->AliveWriteMask) != read_mask) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- d->ReadCB(d->ReaderData, inst, src);
if (d->ReaderData->Abort)
return;
@@ -515,29 +626,132 @@ static void get_readers_write_callback(
{
struct get_readers_callback_data * d = userdata;
- if (index == d->ReaderData->Writer->U.I.DstReg.Index
- && file == d->ReaderData->Writer->U.I.DstReg.File) {
- unsigned int shared_mask = mask
- & d->ReaderData->Writer->U.I.DstReg.WriteMask;
- if (d->ReaderData->InElse) {
- if (shared_mask & d->AliveWriteMask) {
- /* We set AbortOnRead here because the
- * destination register of d->ReaderData->Writer
- * is written to in both the IF and the
- * ELSE block of this IF/ELSE statement.
- * This means that readers of this
- * destination register that follow this IF/ELSE
- * statement use the value of different
- * instructions depending on the control flow
- * decisions made by the program. */
- d->ReaderData->AbortOnRead = 1;
+ if (index == d->DstIndex && file == d->DstFile) {
+ unsigned int shared_mask = mask & d->DstMask;
+ d->ReaderData->AbortOnRead &= ~shared_mask;
+ d->AliveWriteMask &= ~shared_mask;
+ }
+
+ if(d->WriteCB)
+ d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+static void get_readers_for_single_write(
+ void * userdata,
+ struct rc_instruction * writer,
+ rc_register_file dst_file,
+ unsigned int dst_index,
+ unsigned int dst_mask)
+{
+ struct rc_instruction * tmp;
+ unsigned int branch_depth = 0;
+ struct get_readers_callback_data * d = userdata;
+
+ d->ReaderData->Writer = writer;
+ d->ReaderData->AbortOnRead = 0;
+ d->ReaderData->InElse = 0;
+ d->DstFile = dst_file;
+ d->DstIndex = dst_index;
+ d->DstMask = dst_mask;
+ d->AliveWriteMask = dst_mask;
+ memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+ if (!dst_mask)
+ return;
+
+ for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+ tmp = tmp->Next){
+ rc_opcode opcode = get_flow_control_inst(tmp);
+ switch(opcode) {
+ case RC_OPCODE_BGNLOOP:
+ /* XXX We can do better when we see a BGNLOOP if we
+ * add a flag called AbortOnWrite to struct
+ * rc_reader_data and leave it set until the next
+ * ENDLOOP. */
+ case RC_OPCODE_ENDLOOP:
+ /* XXX We can do better when we see an ENDLOOP by
+ * searching backwards from writer and looking for
+ * readers of writer's destination index. If we find a
+ * reader before we get to the BGNLOOP, we must abort
+ * unless there is another writer between that reader
+ * and the BGNLOOP. */
+ case RC_OPCODE_BRK:
+ case RC_OPCODE_CONT:
+ d->ReaderData->Abort = 1;
+ return;
+ case RC_OPCODE_IF:
+ branch_depth++;
+ if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+ d->BranchMasks[branch_depth].IfWriteMask =
+ d->AliveWriteMask;
+ break;
+ case RC_OPCODE_ELSE:
+ if (branch_depth == 0) {
+ d->ReaderData->InElse = 1;
+ } else {
+ unsigned int temp_mask = d->AliveWriteMask;
+ d->AliveWriteMask =
+ d->BranchMasks[branch_depth].IfWriteMask;
+ d->BranchMasks[branch_depth].ElseWriteMask =
+ temp_mask;
+ d->BranchMasks[branch_depth].HasElse = 1;
}
+ break;
+ case RC_OPCODE_ENDIF:
+ if (branch_depth == 0) {
+ d->ReaderData->AbortOnRead = d->AliveWriteMask;
+ d->ReaderData->InElse = 0;
+ }
+ else {
+ struct branch_write_mask * masks =
+ &d->BranchMasks[branch_depth];
+
+ if (masks->HasElse) {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~masks->ElseWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask
+ ^ ((masks->IfWriteMask ^
+ masks->ElseWriteMask)
+ & (masks->IfWriteMask
+ ^ d->AliveWriteMask));
+ } else {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~d->AliveWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask;
+
+ }
+ memset(masks, 0,
+ sizeof(struct branch_write_mask));
+ branch_depth--;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (d->ReaderData->InElse)
+ continue;
+
+ if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+ rc_for_all_reads_src(tmp,
+ get_readers_normal_read_callback, d);
} else {
- d->AliveWriteMask &= ~shared_mask;
+ rc_pair_for_all_reads_arg(tmp,
+ get_readers_pair_read_callback, d);
}
- }
+ rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
- d->WriteCB(d->ReaderData, inst, file, index, mask);
+ if (d->ReaderData->Abort)
+ return;
+
+ if (branch_depth == 0 && !d->AliveWriteMask)
+ return;
+ }
}
/**
@@ -578,83 +792,26 @@ static void get_readers_write_callback(
* @param write_cb This function will be called for every instruction after
* writer.
*/
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_reader_data * data,
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb)
{
- struct rc_instruction * tmp;
struct get_readers_callback_data d;
- unsigned int branch_depth = 0;
- data->Writer = writer;
data->Abort = 0;
- data->AbortOnRead = 0;
- data->InElse = 0;
data->ReaderCount = 0;
data->ReadersReserved = 0;
data->Readers = NULL;
d.C = c;
- d.AliveWriteMask = writer->U.I.DstReg.WriteMask;
d.ReaderData = data;
- d.ReadCB = read_cb;
+ d.ReadNormalCB = read_normal_cb;
+ d.ReadPairCB = read_pair_cb;
d.WriteCB = write_cb;
- if (!writer->U.I.DstReg.WriteMask)
- return;
-
- for(tmp = writer->Next; tmp != &c->Program.Instructions;
- tmp = tmp->Next){
- rc_opcode opcode = get_flow_control_inst(tmp);
- switch(opcode) {
- case RC_OPCODE_BGNLOOP:
- /* XXX We can do better when we see a BGNLOOP if we
- * add a flag called AbortOnWrite to struct
- * rc_reader_data and leave it set until the next
- * ENDLOOP. */
- case RC_OPCODE_ENDLOOP:
- /* XXX We can do better when we see an ENDLOOP by
- * searching backwards from writer and looking for
- * readers of writer's destination index. If we find a
- * reader before we get to the BGNLOOP, we must abort
- * unless there is another writer between that reader
- * and the BGNLOOP. */
- data->Abort = 1;
- return;
- case RC_OPCODE_IF:
- /* XXX We can do better here, but this will have to
- * do until this dataflow analysis is more mature. */
- data->Abort = 1;
- branch_depth++;
- break;
- case RC_OPCODE_ELSE:
- if (branch_depth == 0)
- data->InElse = 1;
- break;
- case RC_OPCODE_ENDIF:
- if (branch_depth == 0) {
- data->AbortOnRead = 1;
- data->InElse = 0;
- }
- else {
- branch_depth--;
- }
- break;
- default:
- break;
- }
-
- if (!data->InElse)
- rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
- rc_for_all_writes_mask(tmp, get_readers_write_callback, &d);
-
- if (data->Abort)
- return;
-
- if (!d.AliveWriteMask)
- return;
- }
+ rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 7de6b98f763..ef971c5b234 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -36,6 +36,7 @@ struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
struct rc_compiler;
@@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
void * userdata);
typedef void (*rc_pair_read_arg_fn)(void * userdata,
- struct rc_instruction * inst, struct rc_pair_instruction_arg * arg);
+ struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src);
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
rc_pair_read_arg_fn cb, void * userdata);
@@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
struct rc_reader {
struct rc_instruction * Inst;
unsigned int WriteMask;
- struct rc_src_register * Src;
+ union {
+ struct rc_src_register * Src;
+ struct rc_pair_instruction_arg * Arg;
+ } U;
};
struct rc_reader_data {
@@ -87,14 +92,13 @@ struct rc_reader_data {
void * CbData;
};
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
- struct rc_instruction * inst,
+ struct rc_instruction * writer,
struct rc_reader_data * data,
- /*XXX: These should be their own function types. */
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
-
/**
* Compiler passes based on dataflow analysis.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index da495a3afaa..25afd272bee 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -67,6 +67,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_CLAMP,
+ .Name = "CLAMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_CMP,
.Name = "CMP",
.NumSrcRegs = 3,
@@ -453,6 +460,7 @@ void rc_compute_sources_for_writemask(
srcmasks[1] |= RC_MASK_XY;
break;
case RC_OPCODE_DP3:
+ case RC_OPCODE_XPD:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
break;
@@ -460,6 +468,10 @@ void rc_compute_sources_for_writemask(
srcmasks[0] |= RC_MASK_XYZW;
srcmasks[1] |= RC_MASK_XYZW;
break;
+ case RC_OPCODE_DPH:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
case RC_OPCODE_TXB:
case RC_OPCODE_TXP:
srcmasks[0] |= RC_MASK_W;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index d3f639c8701..7e666101276 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -50,6 +50,9 @@ typedef enum {
/** vec4 instruction: dst.c = ceil(src0.c) */
RC_OPCODE_CEIL,
+ /** vec4 instruction: dst.c = clamp(src0.c, src1.c, src2.c) */
+ RC_OPCODE_CLAMP,
+
/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
RC_OPCODE_CMP,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 15b9c5e7dc3..44f4c0fbdc7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -54,12 +54,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
combine.Negate = outer.Negate;
} else {
combine.Abs = inner.Abs;
- combine.Negate = 0;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(outer.Swizzle, chan);
- if (swz < 4)
- combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
- }
+ combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
combine.Negate ^= outer.Negate;
}
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
@@ -71,12 +66,13 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
{
rc_register_file file = src->File;
struct rc_reader_data * reader_data = data;
- const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* It is possible to do copy propigation in this situation,
- * just not right now, see peephole_add_presub_inv() */
- if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
- (info->NumSrcRegs > 2 || info->HasTexture)) {
+ if(!rc_inst_can_use_presub(inst,
+ reader_data->Writer->U.I.PreSub.Opcode,
+ rc_swizzle_to_writemask(src->Swizzle),
+ *src,
+ reader_data->Writer->U.I.PreSub.SrcReg[0],
+ reader_data->Writer->U.I.PreSub.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
@@ -112,11 +108,11 @@ static void src_clobbered_reads_cb(
&& src->Index == sc_data->Index
&& (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
- sc_data->ReaderData->AbortOnRead = 1;
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
}
if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
- sc_data->ReaderData->AbortOnRead = 1;
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
}
}
@@ -149,8 +145,9 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
return;
/* Get a list of all the readers of this MOV instruction. */
- rc_get_readers_normal(c, inst_mov, &reader_data,
- copy_propagate_scan_read, is_src_clobbered_scan_write);
+ rc_get_readers(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, NULL,
+ is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
@@ -158,7 +155,7 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
- *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]);
+ *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = inst_mov->U.I.PreSub;
@@ -423,24 +420,13 @@ static void presub_scan_read(
struct rc_src_register * src)
{
struct rc_reader_data * reader_data = data;
- const struct rc_opcode_info * info =
- rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX: There are some situations where instructions
- * with more than 2 src registers can use the
- * presubtract select, but to keep things simple we
- * will disable presubtract on these instructions for
- * now. */
- if (info->NumSrcRegs > 2 || info->HasTexture) {
- reader_data->Abort = 1;
- return;
- }
+ rc_presubtract_op * presub_opcode = reader_data->CbData;
- /* We can't use more than one presubtract value in an
- * instruction, unless the two prsubtract operations
- * are the same and read from the same registers.
- * XXX For now we will limit instructions to only one presubtract
- * value.*/
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ if (!rc_inst_can_use_presub(inst, *presub_opcode,
+ reader_data->Writer->U.I.DstReg.WriteMask,
+ *src,
+ reader_data->Writer->U.I.SrcReg[0],
+ reader_data->Writer->U.I.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
@@ -454,8 +440,10 @@ static int presub_helper(
{
struct rc_reader_data reader_data;
unsigned int i;
+ rc_presubtract_op cb_op = presub_opcode;
- rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read,
+ reader_data.CbData = &cb_op;
+ rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
@@ -468,7 +456,7 @@ static int presub_helper(
rc_get_opcode_info(reader.Inst->U.I.Opcode);
for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
- if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src)
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
presub_replace(inst_add, reader.Inst, src_index);
}
}
@@ -505,7 +493,9 @@ static void presub_replace_add(
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
-static int is_presub_candidate(struct rc_instruction * inst)
+static int is_presub_candidate(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst)
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
@@ -514,7 +504,12 @@ static int is_presub_candidate(struct rc_instruction * inst)
return 0;
for(i = 0; i < info->NumSrcRegs; i++) {
- if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
+ struct rc_src_register src = inst->U.I.SrcReg[i];
+ if (src_reads_dst_mask(src, inst->U.I.DstReg))
+ return 0;
+
+ src.File = RC_FILE_PRESUB;
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
return 0;
}
return 1;
@@ -528,7 +523,7 @@ static int peephole_add_presub_add(
struct rc_src_register * src1 = NULL;
unsigned int i;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
@@ -592,7 +587,7 @@ static int peephole_add_presub_inv(
{
unsigned int i, swz, mask;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
mask = inst_add->U.I.DstReg.WriteMask;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 91524f5ec68..d53181e1f75 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -66,10 +66,13 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
/**
- * If an instruction is inside of a loop, end_loop will be the
- * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ * If an instruction is inside of a loop, EndLoop will be the
+ * IP of the ENDLOOP instruction, and BeginLoop will be the IP
+ * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
+ * will be -1.
*/
- int end_loop;
+ int EndLoop;
+ int BeginLoop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst,
reg->Used = 1;
if (file == RC_FILE_INPUT)
reg->Live.Start = -1;
+ else if (s->BeginLoop >= 0)
+ reg->Live.Start = s->BeginLoop;
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else if (s->end_loop)
- reg->Live.End = s->end_loop;
+ } else if (s->EndLoop >= 0)
+ reg->Live.End = s->EndLoop;
else if (inst->IP > reg->Live.End)
reg->Live.End = inst->IP;
}
@@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c,
memset(s, 0, sizeof(*s));
s->C = c;
s->NumHwTemporaries = c->max_temp_regs;
+ s->BeginLoop = -1;
+ s->EndLoop = -1;
s->HwTemporary =
memory_pool_malloc(&c->Pool,
s->NumHwTemporaries * sizeof(struct hardware_register));
@@ -207,10 +214,12 @@ static void compute_live_intervals(struct radeon_compiler *c,
inst = inst->Next) {
/* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ * instruction is used as the end of the live interval and
+ * the BGNLOOP instruction is used as the beginning. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
int loops = 1;
struct rc_instruction * tmp;
+ s->BeginLoop = inst->IP;
for(tmp = inst->Next;
tmp != &s->C->Program.Instructions;
tmp = tmp->Next) {
@@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c,
} else if (tmp->U.I.Opcode
== RC_OPCODE_ENDLOOP) {
if(!--loops) {
- s->end_loop = tmp->IP;
+ s->EndLoop = tmp->IP;
break;
}
}
}
}
- if (inst->IP == s->end_loop)
- s->end_loop = 0;
+ if (inst->IP == s->EndLoop) {
+ s->EndLoop = -1;
+ s->BeginLoop = -1;
+ }
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 553e9dcf7c1..9beb5d63579 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
@@ -54,6 +55,11 @@ struct schedule_instruction {
* this instruction can be scheduled.
*/
unsigned int NumDependencies:5;
+
+ /** List of all readers (see rc_get_readers() for the definition of
+ * "all readers"), even those outside the basic block this instruction
+ * lives in. */
+ struct rc_reader_data GlobalReaders;
};
@@ -94,6 +100,16 @@ struct register_state {
struct reg_value * Values[4];
};
+/**
+ * Record of one register remap: an instruction together with an old and a
+ * new (index, swizzle) pair. Records can be chained through Next.
+ * NOTE(review): nothing in the visible part of this patch uses this struct;
+ * the field meanings are taken from their names - confirm before relying
+ * on them.
+ */
+struct remap_reg {
+	struct rc_instruction * Inst;
+	unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int OldSwizzle:3;
+	unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int NewSwizzle:3;
+	unsigned int OnlyTexReads:1;
+	struct remap_reg * Next;
+};
+
struct schedule_state {
struct radeon_compiler * C;
struct schedule_instruction * Current;
@@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s,
return &s->Temporary[index].Values[chan];
}
-static struct reg_value * get_reg_value(struct schedule_state * s,
- rc_register_file file, unsigned int index, unsigned int chan)
-{
- struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
- if (!pv)
- return 0;
- return *pv;
-}
-
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
{
inst->NextReady = *list;
@@ -295,12 +302,12 @@ static int merge_presub_sources(
assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
switch(type) {
- case RC_PAIR_SOURCE_RGB:
+ case RC_SOURCE_RGB:
is_rgb = 1;
is_alpha = 0;
dst_sub = &dst_full->RGB;
break;
- case RC_PAIR_SOURCE_ALPHA:
+ case RC_SOURCE_ALPHA:
is_rgb = 0;
is_alpha = 1;
dst_sub = &dst_full->Alpha;
@@ -341,6 +348,8 @@ static int merge_presub_sources(
continue;
free_source = rc_pair_alloc_source(dst_full, is_rgb,
is_alpha, temp.File, temp.Index);
+ if (free_source < 0)
+ return 0;
one_way = 1;
} else {
dst_sub->Src[free_source] = temp;
@@ -356,11 +365,11 @@ static int merge_presub_sources(
for(arg = 0; arg < info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
- if (!(rc_source_type_that_arg_reads(
- dst_full->RGB.Arg[arg].Source,
- dst_full->RGB.Arg[arg].Swizzle) & type)) {
+ if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
+ 3) & type)) {
continue;
}
+
if (dst_full->RGB.Arg[arg].Source == srcp_src)
dst_full->RGB.Arg[arg].Source = free_source;
/* We need to do this just in case register
@@ -392,13 +401,13 @@ static int destructive_merge_instructions(
/* Merge the rgb presubtract registers. */
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
- if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
+ if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
return 0;
}
}
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
- if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
+ if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
return 0;
}
}
@@ -525,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) {
}
}
}
+
+/**
+ * Redirect one reader argument from an RGB channel to the W channel of
+ * register new_index.
+ *
+ * Each of the three RGB swizzle slots of arg that selects old_swz is changed
+ * to RC_SWIZZLE_W. The source slot the argument used so far is then cleared
+ * and an alpha source for (old_file, new_index) is allocated and stored in
+ * arg->Source. Nothing happens when the argument has no source.
+ */
+static void rgb_to_alpha_remap (
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	rc_register_file old_file,
+	rc_swizzle old_swz,
+	unsigned int new_index)
+{
+	struct rc_pair_instruction_source * stale_src =
+		rc_pair_get_src(&inst->U.P, arg);
+	unsigned int chan;
+	int alpha_slot;
+
+	if (stale_src == NULL)
+		return;
+
+	for (chan = 0; chan < 3; chan++) {
+		if (get_swz(arg->Swizzle, chan) != old_swz)
+			continue;
+		SET_SWZ(arg->Swizzle, chan, RC_SWIZZLE_W);
+	}
+
+	/* Release the old source slot before asking for an alpha one. */
+	memset(stale_src, 0, sizeof(*stale_src));
+	alpha_slot = rc_pair_alloc_source(&inst->U.P, 0, 1,
+					old_file, new_index);
+	if (alpha_slot < 0) {
+		/* No alpha source was available; is_rgb_to_alpha_possible
+		 * should have rejected this reader beforehand. */
+		assert(0);
+		return;
+	}
+
+	arg->Source = alpha_slot;
+}
+
+/** Return 0 for RC_OPCODE_DDX and RC_OPCODE_DDY, 1 for every other opcode. */
+static int can_remap(unsigned int opcode)
+{
+	if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * Return 0 for the derivative opcodes (DDX, DDY) and the dot-product
+ * opcodes (DP2, DP3, DP4, DPH), 1 for every other opcode.
+ */
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+	if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY)
+		return 0;
+
+	if (opcode == RC_OPCODE_DP2 || opcode == RC_OPCODE_DP3
+	    || opcode == RC_OPCODE_DP4 || opcode == RC_OPCODE_DPH)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * Reader callback that decides whether a reader in pair form could be
+ * rewritten to read from an alpha channel instead of an RGB channel.
+ *
+ * Sets Abort in the rc_reader_data passed as userdata when the reader
+ * cannot be converted; otherwise leaves the reader data untouched.
+ */
+static void is_rgb_to_alpha_possible(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_pair_instruction_arg * arg,
+	struct rc_pair_instruction_source * src)
+{
+	struct rc_reader_data * reader_data = userdata;
+	unsigned int channels_read = 0;
+	unsigned int alpha_srcs_used = 0;
+	unsigned int idx;
+
+	/* Both halves of the reading instruction must allow remapping. */
+	if (!(can_remap(inst->U.P.RGB.Opcode)
+	      && can_remap(inst->U.P.Alpha.Opcode))) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	if (src == NULL)
+		return;
+
+	/* XXX Some readers of a presubtract source could still be
+	 * converted, but for now they are all rejected. */
+	if (arg->Source == RC_PAIR_PRESUB_SRC) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* The argument may select a register component in at most one of
+	 * its three RGB swizzle slots.
+	 * XXX Reading the same component twice should be allowed.
+	 * XXX If the index being converted to equals the current index,
+	 * reading more than one component is fine as well.
+	 */
+	for (idx = 0; idx < 3; idx++) {
+		rc_swizzle sel = get_swz(arg->Swizzle, idx);
+		if (sel == RC_SWIZZLE_X || sel == RC_SWIZZLE_Y
+		    || sel == RC_SWIZZLE_Z || sel == RC_SWIZZLE_W)
+			channels_read++;
+	}
+	if (channels_read > 1) {
+		reader_data->Abort = 1;
+		return;
+	}
+
+	/* At least one of the three alpha sources has to be unused.
+	 * XXX If the register all readers will be remapped to were known,
+	 * the substitution would sometimes still be possible with all
+	 * three alpha sources in use. */
+	for (idx = 0; idx < 3; idx++) {
+		if (inst->U.P.Alpha.Src[idx].Used)
+			alpha_srcs_used++;
+	}
+	if (alpha_srcs_used > 2) {
+		reader_data->Abort = 1;
+		return;
+	}
+}
+
+/**
+ * Try to turn an RGB-only pair instruction into an alpha-only one.
+ *
+ * The tracked value written by the instruction is moved to the W channel of
+ * the first temporary, searching upwards from the current destination index,
+ * whose W channel has no tracked value. The RGB sub-instruction is copied
+ * into the alpha sub-instruction and replaced by a NOP, and every reader
+ * recorded in sched_inst->GlobalReaders is rewritten to read the new
+ * location.
+ *
+ * @param s scheduler state holding the per-channel register values
+ * @param sched_inst instruction to convert
+ * @return 1 if the instruction was converted, 0 if it was left unchanged
+ */
+static int convert_rgb_to_alpha(
+	struct schedule_state * s,
+	struct schedule_instruction * sched_inst)
+{
+	struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+	unsigned int old_mask = pair_inst->RGB.WriteMask;
+	unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+	const struct rc_opcode_info * info =
+		rc_get_opcode_info(pair_inst->RGB.Opcode);
+	int new_index = -1;
+	unsigned int i;
+
+	if (sched_inst->GlobalReaders.Abort)
+		return 0;
+
+	if (!pair_inst->RGB.WriteMask)
+		return 0;
+
+	if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+	    || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+		return 0;
+	}
+
+	assert(sched_inst->NumWriteValues == 1);
+
+	if (!sched_inst->WriteValues[0]) {
+		assert(0);
+		return 0;
+	}
+
+	/* We start at the old index, because if we can reuse the same
+	 * register and just change the swizzle then it is more likely we
+	 * will be able to convert all the readers. */
+	for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+		struct reg_value ** new_regvalp = get_reg_valuep(
+					s, RC_FILE_TEMPORARY, i, 3);
+		struct reg_value ** old_regvalp;
+
+		/* get_reg_valuep() may return NULL (its other callers check
+		 * for that); treat it as "no slot available" instead of
+		 * dereferencing it. */
+		if (!new_regvalp)
+			break;
+
+		/* W channel of this register already carries a value. */
+		if (*new_regvalp)
+			continue;
+
+		old_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY,
+					pair_inst->RGB.DestIndex, old_swz);
+		/* Bail out before any scheduler state has been modified. */
+		if (!old_regvalp)
+			return 0;
+
+		/* Move the tracked value from the old RGB channel to the
+		 * W channel of register i. */
+		new_index = i;
+		*new_regvalp = *old_regvalp;
+		*old_regvalp = NULL;
+		break;
+	}
+	if (new_index < 0) {
+		return 0;
+	}
+
+	pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+	pair_inst->Alpha.DestIndex = new_index;
+	pair_inst->Alpha.WriteMask = 1;
+	pair_inst->Alpha.Target = pair_inst->RGB.Target;
+	pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+	pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+	pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+	memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+						sizeof(pair_inst->Alpha.Arg));
+	/* Move the swizzles into the first chan */
+	for (i = 0; i < info->NumSrcRegs; i++) {
+		unsigned int j;
+		for (j = 0; j < 3; j++) {
+			unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+			if (swz != RC_SWIZZLE_UNUSED) {
+				pair_inst->Alpha.Arg[i].Swizzle = swz;
+				break;
+			}
+		}
+	}
+	/* The RGB half no longer does anything. */
+	pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+	pair_inst->RGB.DestIndex = 0;
+	pair_inst->RGB.WriteMask = 0;
+	pair_inst->RGB.Target = 0;
+	pair_inst->RGB.OutputWriteMask = 0;
+	pair_inst->RGB.DepthWriteMask = 0;
+	pair_inst->RGB.Saturate = 0;
+	memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+	/* Point every recorded reader at the new register and channel. */
+	for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+		struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+		rgb_to_alpha_remap(reader.Inst, reader.U.Arg,
+					RC_FILE_TEMPORARY, old_swz, new_index);
+	}
+	return 1;
+}
+
/**
* Find a good ALU instruction or pair of ALU instruction and emit it.
*
@@ -536,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
{
struct schedule_instruction * sinst;
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- if (s->ReadyFullALU) {
- sinst = s->ReadyFullALU;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- sinst = s->ReadyAlpha;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
} else {
struct schedule_instruction **prgb;
struct schedule_instruction **palpha;
-
+ struct schedule_instruction *prev;
+pair:
/* Some pairings might fail because they require too
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
@@ -572,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
goto success;
}
}
-
- /* No success in pairing; just take the first RGB instruction */
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
+ prev = NULL;
+ /* No success in pairing, now try to convert one of the RGB
+ * instructions to an Alpha so we can pair it with another RGB.
+ */
+ if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ if ((*prgb)->NumWriteValues == 1) {
+ struct schedule_instruction * prgb_next;
+ if (!convert_rgb_to_alpha(s, *prgb))
+ goto cont_loop;
+ prgb_next = (*prgb)->NextReady;
+ /* Add instruction to the Alpha ready list. */
+ (*prgb)->NextReady = s->ReadyAlpha;
+ s->ReadyAlpha = *prgb;
+ /* Remove instruction from the RGB ready list.*/
+ if (prev)
+ prev->NextReady = prgb_next;
+ else
+ s->ReadyRGB = prgb_next;
+ goto pair;
+ }
+cont_loop:
+ prev = *prgb;
+ }
+ }
+ /* Still no success in pairing, just take the first RGB
+ * or alpha instruction. */
+ if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else if (s->ReadyAlpha) {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ } else {
+ /*XXX Something real bad has happened. */
+ assert(0);
+ }
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
@@ -591,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan)
{
struct schedule_state * s = data;
- struct reg_value * v = get_reg_value(s, file, index, chan);
+ struct reg_value ** v = get_reg_valuep(s, file, index, chan);
struct reg_value_reader * reader;
if (!v)
return;
- if (v->Writer == s->Current) {
+ if (*v && (*v)->Writer == s->Current) {
/* The instruction reads and writes to a register component.
* In this case, we only want to increment dependencies by one.
*/
@@ -608,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst,
reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
reader->Reader = s->Current;
- reader->Next = v->Readers;
- v->Readers = reader;
- v->NumReaders++;
-
- s->Current->NumDependencies++;
+ if (!*v) {
+ /* In this situation, the instruction reads from a register
+ * that hasn't been written to or read from in the current
+ * block. */
+ *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+ memset(*v, 0, sizeof(struct reg_value));
+ (*v)->Readers = reader;
+ } else {
+ reader->Next = (*v)->Readers;
+ (*v)->Readers = reader;
+ /* Only update the current instruction's dependencies if the
+ * register it reads from has been written to in this block. */
+ if ((*v)->Writer) {
+ s->Current->NumDependencies++;
+ }
+ }
+ (*v)->NumReaders++;
if (s->Current->NumReadValues >= 12) {
rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
} else {
- s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ s->Current->ReadValues[s->Current->NumReadValues++] = *v;
}
}
@@ -652,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst,
}
}
+/**
+ * Reader callback taking a struct rc_src_register reader: unconditionally
+ * sets Abort in the rc_reader_data passed as userdata.
+ * NOTE(review): presumably invoked for readers that are not in pair form,
+ * which rules out the RGB->Alpha conversion - confirm against
+ * rc_get_readers().
+ */
+static void is_rgb_to_alpha_possible_normal(
+	void * userdata,
+	struct rc_instruction * inst,
+	struct rc_src_register * src)
+{
+	struct rc_reader_data * readers = userdata;
+
+	readers->Abort = 1;
+}
+
static void schedule_block(struct r300_fragment_program_compiler * c,
struct rc_instruction * begin, struct rc_instruction * end)
{
@@ -683,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
if (!s.Current->NumDependencies)
instruction_ready(&s, s.Current);
+
+ /* Get global readers for possible RGB->Alpha conversion. */
+ rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+ is_rgb_to_alpha_possible_normal,
+ is_rgb_to_alpha_possible, NULL);
}
/* Temporarily unlink all instructions */
@@ -711,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst)
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
{
+ struct schedule_state s;
+
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
while(inst != &c->Base.Program.Instructions) {
struct rc_instruction * first;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index c549be52183..fc05366f50e 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->RGB.DestIndex = inst->DstReg.Index;
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
}
+
if (needalpha) {
- pair->Alpha.DestIndex = inst->DstReg.Index;
pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ if (pair->Alpha.WriteMask) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 24b685fbeb4..fe5756ebc45 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
@@ -70,58 +71,98 @@ void rc_local_transform(
}
}
+/** State handed to get_used_temporaries_cb() through its userdata pointer. */
+struct get_used_temporaries_data {
+unsigned char * Used; /* indexed by temporary register index; masks are OR'ed in */
+unsigned int UsedLength; /* number of entries in Used; indices >= this are ignored */
+};
+
+/**
+ * Read/write scan callback: OR the accessed component mask into the entry of
+ * the Used array that belongs to the accessed temporary register. Accesses
+ * to other register files, and indices beyond UsedLength, are ignored.
+ */
+static void get_used_temporaries_cb(
+	void * userdata,
+	struct rc_instruction * inst,
+	rc_register_file file,
+	unsigned int index,
+	unsigned int mask)
+{
+	struct get_used_temporaries_data * state = userdata;
+
+	if (file == RC_FILE_TEMPORARY && index < state->UsedLength)
+		state->Used[index] |= mask;
+}
+
/**
- * Left multiplication of a register with a swizzle
+ * This function fills in the parameter 'used' with writemasks that
+ * represent which components of each temporary register are used by the
+ * program. This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
*/
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+void rc_get_used_temporaries(
+	struct radeon_compiler * c,
+	unsigned char * used,
+	unsigned int used_length)
+{
+	struct get_used_temporaries_data cb_state;
+	struct rc_instruction * cur = c->Program.Instructions.Next;
+
+	cb_state.Used = used;
+	cb_state.UsedLength = used_length;
+
+	/* Walk the instruction list until it wraps around to the list head,
+	 * recording every register the instructions read or write. */
+	while (cur != &c->Program.Instructions) {
+		rc_for_all_reads_mask(cur, get_used_temporaries_cb, &cb_state);
+		rc_for_all_writes_mask(cur, get_used_temporaries_cb, &cb_state);
+		cur = cur->Next;
+	}
+}
+
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this function finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask)
{
- struct rc_src_register tmp = srcreg;
int i;
- tmp.Swizzle = 0;
- tmp.Negate = 0;
- for(i = 0; i < 4; ++i) {
- rc_swizzle swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
+ for(i = 0; i < used_length; i++) {
+ if ((~used[i] & mask) == mask) {
+ used[i] |= mask;
+ return i;
}
}
- return tmp;
+ return -1;
}
unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
- char used[RC_REGISTER_MAX_INDEX];
- unsigned int i;
- struct rc_instruction * rcinst;
+ unsigned char used[RC_REGISTER_MAX_INDEX];
+ int free;
memset(used, 0, sizeof(used));
- for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
- const struct rc_sub_instruction *inst = &rcinst->U.I;
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
- unsigned int k;
-
- for (k = 0; k < opcode->NumSrcRegs; k++) {
- if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
- used[inst->SrcReg[k].Index] = 1;
- }
-
- if (opcode->HasDstReg) {
- if (inst->DstReg.File == RC_FILE_TEMPORARY)
- used[inst->DstReg.Index] = 1;
- }
- }
+ rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
- for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (!used[i])
- return i;
+ free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+ RC_MASK_XYZW);
+ if (free < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
}
-
- rc_error(c, "Ran out of temporary registers\n");
- return 0;
+ return free;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index f0a77d7b539..df6c94b35f9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -159,47 +159,6 @@ struct rc_program {
struct rc_constant_list Constants;
};
-static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
-{
- if (idx & 0x4)
- return idx;
- return GET_SWZ(swz, idx);
-}
-
-static inline unsigned int combine_swizzles4(unsigned int src,
- rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, swz_x);
- ret |= get_swz(src, swz_y) << 3;
- ret |= get_swz(src, swz_z) << 6;
- ret |= get_swz(src, swz_w) << 9;
-
- return ret;
-}
-
-static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
-{
- unsigned int ret = 0;
-
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
-
- return ret;
-}
-
-struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
-
-static inline void reset_srcreg(struct rc_src_register* reg)
-{
- memset(reg, 0, sizeof(struct rc_src_register));
- reg->Swizzle = RC_SWIZZLE_XYZW;
-}
-
-
/**
* A transformation that can be passed to \ref rc_local_transform.
*
@@ -222,6 +181,17 @@ void rc_local_transform(
struct radeon_compiler *c,
void *user);
+void rc_get_used_temporaries(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length);
+
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask);
+
unsigned int rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
@@ -233,4 +203,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c);
void rc_print_program(const struct rc_program *prog);
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 39408845d5a..58977a40c7c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -36,6 +36,7 @@
#include "radeon_program_alu.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
static struct rc_instruction *emit1(
@@ -84,16 +85,6 @@ static struct rc_instruction *emit3(
return fpi;
}
-static struct rc_dst_register dstreg(int file, int index)
-{
- struct rc_dst_register dst;
- dst.File = file;
- dst.Index = index;
- dst.WriteMask = RC_MASK_XYZW;
- dst.RelAddr = 0;
- return dst;
-}
-
static struct rc_dst_register dstregtmpmask(int index, int mask)
{
struct rc_dst_register dst = {0};
@@ -186,6 +177,38 @@ static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
return swizzle_smear(reg, RC_SWIZZLE_W);
}
+/**
+ * Tell whether the destination register of inst may double as a scratch
+ * register: it has to be a temporary that none of the instruction's source
+ * operands reads from.
+ *
+ * @return 1 if the destination can be reused, 0 otherwise
+ */
+static int is_dst_safe_to_reuse(struct rc_instruction *inst)
+{
+	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
+	const struct rc_dst_register *dst = &inst->U.I.DstReg;
+	unsigned src_idx;
+
+	assert(info->HasDstReg);
+
+	if (dst->File != RC_FILE_TEMPORARY)
+		return 0;
+
+	for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
+		const struct rc_src_register *src = &inst->U.I.SrcReg[src_idx];
+
+		if (src->File != RC_FILE_TEMPORARY)
+			continue;
+		if (src->Index == dst->Index)
+			return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * Build a temporary destination register carrying the write mask of inst.
+ * The instruction's own destination index is used when
+ * is_dst_safe_to_reuse() allows it; otherwise a free temporary is looked up.
+ */
+static struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
+					       struct rc_instruction *inst)
+{
+	if (is_dst_safe_to_reuse(inst))
+		return dstregtmpmask(inst->U.I.DstReg.Index,
+				     inst->U.I.DstReg.WriteMask);
+
+	return dstregtmpmask(rc_find_free_temporary(c),
+			     inst->U.I.DstReg.WriteMask);
+}
+
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -209,10 +232,26 @@ static void transform_CEIL(struct radeon_compiler* c,
* ceil(x) = x+frac(-x)
*/
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
+ rc_remove_instruction(inst);
+}
+
+static void transform_CLAMP(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* CLAMP dst, src, min, max
+ * into:
+ * MIN tmp, src, max
+ * MAX dst, tmp, min
+ */
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
+ inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
@@ -258,10 +297,10 @@ static void transform_DST(struct radeon_compiler* c,
static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
- inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -351,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c,
static void transform_LRP(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
inst->U.I.DstReg,
- inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
rc_remove_instruction(inst);
}
@@ -366,9 +405,8 @@ static void transform_LRP(struct radeon_compiler* c,
static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
- struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
- struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
tempdst.WriteMask = RC_MASK_W;
tempsrc.Swizzle = RC_SWIZZLE_WWWW;
@@ -388,11 +426,11 @@ static void transform_RSQ(struct radeon_compiler* c,
static void transform_SEQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -407,11 +445,11 @@ static void transform_SFL(struct radeon_compiler* c,
static void transform_SGE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -419,11 +457,11 @@ static void transform_SGE(struct radeon_compiler* c,
static void transform_SGT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -431,11 +469,11 @@ static void transform_SGT(struct radeon_compiler* c,
static void transform_SLE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -443,11 +481,11 @@ static void transform_SLE(struct radeon_compiler* c,
static void transform_SLT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -455,11 +493,11 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SNE(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
- negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -473,12 +511,13 @@ static void transform_SSG(struct radeon_compiler* c,
* CMP tmp1, x, 1, 0
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0;
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
negate(inst->U.I.SrcReg[0]),
builtin_one,
builtin_zero);
@@ -495,7 +534,7 @@ static void transform_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -517,15 +556,15 @@ static void transform_SWZ(struct radeon_compiler* c,
static void transform_XPD(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
- emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
- negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
rc_remove_instruction(inst);
}
@@ -553,6 +592,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
@@ -592,7 +632,7 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
{
/* There is no decent CMP available, so let's rig one up.
* CMP is defined as dst = src0 < 0.0 ? src1 : src2
- * The following sequence consumes two temps and two extra slots
+ * The following sequence consumes zero to two temps and two extra slots
* (the second temp and the second slot is consumed by transform_LRP),
* but should be equivalent:
*
@@ -600,18 +640,18 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
* LRP dst, tmp0, src1, src2
*
* Yes, I know, I'm a mad scientist. ~ C. & M. */
- int tempreg0 = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
/* SLT tmp0, src0, 0.0 */
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg0),
+ dst,
inst->U.I.SrcReg[0], builtin_zero);
/* LRP dst, tmp0, src1, src2 */
transform_LRP(c,
emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+ srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
rc_remove_instruction(inst);
}
@@ -642,7 +682,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register dst = try_to_reuse_dst(c, inst);
unsigned constant_swizzle;
int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
0.0000000000000000001,
@@ -650,16 +690,16 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
/* MOV dst, src */
emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
- dstreg(RC_FILE_TEMPORARY, tempreg),
+ dst,
inst->U.I.SrcReg[0]);
/* MAX dst.z, src, 0.00...001 */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
- dstregtmpmask(tempreg, RC_MASK_Y),
- srcreg(RC_FILE_TEMPORARY, tempreg),
+ dstregtmpmask(dst.Index, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, dst.Index),
srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
- inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
}
static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
@@ -743,12 +783,13 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
* SLT tmp1, x, 0;
* ADD result, tmp0, -tmp1;
*/
- unsigned tmp0, tmp1;
+ struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
+ unsigned tmp1;
/* 0 < x */
- tmp0 = rc_find_free_temporary(c);
+ dst0 = try_to_reuse_dst(c, inst);
emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
- dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ dst0,
builtin_zero,
inst->U.I.SrcReg[0]);
@@ -763,7 +804,7 @@ static void transform_r300_vertex_SSG(struct radeon_compiler* c,
/* result = tmp0 - tmp1 */
emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
inst->U.I.DstReg,
- srcreg(RC_FILE_TEMPORARY, tmp0),
+ srcreg(RC_FILE_TEMPORARY, dst0.Index),
negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
rc_remove_instruction(inst);
@@ -781,6 +822,7 @@ int r300_transform_vertex_alu(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 9dcd44c522d..45f79ece5ba 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
return 0;
}
}
+
+#define RC_SOURCE_NONE 0x0
+#define RC_SOURCE_RGB 0x1
+#define RC_SOURCE_ALPHA 0x2
+
#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index a21fe8d3df8..5905d26e521 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -27,6 +27,9 @@
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
/**
* Return the source slot where we installed the given register access,
@@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads(
}
}
-/*return 0 for rgb, 1 for alpha -1 for error. */
-
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle)
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg)
{
- unsigned int chan;
- unsigned int swz = RC_SWIZZLE_UNUSED;
- unsigned int ret = RC_PAIR_SOURCE_NONE;
-
- for(chan = 0; chan < 3; chan++) {
- swz = GET_SWZ(swizzle, chan);
- if (swz == RC_SWIZZLE_W) {
- ret |= RC_PAIR_SOURCE_ALPHA;
- } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
- || swz == RC_SWIZZLE_Z) {
- ret |= RC_PAIR_SOURCE_RGB;
+ unsigned int i, type;
+ unsigned int channels = 0;
+
+ for(i = 0; i < 3; i++) {
+ if (arg == pair_inst->RGB.Arg + i) {
+ channels = 3;
+ break;
}
}
- return ret;
+
+ if (channels == 0) {
+ for (i = 0; i < 3; i++) {
+ if (arg == pair_inst->Alpha.Arg + i) {
+ channels = 1;
+ break;
+ }
+ }
+ }
+
+ assert(channels > 0);
+ type = rc_source_type_swz(arg->Swizzle, channels);
+
+ if (type & RC_SOURCE_RGB) {
+ return &pair_inst->RGB.Src[arg->Source];
+ } else if (type & RC_SOURCE_ALPHA) {
+ return &pair_inst->Alpha.Src[arg->Source];
+ } else {
+ return NULL;
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 54d44a2098b..ccf7a0070cd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -55,10 +55,6 @@ struct radeon_compiler;
*/
#define RC_PAIR_PRESUB_SRC 3
-#define RC_PAIR_SOURCE_NONE 0x0
-#define RC_PAIR_SOURCE_RGB 0x1
-#define RC_PAIR_SOURCE_ALPHA 0x2
-
struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
@@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads(
void * data,
rc_pair_foreach_src_fn cb);
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle);
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index 618ab5a099b..ae13f6742f8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz)
case RC_SWIZZLE_HALF: return 'H';
case RC_SWIZZLE_UNUSED: return '_';
}
+ fprintf(stderr, "bad swz: %u\n", swz);
return '?';
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 530afa5e08e..f9d9f34b6ad 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -28,6 +28,8 @@
#include "radeon_program_tex.h"
+#include "radeon_compiler_util.h"
+
/* Series of transformations to be done on textures. */
static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
index 5f67f536f61..7d76585a593 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -87,8 +87,9 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
rc_for_all_reads_src(inst, mark_used, &d);
}
- /* Pass 2: If there is relative addressing, mark all externals as used. */
- if (has_rel_addr) {
+ /* Pass 2: If there is relative addressing or dead constant elimination
+ * is disabled, mark all externals as used. */
+ if (has_rel_addr || !c->remove_unused_constants) {
for (unsigned i = 0; i < c->Program.Constants.Count; i++)
if (constants[i].Type == RC_CONSTANT_EXTERNAL)
const_used[i] = 1;
@@ -119,7 +120,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
/* is_identity ==> new_count == old_count
* !is_identity ==> new_count < old_count */
assert( is_identity || new_count < c->Program.Constants.Count);
- assert(!(has_rel_addr && are_externals_remapped));
+ assert(!((has_rel_addr || !c->remove_unused_constants) && are_externals_remapped));
/* Pass 4: Redirect reads of all constants to their new locations. */
if (!is_identity) {
@@ -127,7 +128,6 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions; inst = inst->Next) {
rc_remap_registers(inst, remap_regs, inv_remap_table);
}
-
}
/* Set the new constant count. Note that new_count may be less than
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 60e228be5bd..88165f78953 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -33,100 +33,51 @@
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
-
-struct reg_rename {
- int old_index;
- int new_index;
- int temp_index;
-};
-
-static void rename_reg(void * data, struct rc_instruction * inst,
- rc_register_file * file, unsigned int * index)
-{
- struct reg_rename *r = data;
-
- if(r->old_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->new_index;
- }
- else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->temp_index;
- }
-}
-
-static void rename_all(
- struct radeon_compiler *c,
- struct rc_instruction * start,
- unsigned int old,
- unsigned int new,
- unsigned int temp)
-{
- struct rc_instruction * inst;
- struct reg_rename r;
- r.old_index = old;
- r.new_index = new;
- r.temp_index = temp;
- for(inst = start; inst != &c->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, rename_reg, &r);
- }
-}
+#include "radeon_program.h"
/**
* This function renames registers in an attempt to get the code close to
* SSA form. After this function has completed, most of the register are only
- * written to one time, with a few exceptions. For example, this block of code
- * will not be modified by this function:
- * Mov Temp[0].x Const[0].x
- * Mov Temp[0].y Const[0].y
- * Basically, destination registers will be renamed if:
- * 1. There have been no previous writes to that register
- * or
- * 2. If the instruction is writting to the exact components (no more, no less)
- * of a register that has been written to by previous instructions.
+ * written to one time, with a few exceptions.
*
* This function assumes all the instructions are still of type
* RC_INSTRUCTION_NORMAL.
*/
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
- unsigned int cur_index = 0;
- unsigned int icount;
+ unsigned int i, used_length;
+ int new_index;
struct rc_instruction * inst;
- unsigned int * masks;
+ struct rc_reader_data reader_data;
+ unsigned char * used;
- /* The number of instructions in the program is also the maximum
- * number of temp registers that could potentially be used. */
- icount = rc_recompute_ips(c);
- masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int));
- memset(masks, 0, icount * sizeof(unsigned int));
+ used_length = 2 * rc_recompute_ips(c);
+ used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
+ memset(used, 0, sizeof(unsigned char) * used_length);
+ rc_get_used_temporaries(c, used, used_length);
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
- const struct rc_opcode_info * info;
- unsigned int old_index, temp_index;
- struct rc_dst_register * dst;
- if(inst->Type != RC_INSTRUCTION_NORMAL) {
- rc_error(c, "%s only works with normal instructions.",
- __FUNCTION__);
- return;
- }
- dst = &inst->U.I.DstReg;
- info = rc_get_opcode_info(inst->U.I.Opcode);
- if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
+
+ if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
continue;
+
+ rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ continue;
+
+ new_index = rc_find_free_temporary_list(c, used, used_length,
+ RC_MASK_XYZW);
+ if (new_index < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return;
}
- if(dst->Index >= icount || !masks[dst->Index] ||
- masks[dst->Index] == dst->WriteMask) {
- old_index = dst->Index;
- /* We need to set dst->Index here so get free temporary
- * will work. */
- dst->Index = cur_index++;
- temp_index = rc_find_free_temporary(c);
- rename_all(c, inst->Next, old_index,
- dst->Index, temp_index);
+
+ reader_data.Writer->U.I.DstReg.Index = new_index;
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ reader_data.Readers[i].U.Src->Index = new_index;
}
- assert(dst->Index < icount);
- masks[dst->Index] |= dst->WriteMask;
}
}
diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c
index 2c9e4e2b844..53dacbfdf39 100644
--- a/src/mesa/drivers/dri/r600/evergreen_chip.c
+++ b/src/mesa/drivers/dri/r600/evergreen_chip.c
@@ -286,7 +286,11 @@ static void evergreenSetupVTXConstants(struct gl_context * ctx,
if (!paos->bo)
return;
- r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM))
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+ else
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
//uSQ_VTX_CONSTANT_WORD0_0
uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c
index a77be183a12..076a608573c 100644
--- a/src/mesa/drivers/dri/r600/evergreen_state.c
+++ b/src/mesa/drivers/dri/r600/evergreen_state.c
@@ -1461,6 +1461,14 @@ static void evergreenInitSQConfig(struct gl_context * ctx)
uMaxThreads = 248;
uMaxStackEntries = 512;
break;
+ case CHIP_FAMILY_PALM:
+ uSqNumCfInsts = 1;
+ bVC_ENABLE = GL_FALSE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 96;
+ uMaxThreads = 192;
+ uMaxStackEntries = 256;
+ break;
default:
uSqNumCfInsts = 2;
bVC_ENABLE = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
index 2f4c92d6767..3b5448a0e4e 100644
--- a/src/mesa/drivers/dri/r600/evergreen_tex.c
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -31,7 +31,6 @@
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
-#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texobj.h"
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index b6443bf0c53..aa1891eac32 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -259,7 +259,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen)
R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
- &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_HEMLOCK) )
+ &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) )
{
r700->bShaderUseMemConstant = GL_TRUE;
}
@@ -285,8 +285,13 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen)
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 16.0;
- ctx->Const.MaxTextureLevels = 13; /* hw support 14 */
- ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */
+ if (screen->chip_family >= CHIP_FAMILY_CEDAR) {
+ ctx->Const.MaxTextureLevels = 15;
+ ctx->Const.MaxTextureRectSize = 16384;
+ } else {
+ ctx->Const.MaxTextureLevels = 14;
+ ctx->Const.MaxTextureRectSize = 8192;
+ }
ctx->Const.MinPointSize = 0x0001 / 8.0;
ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 2bf24096a0d..1fa559cec1a 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -3334,7 +3334,14 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+ }
pAsm->D.dst.op3 = 1;
tmp = (-1);
@@ -3416,8 +3423,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
checkop1(pAsm);
tmp = gethelpr(pAsm);
-
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -3457,7 +3470,14 @@ GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4742,7 +4762,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4782,7 +4809,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -5010,7 +5044,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
GLuint tmp = gethelpr(pAsm);
/* tmp = (src > 0 ? 1 : src) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
@@ -5033,7 +5074,14 @@ GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
}
/* dst = (-tmp > 0 ? -1 : tmp) */
- pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ }
pAsm->D.dst.op3 = 1;
if( GL_FALSE == assemble_dst(pAsm) )
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 61106fbc43f..82789cec5ed 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -440,6 +440,11 @@
#define PCI_CHIP_HEMLOCK_689C 0x689C
#define PCI_CHIP_HEMLOCK_689D 0x689D
+#define PCI_CHIP_PALM_9802 0x9802
+#define PCI_CHIP_PALM_9803 0x9803
+#define PCI_CHIP_PALM_9804 0x9804
+#define PCI_CHIP_PALM_9805 0x9805
+
enum {
CHIP_FAMILY_R100,
CHIP_FAMILY_RV100,
@@ -483,6 +488,7 @@ enum {
CHIP_FAMILY_JUNIPER,
CHIP_FAMILY_CYPRESS,
CHIP_FAMILY_HEMLOCK,
+ CHIP_FAMILY_PALM,
CHIP_FAMILY_LAST
};
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index fecdd119059..ca6ab46ca43 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -99,6 +99,7 @@ static const char* get_chip_family_name(int chip_family)
case CHIP_FAMILY_JUNIPER: return "JUNIPER";
case CHIP_FAMILY_CYPRESS: return "CYPRESS";
case CHIP_FAMILY_HEMLOCK: return "HEMLOCK";
+ case CHIP_FAMILY_PALM: return "PALM";
default: return "unknown";
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
index 088f9701722..a68a9768779 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
@@ -49,7 +49,7 @@ struct _radeon_mipmap_level {
};
/* store the max possible in the miptree */
-#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13
+#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15
/**
* A mipmap tree contains texture images in the layout that the hardware
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index b379240579d..94e56c2ade6 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -1155,6 +1155,14 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
+ case PCI_CHIP_PALM_9802:
+ case PCI_CHIP_PALM_9803:
+ case PCI_CHIP_PALM_9804:
+ case PCI_CHIP_PALM_9805:
+ screen->chip_family = CHIP_FAMILY_PALM;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
device_id);
diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h
index f0171f3c0f8..7d8f507115d 100644
--- a/src/mesa/drivers/dri/sis/server/sis_dri.h
+++ b/src/mesa/drivers/dri/sis/server/sis_dri.h
@@ -72,13 +72,4 @@ typedef struct {
int dummy;
} SISDRIContextRec, *SISDRIContextPtr;
-#ifdef XFree86Server
-
-#include "screenint.h"
-
-Bool SISDRIScreenInit(ScreenPtr pScreen);
-void SISDRICloseScreen(ScreenPtr pScreen);
-Bool SISDRIFinishScreenInit(ScreenPtr pScreen);
-
-#endif
#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h
index fb38419dcdd..7e2f0e00a8e 100644
--- a/src/mesa/drivers/dri/tdfx/tdfx_context.h
+++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h
@@ -41,11 +41,7 @@
#include <sys/time.h>
#include "dri_util.h"
-#ifdef XFree86Server
-#include "GL/xf86glx.h"
-#else
#include "main/glheader.h"
-#endif
#if defined(__linux__)
#include <signal.h>
#endif
diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h
index b47397d5728..c6eed03c1c9 100644
--- a/src/mesa/drivers/dri/unichrome/server/via_dri.h
+++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h
@@ -35,9 +35,7 @@
#define VIA_DRIDDX_VERSION_MINOR 0
#define VIA_DRIDDX_VERSION_PATCH 0
-#ifndef XFree86Server
typedef int Bool;
-#endif
typedef struct {
drm_handle_t handle;
diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp
index 7145bffa510..69f03b8e47c 100644
--- a/src/mesa/drivers/windows/gdi/InitCritSections.cpp
+++ b/src/mesa/drivers/windows/gdi/InitCritSections.cpp
@@ -1,7 +1,8 @@
#include "glapi.h"
#include "glThread.h"
-#ifdef WIN32_THREADS
+#ifdef WIN32
+
extern "C" _glthread_Mutex OneTimeLock;
extern "C" _glthread_Mutex GenTexturesLock;
@@ -29,4 +30,4 @@ public:
_CriticalSectionInit _CriticalSectionInit::m_inst;
-#endif
+#endif /* WIN32 */
diff --git a/src/mesa/drivers/x11/glxheader.h b/src/mesa/drivers/x11/glxheader.h
index d88afba20e7..ee002191bc0 100644
--- a/src/mesa/drivers/x11/glxheader.h
+++ b/src/mesa/drivers/x11/glxheader.h
@@ -32,13 +32,6 @@
#include "main/glheader.h"
-#ifdef XFree86Server
-
-# include "xorg-server.h"
-# include "resource.h"
-# include "windowstr.h"
-
-#else
# include <X11/Xlib.h>
# include <X11/Xlibint.h>
@@ -51,7 +44,6 @@
# include <GL/glx.h>
# include <sys/time.h>
-#endif
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index 00ceb960c62..b5eabadf486 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -158,14 +158,12 @@ static short hpcr_rgbTbl[3][256] = {
/**
* Return the host's byte order as LSBFirst or MSBFirst ala X.
*/
-#ifndef XFree86Server
static int host_byte_order( void )
{
int i = 1;
char *cptr = (char *) &i;
return (*cptr==1) ? LSBFirst : MSBFirst;
}
-#endif
/**
@@ -176,7 +174,7 @@ static int host_byte_order( void )
*/
static int check_for_xshm( XMesaDisplay *display )
{
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
int major, minor, ignore;
Bool pixmaps;
@@ -227,16 +225,6 @@ gamma_adjust( GLfloat gamma, GLint value, GLint max )
static int
bits_per_pixel( XMesaVisual xmv )
{
-#ifdef XFree86Server
- const int depth = xmv->nplanes;
- int i;
- assert(depth > 0);
- for (i = 0; i < screenInfo.numPixmapFormats; i++) {
- if (screenInfo.formats[i].depth == depth)
- return screenInfo.formats[i].bitsPerPixel;
- }
- return depth; /* should never get here, but this should be safe */
-#else
XMesaDisplay *dpy = xmv->display;
XMesaVisualInfo visinfo = xmv->visinfo;
XMesaImage *img;
@@ -257,7 +245,6 @@ bits_per_pixel( XMesaVisual xmv )
img->data = NULL;
XMesaDestroyImage( img );
return bitsPerPixel;
-#endif
}
@@ -271,7 +258,6 @@ bits_per_pixel( XMesaVisual xmv )
* Return: GL_TRUE - window exists
* GL_FALSE - window doesn't exist
*/
-#ifndef XFree86Server
static GLboolean WindowExistsFlag;
static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr )
@@ -306,7 +292,6 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, GLuint *width, GLuint *height
*height = h;
return stat;
}
-#endif
/**
@@ -319,10 +304,6 @@ void
xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
GLuint *width, GLuint *height)
{
-#ifdef XFree86Server
- *width = MIN2(b->frontxrb->drawable->width, MAX_WIDTH);
- *height = MIN2(b->frontxrb->drawable->height, MAX_HEIGHT);
-#else
Status stat;
_glthread_LOCK_MUTEX(_xmesa_lock);
@@ -335,7 +316,6 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
_mesa_warning(NULL, "XGetGeometry failed!\n");
*width = *height = 1;
}
-#endif
}
@@ -549,16 +529,11 @@ noFaultXAllocColor( int client,
XMesaColor *color,
int *exact, int *alloced )
{
-#ifdef XFree86Server
- Pixel *ppixIn;
- xrgb *ctable;
-#else
/* we'll try to cache ctable for better remote display performance */
static Display *prevDisplay = NULL;
static XMesaColormap prevCmap = 0;
static int prevCmapSize = 0;
static XMesaColor *ctable = NULL;
-#endif
XMesaColor subColor;
int i, bestmatch;
double mindist; /* 3*2^16^2 exceeds long int precision. */
@@ -566,14 +541,7 @@ noFaultXAllocColor( int client,
(void) client;
/* First try just using XAllocColor. */
-#ifdef XFree86Server
- if (AllocColor(cmap,
- &color->red, &color->green, &color->blue,
- &color->pixel,
- client) == Success)
-#else
if (XAllocColor(dpy, cmap, color))
-#endif
{
*exact = 1;
*alloced = 1;
@@ -584,14 +552,6 @@ noFaultXAllocColor( int client,
/* Retrieve color table entries. */
/* XXX alloca candidate. */
-#ifdef XFree86Server
- ppixIn = (Pixel *) MALLOC(cmapSize * sizeof(Pixel));
- ctable = (xrgb *) MALLOC(cmapSize * sizeof(xrgb));
- for (i = 0; i < cmapSize; i++) {
- ppixIn[i] = i;
- }
- QueryColors(cmap, cmapSize, ppixIn, ctable);
-#else
if (prevDisplay != dpy || prevCmap != cmap
|| prevCmapSize != cmapSize || !ctable) {
/* free previously cached color table */
@@ -608,7 +568,6 @@ noFaultXAllocColor( int client,
prevCmap = cmap;
prevCmapSize = cmapSize;
}
-#endif
/* Find best match. */
bestmatch = -1;
@@ -632,14 +591,7 @@ noFaultXAllocColor( int client,
* fail if the cell is read/write. Otherwise, we're incrementing
* the cell's reference count.
*/
-#ifdef XFree86Server
- if (AllocColor(cmap,
- &subColor.red, &subColor.green, &subColor.blue,
- &subColor.pixel,
- client) == Success) {
-#else
if (XAllocColor(dpy, cmap, &subColor)) {
-#endif
*alloced = 1;
}
else {
@@ -651,12 +603,7 @@ noFaultXAllocColor( int client,
subColor.flags = DoRed | DoGreen | DoBlue;
*alloced = 0;
}
-#ifdef XFree86Server
- free(ppixIn);
- free(ctable);
-#else
/* don't free table, save it for next time */
-#endif
*color = subColor;
*exact = 0;
@@ -873,10 +820,8 @@ setup_8bit_hpcr(XMesaVisual v)
v->hpcr_clear_pixmap = XMesaCreatePixmap(v->display,
DefaultRootWindow(v->display),
16, 2, 8);
-#ifndef XFree86Server
v->hpcr_clear_ximage = XGetImage(v->display, v->hpcr_clear_pixmap,
0, 0, 16, 2, AllPlanes, ZPixmap);
-#endif
}
}
@@ -1049,9 +994,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
int client = 0;
const int xclass = v->visualType;
-#ifdef XFree86Server
- client = (window) ? CLIENT_ID(window->id) : 0;
-#endif
ASSERT(!b || b->xm_visual == v);
@@ -1120,40 +1062,23 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
}
/* X11 graphics contexts */
-#ifdef XFree86Server
- b->gc = CreateScratchGC(v->display, window->depth);
-#else
b->gc = XCreateGC( v->display, window, 0, NULL );
-#endif
XMesaSetFunction( v->display, b->gc, GXcopy );
/* cleargc - for glClear() */
-#ifdef XFree86Server
- b->cleargc = CreateScratchGC(v->display, window->depth);
-#else
b->cleargc = XCreateGC( v->display, window, 0, NULL );
-#endif
XMesaSetFunction( v->display, b->cleargc, GXcopy );
/*
* Don't generate Graphics Expose/NoExpose events in swapbuffers().
* Patch contributed by Michael Pichler May 15, 1995.
*/
-#ifdef XFree86Server
- b->swapgc = CreateScratchGC(v->display, window->depth);
- {
- CARD32 v[1];
- v[0] = FALSE;
- dixChangeGC(NullClient, b->swapgc, GCGraphicsExposures, v, NULL);
- }
-#else
{
XGCValues gcvalues;
gcvalues.graphics_exposures = False;
b->swapgc = XCreateGC(v->display, window,
GCGraphicsExposures, &gcvalues);
}
-#endif
XMesaSetFunction( v->display, b->swapgc, GXcopy );
/*
* Set fill style and tile pixmap once for all for HPCR stuff
@@ -1175,9 +1100,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
/* Initialize the row buffer XImage for use in write_color_span() */
data = (char*) MALLOC(MAX_WIDTH*4);
-#ifdef XFree86Server
- b->rowimage = XMesaCreateImage(GET_VISUAL_DEPTH(v), MAX_WIDTH, 1, data);
-#else
b->rowimage = XCreateImage( v->display,
v->visinfo->visual,
v->visinfo->depth,
@@ -1186,7 +1108,6 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
MAX_WIDTH, 1, /*width, height*/
32, /*bitmap_pad*/
0 /*bytes_per_line*/ );
-#endif
if (!b->rowimage)
return GL_FALSE;
}
@@ -1334,7 +1255,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
XMesaVisual v;
GLint red_bits, green_bits, blue_bits, alpha_bits;
-#ifndef XFree86Server
/* For debugging only */
if (_mesa_getenv("MESA_XSYNC")) {
/* This makes debugging X easier.
@@ -1343,7 +1263,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
*/
XSynchronize( display, 1 );
}
-#endif
/* Color-index rendering not supported. */
if (!rgb_flag)
@@ -1360,14 +1279,12 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
* the struct but we may need some of the information contained in it
* at a later time.
*/
-#ifndef XFree86Server
v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo));
if(!v->visinfo) {
free(v);
return NULL;
}
memcpy(v->visinfo, visinfo, sizeof(*visinfo));
-#endif
/* check for MESA_GAMMA environment variable */
gamma = _mesa_getenv("MESA_GAMMA");
@@ -1384,30 +1301,13 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
v->ximage_flag = ximage_flag;
-#ifdef XFree86Server
- /* We could calculate these values by ourselves. nplanes is either the sum
- * of the red, green, and blue bits or the number index bits.
- * ColormapEntries is either (1U << index_bits) or
- * (1U << max(redBits, greenBits, blueBits)).
- */
- assert(visinfo->nplanes > 0);
- v->nplanes = visinfo->nplanes;
- v->ColormapEntries = visinfo->ColormapEntries;
-
- v->mesa_visual.redMask = visinfo->redMask;
- v->mesa_visual.greenMask = visinfo->greenMask;
- v->mesa_visual.blueMask = visinfo->blueMask;
- v->visualID = visinfo->vid;
- v->screen = 0; /* FIXME: What should be done here? */
-#else
v->mesa_visual.redMask = visinfo->red_mask;
v->mesa_visual.greenMask = visinfo->green_mask;
v->mesa_visual.blueMask = visinfo->blue_mask;
v->visualID = visinfo->visualid;
v->screen = visinfo->screen;
-#endif
-#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus))
+#if !(defined(__cplusplus) || defined(c_plusplus))
v->visualType = xmesa_convert_from_x_visual_type(visinfo->class);
#else
v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
@@ -1461,9 +1361,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
PUBLIC
void XMesaDestroyVisual( XMesaVisual v )
{
-#ifndef XFree86Server
free(v->visinfo);
-#endif
free(v);
}
@@ -1532,12 +1430,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
_mesa_enable_extension(mesaCtx, "GL_EXT_timer_query");
#endif
-#ifdef XFree86Server
- /* If we're running in the X server, do bounds checking to prevent
- * segfaults and server crashes!
- */
- mesaCtx->Const.CheckArrayBounds = GL_TRUE;
-#endif
/* finish up xmesa context initializations */
c->swapbytes = CHECK_BYTE_ORDER(v) ? GL_FALSE : GL_TRUE;
@@ -1602,9 +1494,7 @@ void XMesaDestroyContext( XMesaContext c )
PUBLIC XMesaBuffer
XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
{
-#ifndef XFree86Server
XWindowAttributes attr;
-#endif
XMesaBuffer b;
XMesaColormap cmap;
int depth;
@@ -1613,12 +1503,8 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
assert(w);
/* Check that window depth matches visual depth */
-#ifdef XFree86Server
- depth = ((XMesaDrawable)w)->depth;
-#else
XGetWindowAttributes( v->display, w, &attr );
depth = attr.depth;
-#endif
if (GET_VISUAL_DEPTH(v) != depth) {
_mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n",
GET_VISUAL_DEPTH(v), depth);
@@ -1626,9 +1512,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
}
/* Find colormap */
-#ifdef XFree86Server
- cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP);
-#else
if (attr.colormap) {
cmap = attr.colormap;
}
@@ -1638,7 +1521,6 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
/* OK, let's just allocate a new one and hope for the best */
cmap = XCreateColormap(v->display, w, attr.visual, AllocNone);
}
-#endif
b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap);
if (!b)
@@ -1748,7 +1630,6 @@ XMesaBuffer
XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
unsigned int width, unsigned int height)
{
-#ifndef XFree86Server
XMesaWindow root;
XMesaDrawable drawable; /* X Pixmap Drawable */
XMesaBuffer b;
@@ -1770,9 +1651,6 @@ XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
}
return b;
-#else
- return 0;
-#endif
}
@@ -1931,40 +1809,6 @@ XMesaBuffer XMesaGetCurrentReadBuffer( void )
}
-#ifdef XFree86Server
-PUBLIC
-GLboolean XMesaForceCurrent(XMesaContext c)
-{
- if (c) {
- _glapi_set_dispatch(c->mesa.CurrentDispatch);
-
- if (&(c->mesa) != _mesa_get_current_context()) {
- _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer);
- }
- }
- else {
- _mesa_make_current(NULL, NULL, NULL);
- }
- return GL_TRUE;
-}
-
-
-PUBLIC
-GLboolean XMesaLoseCurrent(XMesaContext c)
-{
- (void) c;
- _mesa_make_current(NULL, NULL, NULL);
- return GL_TRUE;
-}
-
-
-PUBLIC
-GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask )
-{
- _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask);
- return GL_TRUE;
-}
-#endif /* XFree86Server */
#ifndef FX
@@ -2004,7 +1848,7 @@ void XMesaSwapBuffers( XMesaBuffer b )
#endif
if (b->backxrb->ximage) {
/* Copy Ximage (back buf) from client memory to server window */
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
if (b->shm) {
/*_glthread_LOCK_MUTEX(_xmesa_lock);*/
XShmPutImage( b->xm_visual->display, b->frontxrb->drawable,
@@ -2041,9 +1885,7 @@ void XMesaSwapBuffers( XMesaBuffer b )
if (b->swAlpha)
_mesa_copy_soft_alpha_renderbuffers(ctx, &b->mesa_buffer);
}
-#if !defined(XFree86Server)
XSync( b->xm_visual->display, False );
-#endif
}
@@ -2074,7 +1916,7 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
#endif
if (b->backxrb->ximage) {
/* Copy Ximage from host's memory to server's window */
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
if (b->shm) {
/* XXX assuming width and height aren't too large! */
XShmPutImage( b->xm_visual->display, b->frontxrb->drawable,
@@ -2116,7 +1958,6 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
* Return: GL_TRUE = context is double buffered
* GL_FALSE = context is single buffered
*/
-#ifndef XFree86Server
GLboolean XMesaGetBackBuffer( XMesaBuffer b,
XMesaPixmap *pixmap,
XMesaImage **ximage )
@@ -2134,7 +1975,6 @@ GLboolean XMesaGetBackBuffer( XMesaBuffer b,
return GL_FALSE;
}
}
-#endif /* XFree86Server */
/*
@@ -2171,11 +2011,7 @@ GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height,
void XMesaFlush( XMesaContext c )
{
if (c && c->xm_visual) {
-#ifdef XFree86Server
- /* NOT_NEEDED */
-#else
XSync( c->xm_visual->display, False );
-#endif
}
}
@@ -2234,15 +2070,11 @@ void XMesaGarbageCollect( void )
for (b=XMesaBufferList; b; b=next) {
next = b->Next;
if (b->display && b->frontxrb->drawable && b->type == WINDOW) {
-#ifdef XFree86Server
- /* NOT_NEEDED */
-#else
XSync(b->display, False);
if (!window_exists( b->display, b->frontxrb->drawable )) {
/* found a dead window, free the ancillary info */
XMesaDestroyBuffer( b );
}
-#endif
}
}
}
diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c
index 2683bd44d19..10829b4284f 100644
--- a/src/mesa/drivers/x11/xm_buffer.c
+++ b/src/mesa/drivers/x11/xm_buffer.c
@@ -37,7 +37,7 @@
#include "main/renderbuffer.h"
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
static volatile int mesaXErrorFlag = 0;
/**
@@ -170,7 +170,7 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height)
if (b->db_mode == BACK_XIMAGE) {
/* Deallocate the old backxrb->ximage, if any */
if (b->backxrb->ximage) {
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
if (b->shm) {
XShmDetach(b->xm_visual->display, &b->shminfo);
XDestroyImage(b->backxrb->ximage);
@@ -188,10 +188,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height)
/* Allocate new back buffer */
if (b->shm == 0 || !alloc_back_shm_ximage(b, width, height)) {
/* Allocate a regular XImage for the back buffer. */
-#ifdef XFree86Server
- b->backxrb->ximage = XMesaCreateImage(b->xm_visual->BitsPerPixel,
- width, height, NULL);
-#else
b->backxrb->ximage = XCreateImage(b->xm_visual->display,
b->xm_visual->visinfo->visual,
GET_VISUAL_DEPTH(b->xm_visual),
@@ -199,7 +195,6 @@ alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height)
NULL,
width, height,
8, 0); /* pad, bytes_per_line */
-#endif
if (!b->backxrb->ximage) {
_mesa_warning(NULL, "alloc_back_buffer: XCreateImage failed.\n");
return;
@@ -359,16 +354,8 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb)
if (b->num_alloced > 0) {
/* If no other buffer uses this X colormap then free the colors. */
if (!xmesa_find_buffer(b->display, b->cmap, b)) {
-#ifdef XFree86Server
- int client = 0;
- if (b->frontxrb->drawable)
- client = CLIENT_ID(b->frontxrb->drawable->id);
- (void)FreeColors(b->cmap, client,
- b->num_alloced, b->alloced_colors, 0);
-#else
XFreeColors(b->display, b->cmap,
b->alloced_colors, b->num_alloced, 0);
-#endif
}
}
@@ -382,7 +369,7 @@ xmesa_delete_framebuffer(struct gl_framebuffer *fb)
if (fb->Visual.doubleBufferMode) {
/* free back ximage/pixmap/shmregion */
if (b->backxrb->ximage) {
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
if (b->shm) {
XShmDetach( b->display, &b->shminfo );
XDestroyImage( b->backxrb->ximage );
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index acece2025cf..b8d9e20c426 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -93,16 +93,12 @@ const int xmesa_kernel1[16] = {
static void
finish_or_flush( struct gl_context *ctx )
{
-#ifdef XFree86Server
- /* NOT_NEEDED */
-#else
const XMesaContext xmesa = XMESA_CONTEXT(ctx);
if (xmesa) {
_glthread_LOCK_MUTEX(_xmesa_lock);
XSync( xmesa->display, False );
_glthread_UNLOCK_MUTEX(_xmesa_lock);
}
-#endif
}
@@ -388,7 +384,6 @@ clear_buffers(struct gl_context *ctx, GLbitfield buffers)
}
-#ifndef XFree86Server
/* XXX these functions haven't been tested in the Xserver environment */
@@ -731,7 +726,6 @@ xmesa_CopyPixels( struct gl_context *ctx,
}
}
-#endif /* XFree86Server */
@@ -745,17 +739,9 @@ get_string( struct gl_context *ctx, GLenum name )
(void) ctx;
switch (name) {
case GL_RENDERER:
-#ifdef XFree86Server
- return (const GLubyte *) "Mesa GLX Indirect";
-#else
return (const GLubyte *) "Mesa X11";
-#endif
case GL_VENDOR:
-#ifdef XFree86Server
- return (const GLubyte *) "Mesa project: www.mesa3d.org";
-#else
return NULL;
-#endif
default:
return NULL;
}
@@ -948,43 +934,6 @@ xmesa_update_state( struct gl_context *ctx, GLbitfield new_state )
/**
- * Called via ctx->Driver.TestProxyTeximage(). Normally, we'd just use
- * the _mesa_test_proxy_teximage() fallback function, but we're going to
- * special-case the 3D texture case to allow textures up to 512x512x32
- * texels.
- */
-static GLboolean
-test_proxy_teximage(struct gl_context *ctx, GLenum target, GLint level,
- GLint internalFormat, GLenum format, GLenum type,
- GLint width, GLint height, GLint depth, GLint border)
-{
- if (target == GL_PROXY_TEXTURE_3D) {
- /* special case for 3D textures */
- if (width * height * depth > 512 * 512 * 64 ||
- width < 2 * border ||
- (!ctx->Extensions.ARB_texture_non_power_of_two &&
- _mesa_bitcount(width - 2 * border) != 1) ||
- height < 2 * border ||
- (!ctx->Extensions.ARB_texture_non_power_of_two &&
- _mesa_bitcount(height - 2 * border) != 1) ||
- depth < 2 * border ||
- (!ctx->Extensions.ARB_texture_non_power_of_two &&
- _mesa_bitcount(depth - 2 * border) != 1)) {
- /* Bad size, or too many texels */
- return GL_FALSE;
- }
- return GL_TRUE;
- }
- else {
- /* use the fallback routine for 1D, 2D, cube and rect targets */
- return _mesa_test_proxy_teximage(ctx, target, level, internalFormat,
- format, type, width, height, depth,
- border);
- }
-}
-
-
-/**
* In SW, we don't really compress GL_COMPRESSED_RGB[A] textures!
*/
static gl_format
@@ -1124,7 +1073,6 @@ xmesa_init_driver_functions( XMesaVisual xmvisual,
}
else {
driver->Clear = clear_buffers;
-#ifndef XFree86Server
driver->CopyPixels = xmesa_CopyPixels;
if (xmvisual->undithered_pf == PF_8R8G8B &&
xmvisual->dithered_pf == PF_8R8G8B &&
@@ -1134,9 +1082,8 @@ xmesa_init_driver_functions( XMesaVisual xmvisual,
else if (xmvisual->undithered_pf == PF_5R6G5B) {
driver->DrawPixels = xmesa_DrawPixels_5R6G5B;
}
-#endif
}
- driver->TestProxyTexImage = test_proxy_teximage;
+
#if ENABLE_EXT_texure_compression_s3tc
driver->ChooseTextureFormat = choose_tex_format;
#else
diff --git a/src/mesa/drivers/x11/xm_glide.c b/src/mesa/drivers/x11/xm_glide.c
index cbd69b011a1..d8a0e6de6d0 100644
--- a/src/mesa/drivers/x11/xm_glide.c
+++ b/src/mesa/drivers/x11/xm_glide.c
@@ -140,16 +140,8 @@ static void FXgetImage( XMesaBuffer b )
GLuint x, y;
GLuint width, height;
-#ifdef XFree86Server
- x = b->frontxrb->pixmap->x;
- y = b->frontxrb->pixmap->y;
- width = b->frontxrb->pixmap->width;
- height = b->frontxrb->pixmap->height;
- depth = b->frontxrb->pixmap->depth;
-#else
xmesa_get_window_size(b->display, b, &width, &height);
x = y = 0;
-#endif
if (b->mesa_buffer.Width != width || b->mesa_buffer.Height != height) {
b->mesa_buffer.Width = MIN2((int)width, b->FXctx->width);
b->mesa_buffer.Height = MIN2((int)height, b->FXctx->height);
diff --git a/src/mesa/drivers/x11/xm_image.c b/src/mesa/drivers/x11/xm_image.c
index 087b4e4c3a7..12fef7dad34 100644
--- a/src/mesa/drivers/x11/xm_image.c
+++ b/src/mesa/drivers/x11/xm_image.c
@@ -37,97 +37,3 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "glxheader.h"
#include "xmesaP.h"
-#ifdef XFree86Server
-
-#ifdef ROUNDUP
-#undef ROUNDUP
-#endif
-
-#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3))
-
-XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data)
-{
- XMesaImage *image;
-
- image = (XMesaImage *)xalloc(sizeof(XMesaImage));
-
- if (image) {
- image->width = width;
- image->height = height;
- image->data = data;
- /* Always pad to 32 bits */
- image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32);
- image->bits_per_pixel = bitsPerPixel;
- }
-
- return image;
-}
-
-void XMesaDestroyImage(XMesaImage *image)
-{
- if (image->data)
- free(image->data);
- xfree(image);
-}
-
-unsigned long XMesaGetPixel(XMesaImage *image, int x, int y)
-{
- CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
- CARD8 *i8;
- CARD16 *i16;
- CARD32 *i32;
- switch (image->bits_per_pixel) {
- case 8:
- i8 = (CARD8 *)row;
- return i8[x];
- break;
- case 15:
- case 16:
- i16 = (CARD16 *)row;
- return i16[x];
- break;
- case 24: /* WARNING: architecture specific code */
- i8 = (CARD8 *)row;
- return (((CARD32)i8[x*3]) |
- (((CARD32)i8[x*3+1])<<8) |
- (((CARD32)i8[x*3+2])<<16));
- break;
- case 32:
- i32 = (CARD32 *)row;
- return i32[x];
- break;
- }
- return 0;
-}
-
-#ifndef XMESA_USE_PUTPIXEL_MACRO
-void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel)
-{
- CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
- CARD8 *i8;
- CARD16 *i16;
- CARD32 *i32;
- switch (image->bits_per_pixel) {
- case 8:
- i8 = (CARD8 *)row;
- i8[x] = (CARD8)pixel;
- break;
- case 15:
- case 16:
- i16 = (CARD16 *)row;
- i16[x] = (CARD16)pixel;
- break;
- case 24: /* WARNING: architecture specific code */
- i8 = (CARD8 *)__row;
- i8[x*3] = (CARD8)(p);
- i8[x*3+1] = (CARD8)(p>>8);
- i8[x*3+2] = (CARD8)(p>>16);
- case 32:
- i32 = (CARD32 *)row;
- i32[x] = (CARD32)pixel;
- break;
- }
-}
-#endif
-
-#endif /* XFree86Server */
diff --git a/src/mesa/drivers/x11/xm_line.c b/src/mesa/drivers/x11/xm_line.c
index f03f99f918f..04cedcd4ec0 100644
--- a/src/mesa/drivers/x11/xm_line.c
+++ b/src/mesa/drivers/x11/xm_line.c
@@ -537,7 +537,6 @@ void xmesa_choose_point( struct gl_context *ctx )
-#ifndef XFree86Server
/**
* Draw fast, XOR line with XDrawLine in front color buffer.
* WARNING: this isn't fully OpenGL conformant because different pixels
@@ -567,7 +566,6 @@ xor_line(struct gl_context *ctx, const SWvertex *vert0, const SWvertex *vert1)
XDrawLine(dpy, xrb->pixmap, gc, x0, y0, x1, y1);
XMesaSetFunction(dpy, gc, GXcopy); /* this gc is used elsewhere */
}
-#endif /* XFree86Server */
#endif /* CHAN_BITS == 8 */
@@ -660,7 +658,6 @@ get_line_func(struct gl_context *ctx)
}
}
-#ifndef XFree86Server
if (ctx->DrawBuffer->_NumColorDrawBuffers == 1
&& ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT
&& swrast->_RasterMask == LOGIC_OP_BIT
@@ -669,7 +666,6 @@ get_line_func(struct gl_context *ctx)
&& !ctx->Line.SmoothFlag) {
return xor_line;
}
-#endif /* XFree86Server */
#endif /* CHAN_BITS == 8 */
return (swrast_line_func) NULL;
diff --git a/src/mesa/drivers/x11/xm_span.c b/src/mesa/drivers/x11/xm_span.c
index ab66c5e1f12..294b93a57cc 100644
--- a/src/mesa/drivers/x11/xm_span.c
+++ b/src/mesa/drivers/x11/xm_span.c
@@ -42,7 +42,6 @@
* generate BadMatch errors if the drawable isn't mapped.
*/
-#ifndef XFree86Server
static int caught_xgetimage_error = 0;
static int (*old_xerror_handler)( XMesaDisplay *dpy, XErrorEvent *ev );
static unsigned long xgetimage_serial;
@@ -87,7 +86,6 @@ static int check_xgetimage_errors( void )
/* return 0=no error, 1=error caught */
return caught_xgetimage_error;
}
-#endif
/*
@@ -97,7 +95,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy,
XMesaDrawable d, int x, int y )
{
unsigned long p;
-#ifndef XFree86Server
XMesaImage *pixel = NULL;
int error;
@@ -113,9 +110,6 @@ static unsigned long read_pixel( XMesaDisplay *dpy,
if (pixel) {
XMesaDestroyImage( pixel );
}
-#else
- (*dpy->GetImage)(d, x, y, 1, 1, ZPixmap, ~0L, (pointer)&p);
-#endif
return p;
}
@@ -3763,7 +3757,6 @@ static void put_values_ci_ximage( PUT_VALUES_ARGS )
/***** Pixel reading *****/
/**********************************************************************/
-#ifndef XFree86Server
/**
* Do clip testing prior to calling XGetImage. If any of the region lies
* outside the screen's bounds, XGetImage will return NULL.
@@ -3806,7 +3799,6 @@ clip_for_xgetimage(struct gl_context *ctx, XMesaPixmap pixmap, GLuint *n, GLint
}
return 0;
}
-#endif
/*
@@ -3824,7 +3816,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb,
y = YFLIP(xrb, y);
if (xrb->pixmap) {
-#ifndef XFree86Server
XMesaImage *span = NULL;
int error;
int k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y);
@@ -3850,11 +3841,6 @@ get_row_ci(struct gl_context *ctx, struct gl_renderbuffer *rb,
if (span) {
XMesaDestroyImage( span );
}
-#else
- (*xmesa->display->GetImage)(xrb->drawable,
- x, y, n, 1, ZPixmap,
- ~0L, (pointer)index);
-#endif
}
else if (xrb->ximage) {
XMesaImage *img = xrb->ximage;
@@ -3882,14 +3868,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb,
/* Read from Pixmap or Window */
XMesaImage *span = NULL;
int error;
-#ifdef XFree86Server
- span = XMesaCreateImage(xmesa->xm_visual->BitsPerPixel, n, 1, NULL);
- span->data = (char *)MALLOC(span->height * span->bytes_per_line);
- error = (!span->data);
- (*xmesa->display->GetImage)(xrb->drawable,
- x, YFLIP(xrb, y), n, 1, ZPixmap,
- ~0L, (pointer)span->data);
-#else
int k;
y = YFLIP(xrb, y);
k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y);
@@ -3900,7 +3878,6 @@ get_row_rgba(struct gl_context *ctx, struct gl_renderbuffer *rb,
span = XGetImage( xmesa->display, xrb->pixmap,
x, y, n, 1, AllPlanes, ZPixmap );
error = check_xgetimage_errors();
-#endif
if (span && !error) {
switch (xmesa->pixelformat) {
case PF_Truecolor:
diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h
index f63626a9702..98737fab248 100644
--- a/src/mesa/drivers/x11/xmesa.h
+++ b/src/mesa/drivers/x11/xmesa.h
@@ -72,13 +72,9 @@ and create a window, you must do the following to use the X/Mesa interface:
extern "C" {
#endif
-#ifdef XFree86Server
-#include "xmesa_xf86.h"
-#else
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include "xmesa_x.h"
-#endif
#include "GL/gl.h"
#ifdef AMIWIN
@@ -180,19 +176,6 @@ extern XMesaContext XMesaCreateContext( XMesaVisual v,
extern void XMesaDestroyContext( XMesaContext c );
-#ifdef XFree86Server
-/*
- * These are the extra routines required for integration with XFree86.
- * None of these routines should be user visible. -KEM
- */
-extern GLboolean XMesaForceCurrent( XMesaContext c );
-
-extern GLboolean XMesaLoseCurrent( XMesaContext c );
-
-extern GLboolean XMesaCopyContext( XMesaContext src,
- XMesaContext dst,
- GLuint mask );
-#endif /* XFree86Server */
/*
diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h
index 5d34b430cb6..63e3e211bf6 100644
--- a/src/mesa/drivers/x11/xmesaP.h
+++ b/src/mesa/drivers/x11/xmesaP.h
@@ -33,9 +33,6 @@
#include "fxmesa.h"
#include "xm_glide.h"
#endif
-#ifdef XFree86Server
-#include "xm_image.h"
-#endif
extern _glthread_Mutex _xmesa_lock;
@@ -88,13 +85,8 @@ struct xmesa_visual {
XMesaDisplay *display; /* The X11 display */
int screen, visualID;
int visualType;
-#ifdef XFree86Server
- GLint ColormapEntries;
- GLint nplanes;
-#else
XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */
XVisualInfo *vishandle; /* Only used in fakeglx.c */
-#endif
GLint BitsPerPixel; /* True bits per pixel for XImages */
GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */
@@ -233,7 +225,7 @@ struct xmesa_buffer {
/* 0 = not available */
/* 1 = XImage support available */
/* 2 = Pixmap support available too */
-#if defined(USE_XSHM) && !defined(XFree86Server)
+#if defined(USE_XSHM)
XShmSegmentInfo shminfo;
#endif
@@ -259,11 +251,7 @@ struct xmesa_buffer {
/* Used to do XAllocColor/XFreeColors accounting: */
int num_alloced;
-#if defined(XFree86Server)
- Pixel alloced_colors[256];
-#else
unsigned long alloced_colors[256];
-#endif
#if defined( FX )
/* For 3Dfx Glide only */
@@ -578,9 +566,7 @@ extern void xmesa_register_swrast_functions( struct gl_context *ctx );
#define ENABLE_EXT_texure_compression_s3tc 0 /* SW texture compression */
-#ifdef XFree86Server
-#define ENABLE_EXT_timer_query 0
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ENABLE_EXT_timer_query 1 /* should have 64-bit GLuint64EXT */
#else
#define ENABLE_EXT_timer_query 0 /* may not have 64-bit GLuint64EXT */