summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c3
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c115
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c146
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c46
12 files changed, 290 insertions, 63 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index a242580273f..842d4b7aa10 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -54,6 +54,7 @@ DRIVER_SOURCES = \
brw_gs_emit.c \
brw_gs_state.c \
brw_misc_state.c \
+ brw_optimize.c \
brw_program.c \
brw_queryobj.c \
brw_sf.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index a512896f315..241193c3579 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
brw->has_surface_tile_offset = GL_TRUE;
brw->has_compr4 = GL_TRUE;
brw->has_aa_line_parameters = GL_TRUE;
+ brw->has_pln = GL_TRUE;
} else {
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index d6fc37e4d89..2855c93ea66 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -446,6 +446,7 @@ struct brw_context
GLboolean has_compr4;
GLboolean has_negative_rhw_bug;
GLboolean has_aa_line_parameters;
+ GLboolean has_pln;
;
struct {
struct brw_state_flags dirty;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index bb1b5f5ef03..984e56d00c8 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -550,6 +550,7 @@
#define BRW_OPCODE_DP2 87
#define BRW_OPCODE_DPA2 88
#define BRW_OPCODE_LINE 89
+#define BRW_OPCODE_PLN 90
#define BRW_OPCODE_NOP 126
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 54699cf8d34..ad61770212c 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -50,6 +50,7 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
@@ -73,7 +74,7 @@ struct {
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 39eb88d7c2b..4f55158e8f3 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -795,6 +795,7 @@ ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
+ALU2(PLN)
#undef ALU1
#undef ALU2
@@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p,
void brw_set_src1( struct brw_instruction *insn,
struct brw_reg reg );
+
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f69d5296137..d2395dec288 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -573,7 +573,7 @@ ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
-
+ALU2(PLN)
@@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p,
GLuint simd_mode)
{
GLboolean need_stall = 0;
-
+
if (writemask == 0) {
/*printf("%s: zero writemask??\n", __FUNCTION__); */
return;
@@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p,
/* printf("need stall %x %x\n", newmask , writemask); */
}
else {
+ GLboolean dispatch_16 = GL_FALSE;
+
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
-
+
+ guess_execution_size(p->current, dest);
+ if (p->current->header.execution_size == BRW_EXECUTE_16)
+ dispatch_16 = GL_TRUE;
+
newmask = ~newmask & WRITEMASK_XYZW;
brw_push_insn_state(p);
@@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p,
src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
dest = offset(dest, dst_offset);
- response_length = len * 2;
+
+ /* For 16-wide dispatch, masked channels are skipped in the
+ * response. For 8-wide, masked channels still take up slots,
+ * and are just not written to.
+ */
+ if (dispatch_16)
+ response_length = len * 2;
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
new file mode 100644
index 00000000000..57df9ea1151
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <[email protected]>
+ *
+ */
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+static GLboolean
+is_single_channel_dp4(struct brw_instruction *insn)
+{
+ if (insn->header.opcode != BRW_OPCODE_DP4 ||
+ insn->header.execution_size != BRW_EXECUTE_8 ||
+ insn->header.access_mode != BRW_ALIGN_16 ||
+ insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE)
+ return GL_FALSE;
+
+ if (!is_power_of_two(insn->bits1.da16.dest_writemask))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Sets the dependency control fields on DP4 instructions.
+ *
+ * The hardware only tracks dependencies on a register basis, so when
+ * you do:
+ *
+ * DP4 dst.x src1 src2
+ * DP4 dst.y src1 src3
+ * DP4 dst.z src1 src4
+ * DP4 dst.w src1 src5
+ *
+ * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
+ * We can examine our instruction stream and set the dependency
+ * control fields to tell the hardware when to do it.
+ *
+ * We may want to extend this to other instructions that are used to
+ * fill in a channel at a time of the destination register.
+ */
+static void
+brw_set_dp4_dependency_control(struct brw_compile *p)
+{
+ int i;
+
+ for (i = 1; i < p->nr_insn; i++) {
+ struct brw_instruction *insn = &p->store[i];
+ struct brw_instruction *prev = &p->store[i - 1];
+
+ if (!is_single_channel_dp4(prev))
+ continue;
+
+ if (!is_single_channel_dp4(insn)) {
+ i++;
+ continue;
+ }
+
+ /* Only avoid hw dep control if the write masks are different
+ * channels of one reg.
+ */
+ if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask)
+ continue;
+ if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr)
+ continue;
+
+ /* Check if the second instruction depends on the previous one
+ * for a src.
+ */
+ if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
+ (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+ insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr))
+ continue;
+ if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
+ (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT ||
+ insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr))
+ continue;
+
+ prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
+ insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
+ }
+}
+
+void
+brw_optimize(struct brw_compile *p)
+{
+ brw_set_dp4_dependency_control(p);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index a48804a660f..d16e916832e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -384,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c,
{
struct brw_compile *p = &c->func;
- brw_MOV(p, dst, brw_imm_f(0.0f));
- brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
- brw_MOV(p, dst, brw_imm_f(1.0f));
+ brw_CMP(p, brw_null_reg(), cond, arg1, arg0);
+ brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f));
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -1825,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
post_vs_emit(c, end_inst, last_inst);
+ brw_optimize(p);
+
if (INTEL_DEBUG & DEBUG_VS) {
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 88d84ee82fe..47b764d24d1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0);
+void emit_cmp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2);
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 9315bca3156..05e464d4b61 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,6 +34,23 @@
#include "brw_context.h"
#include "brw_wm.h"
+static GLboolean can_do_pln(struct intel_context *intel,
+ const struct brw_reg *deltas)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ if (!brw->has_pln)
+ return GL_FALSE;
+
+ if (deltas[1].nr != deltas[0].nr + 1)
+ return GL_FALSE;
+
+ if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -45,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
}
-/* Payload R0:
+/**
+ * Computes the screen-space x,y position of the pixels.
+ *
+ * This will be used by emit_delta_xy() or emit_wpos_xy() for
+ * interpolation of attributes..
+ *
+ * Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
* corresponding to each of the 16 execution channels.
@@ -60,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.7 -- ?
* R1.8 -- ?
*/
-
void emit_pixel_xy(struct brw_wm_compile *c,
const struct brw_reg *dst,
GLuint mask)
@@ -100,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c,
brw_pop_insn_state(p);
}
-
+/**
+ * Computes the screen-space x,y distance of the pixels from the start
+ * vertex.
+ *
+ * This will be used in linterp or pinterp with the start vertex value
+ * and the Cx, Cy, and C0 coefficients passed in from the setup engine
+ * to produce interpolated attribute values.
+ */
void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -108,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p,
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
+ if (mask == 0)
+ return;
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
+ assert(mask == WRITEMASK_XY);
- }
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers produced by emit_pixel_xy().
+ */
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
}
+/**
+ * Computes the pixel offset from the window origin for gl_FragCoord().
+ */
void emit_wpos_xy(struct brw_wm_compile *c,
const struct brw_reg *dst,
GLuint mask,
@@ -134,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
if (mask & WRITEMASK_X) {
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
@@ -186,6 +214,7 @@ void emit_pixel_w(struct brw_wm_compile *c,
const struct brw_reg *deltas)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
/* Don't need this if all you are doing is interpolating color, for
* instance.
@@ -196,8 +225,12 @@ void emit_pixel_w(struct brw_wm_compile *c,
/* Calc 1/w - just linterp wpos[3] optimized by putting the
* result straight into a message reg.
*/
- brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ }
/* Calc w */
if (c->dispatch_width == 16) {
@@ -224,6 +257,7 @@ void emit_linterp(struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *deltas)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@@ -235,8 +269,12 @@ void emit_linterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
- brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, dst[i], interp[i], deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
}
}
}
@@ -249,6 +287,7 @@ void emit_pinterp(struct brw_compile *p,
const struct brw_reg *deltas,
const struct brw_reg *w)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@@ -260,8 +299,12 @@ void emit_pinterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
- brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, dst[i], interp[i], deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
}
}
for (i = 0; i < 4; i++) {
@@ -502,11 +545,8 @@ void emit_sop(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
brw_push_insn_state(p);
- brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_MOV(p, dst[i], brw_imm_f(0));
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]);
+ brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0));
brw_pop_insn_state(p);
}
}
@@ -566,12 +606,12 @@ static void emit_sne( struct brw_compile *p,
emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
}
-static void emit_cmp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *arg1,
- const struct brw_reg *arg2 )
+void emit_cmp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1,
+ const struct brw_reg *arg2)
{
GLuint i;
@@ -601,14 +641,10 @@ void emit_max(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg0[i]);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg1[i]);
+ brw_SEL(p, dst[i], arg0[i], arg1[i]);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -625,14 +661,10 @@ void emit_min(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg1[i]);
- brw_set_saturate(p, 0);
-
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg0[i]);
+ brw_SEL(p, dst[i], arg0[i], arg1[i]);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -1086,11 +1118,19 @@ static void emit_kil( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- GLuint i;
-
- /* XXX - usually won't need 4 compares!
- */
+ GLuint i, j;
+
for (i = 0; i < 4; i++) {
+ /* Check if we've already done the comparison for this reg
+ * -- common when someone does KIL TEMP.wwww.
+ */
+ for (j = 0; j < i; j++) {
+ if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
+ break;
+ }
+ if (j != i)
+ continue;
+
brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index ea3c2405af9..0b66cc6c9f3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c)
*/
static void prealloc_reg(struct brw_wm_compile *c)
{
+ struct intel_context *intel = &c->func.brw->intel;
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
@@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
}
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+
+ switch (inst->Opcode) {
+ case WM_DELTAXY:
+ /* Allocate WM_DELTAXY destination on G45/GM45 to an
+ * even-numbered GRF if possible so that we can use the PLN
+ * instruction.
+ */
+ if (inst->DstReg.WriteMask == WRITEMASK_XY &&
+ !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
+ !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
+ (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
+ int grf;
+
+ for (grf = c->first_free_grf & ~1;
+ grf < BRW_WM_MAX_GRF;
+ grf += 2)
+ {
+ if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
+ c->used_grf[grf] = GL_TRUE;
+ c->used_grf[grf + 1] = GL_TRUE;
+ c->first_free_grf = grf + 2; /* a guess */
+
+ set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
+ brw_vec8_grf(grf, 0));
+ set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
+ brw_vec8_grf(grf + 1, 0));
+ break;
+ }
+ }
+ }
+ default:
+ break;
+ }
+ }
+
/* An instruction may reference up to three constants.
* They'll be found in these registers.
* XXX alloc these on demand!
@@ -1869,6 +1907,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LG2:
emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
break;
+ case OPCODE_CMP:
+ emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+ break;
case OPCODE_MIN:
emit_min(p, dst, dst_flags, args[0], args[1]);
break;
@@ -2026,8 +2067,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
}
break;
default:
- printf("unsupported IR in fragment shader %d\n",
- inst->Opcode);
+ printf("unsupported opcode %d (%s) in fragment shader\n",
+ inst->Opcode, inst->Opcode < MAX_OPCODE ?
+ _mesa_opcode_string(inst->Opcode) : "unknown");
}
/* Release temporaries containing any unaliased source regs. */