diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_disasm.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_optimize.c | 115 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 146 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 46 |
12 files changed, 290 insertions, 63 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index a242580273f..842d4b7aa10 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ brw_gs_emit.c \ brw_gs_state.c \ brw_misc_state.c \ + brw_optimize.c \ brw_program.c \ brw_queryobj.c \ brw_sf.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a512896f315..241193c3579 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->has_surface_tile_offset = GL_TRUE; brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; + brw->has_pln = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6fc37e4d89..2855c93ea66 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -446,6 +446,7 @@ struct brw_context GLboolean has_compr4; GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; + GLboolean has_pln; ; struct { struct brw_state_flags dirty; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bb1b5f5ef03..984e56d00c8 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -550,6 +550,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 54699cf8d34..ad61770212c 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -50,6 +50,7 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, @@ -73,7 +74,7 @@ struct { [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 39eb88d7c2b..4f55158e8f3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -795,6 +795,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) +ALU2(PLN) #undef ALU1 #undef ALU2 @@ -965,4 +966,9 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); + + +/* brw_optimize.c */ +void brw_optimize(struct brw_compile *p); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f69d5296137..d2395dec288 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -573,7 +573,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) - +ALU2(PLN) @@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint simd_mode) { GLboolean need_stall = 0; - + if (writemask == 0) { /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; @@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p, /* printf("need stall %x %x\n", newmask , writemask); */ } else { + GLboolean dispatch_16 = GL_FALSE; + struct brw_reg m1 = brw_message_reg(msg_reg_nr); - + + guess_execution_size(p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = GL_TRUE; + newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); @@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p, src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); - response_length = len * 2; + + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. + */ + if (dispatch_16) + response_length = len * 2; } } diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c new file mode 100644 index 00000000000..57df9ea1151 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_optimize.c @@ -0,0 +1,115 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <[email protected]> + * + */ + +#include "main/macros.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + +static GLboolean +is_single_channel_dp4(struct brw_instruction *insn) +{ + if (insn->header.opcode != BRW_OPCODE_DP4 || + insn->header.execution_size != BRW_EXECUTE_8 || + insn->header.access_mode != BRW_ALIGN_16 || + insn->bits1.da1.dest_reg_file != BRW_GENERAL_REGISTER_FILE) + return GL_FALSE; + + if (!is_power_of_two(insn->bits1.da16.dest_writemask)) + return GL_FALSE; + + return GL_TRUE; +} + +/** + * Sets the dependency control fields on DP4 instructions. + * + * The hardware only tracks dependencies on a register basis, so when + * you do: + * + * DP4 dst.x src1 src2 + * DP4 dst.y src1 src3 + * DP4 dst.z src1 src4 + * DP4 dst.w src1 src5 + * + * It will wait to do the DP4 dst.y until the dst.x is resolved, etc. + * We can examine our instruction stream and set the dependency + * control fields to tell the hardware when to do it. + * + * We may want to extend this to other instructions that are used to + * fill in a channel at a time of the destination register. + */ +static void +brw_set_dp4_dependency_control(struct brw_compile *p) +{ + int i; + + for (i = 1; i < p->nr_insn; i++) { + struct brw_instruction *insn = &p->store[i]; + struct brw_instruction *prev = &p->store[i - 1]; + + if (!is_single_channel_dp4(prev)) + continue; + + if (!is_single_channel_dp4(insn)) { + i++; + continue; + } + + /* Only avoid hw dep control if the write masks are different + * channels of one reg. + */ + if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask) + continue; + if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr) + continue; + + /* Check if the second instruction depends on the previous one + * for a src. + */ + if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT || + insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE && + (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT || + insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr)) + continue; + + prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED; + insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED; + } +} + +void +brw_optimize(struct brw_compile *p) +{ + brw_set_dp4_dependency_control(p); +} diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a48804a660f..d16e916832e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -384,9 +384,8 @@ static void emit_sop( struct brw_vs_compile *c, { struct brw_compile *p = &c->func; - brw_MOV(p, dst, brw_imm_f(0.0f)); - brw_CMP(p, brw_null_reg(), cond, arg0, arg1); - brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_CMP(p, brw_null_reg(), cond, arg1, arg0); + brw_SEL(p, dst, brw_null_reg(), brw_imm_f(1.0f)); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1825,6 +1824,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) post_vs_emit(c, end_inst, last_inst); + brw_optimize(p); + if (INTEL_DEBUG & DEBUG_VS) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 88d84ee82fe..47b764d24d1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9315bca3156..05e464d4b61 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -45,7 +62,13 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) } -/* Payload R0: +/** + * Computes the screen-space x,y position of the pixels. + * + * This will be used by emit_delta_xy() or emit_wpos_xy() for + * interpolation of attributes.. + * + * Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, * corresponding to each of the 16 execution channels. @@ -60,7 +83,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.7 -- ? * R1.8 -- ? */ - void emit_pixel_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask) @@ -100,7 +122,14 @@ void emit_pixel_xy(struct brw_wm_compile *c, brw_pop_insn_state(p); } - +/** + * Computes the screen-space x,y distance of the pixels from the start + * vertex. + * + * This will be used in linterp or pinterp with the start vertex value + * and the Cx, Cy, and C0 coefficients passed in from the setup engine + * to produce interpolated attribute values. + */ void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -108,25 +137,27 @@ void emit_delta_xy(struct brw_compile *p, { struct brw_reg r1 = brw_vec1_grf(1, 0); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - } + if (mask == 0) + return; - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); + assert(mask == WRITEMASK_XY); - } + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers produced by emit_pixel_xy(). + */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); } +/** + * Computes the pixel offset from the window origin for gl_FragCoord(). + */ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, @@ -134,9 +165,6 @@ void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { if (c->fp->program.PixelCenterInteger) { /* X' = X */ @@ -186,6 +214,7 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; /* Don't need this if all you are doing is interpolating color, for * instance. @@ -196,8 +225,12 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, brw_message_reg(2), interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + } /* Calc w */ if (c->dispatch_width == 16) { @@ -224,6 +257,7 @@ void emit_linterp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -235,8 +269,12 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } } @@ -249,6 +287,7 @@ void emit_pinterp(struct brw_compile *p, const struct brw_reg *deltas, const struct brw_reg *w) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -260,8 +299,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } for (i = 0; i < 4; i++) { @@ -502,11 +545,8 @@ void emit_sop(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst[i], brw_imm_f(1.0)); + brw_CMP(p, brw_null_reg(), cond, arg1[i], arg0[i]); + brw_SEL(p, dst[i], brw_null_reg(), brw_imm_f(1.0)); brw_pop_insn_state(p); } } @@ -566,12 +606,12 @@ static void emit_sne( struct brw_compile *p, emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); } -static void emit_cmp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -601,14 +641,10 @@ void emit_max(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -625,14 +661,10 @@ void emit_min(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg1[i]); - brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[i], arg0[i]); + brw_SEL(p, dst[i], arg0[i], arg1[i]); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); } @@ -1086,11 +1118,19 @@ static void emit_kil( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - GLuint i; - - /* XXX - usually won't need 4 compares! - */ + GLuint i, j; + for (i = 0; i < 4; i++) { + /* Check if we've already done the comparison for this reg + * -- common when someone does KIL TEMP.wwww. + */ + for (j = 0; j < i; j++) { + if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) + break; + } + if (j != i) + continue; + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index ea3c2405af9..0b66cc6c9f3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c) */ static void prealloc_reg(struct brw_wm_compile *c) { + struct intel_context *intel = &c->func.brw->intel; int i, j; struct brw_reg reg; int urb_read_length = 0; @@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c) } } + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + + switch (inst->Opcode) { + case WM_DELTAXY: + /* Allocate WM_DELTAXY destination on G45/GM45 to an + * even-numbered GRF if possible so that we can use the PLN + * instruction. + */ + if (inst->DstReg.WriteMask == WRITEMASK_XY && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && + (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { + int grf; + + for (grf = c->first_free_grf & ~1; + grf < BRW_WM_MAX_GRF; + grf += 2) + { + if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { + c->used_grf[grf] = GL_TRUE; + c->used_grf[grf + 1] = GL_TRUE; + c->first_free_grf = grf + 2; /* a guess */ + + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, + brw_vec8_grf(grf, 0)); + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, + brw_vec8_grf(grf + 1, 0)); + break; + } + } + } + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! @@ -1869,6 +1907,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; case OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; @@ -2026,8 +2067,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - printf("unsupported IR in fragment shader %d\n", - inst->Opcode); + printf("unsupported opcode %d (%s) in fragment shader\n", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : "unknown"); } /* Release temporaries containing any unaliased source regs. */ |