summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2009-08-12 10:19:31 -0700
committerEric Anholt <[email protected]>2009-11-06 21:08:55 -0800
commit2b58c31257f8900067276b6d6537bb2ce54b1b10 (patch)
tree7bd54b7bbb6de3844eebf2aae17abc8d55b32d36
parent1e5400c575b72fbde16ef0d55fd3ba577fc1b6a5 (diff)
i965: Share most of the WM functions between brw_wm_glsl.c and brw_wm_emit.c
The PINTERP code should be faster for brw_wm_glsl.c now since brw_wm_emit.c's had been improved, and pixel_w should no longer stomp on a neighbor to dst.
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h34
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c115
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c257
3 files changed, 109 insertions, 297 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 7d470e8dfeb..eba828f6e36 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -325,11 +325,19 @@ void emit_alu2(struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
GLboolean is_ddx,
const struct brw_reg *arg0);
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -345,6 +353,14 @@ void emit_dph(struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
void emit_lrp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -368,12 +384,30 @@ void emit_math2(struct brw_wm_compile *c,
GLuint mask,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w);
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask);
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas);
void emit_sop(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
GLuint cond,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
void emit_xpd(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 763fe4b4d67..6eaa2792be4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -44,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
return reg;
}
+
/* Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
@@ -60,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.8 -- ?
*/
-
-static void emit_pixel_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask)
+void emit_pixel_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask)
{
+ struct brw_compile *p = &c->func;
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+ struct brw_reg dst0_uw, dst1_uw;
+ brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ if (c->dispatch_width == 16) {
+ dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ } else {
+ dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
+ dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
+ }
+
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
if (mask & WRITEMASK_X) {
brw_ADD(p,
- vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+ dst0_uw,
stride(suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
}
if (mask & WRITEMASK_Y) {
brw_ADD(p,
- vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+ dst1_uw,
stride(suboffset(r1_uw,5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ brw_pop_insn_state(p);
}
-
-static void emit_delta_xy(struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_delta_xy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
@@ -118,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p,
}
}
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0)
+void emit_wpos_xy(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
@@ -146,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
-static void emit_pixel_w( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas)
+void emit_pixel_w(struct brw_wm_compile *c,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
+ struct brw_compile *p = &c->func;
+
/* Don't need this if all you are doing is interpolating color, for
* instance.
*/
@@ -165,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p,
brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
/* Calc w */
- brw_math_16( p, dst[3],
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
+ if (c->dispatch_width == 16) {
+ brw_math_16(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ } else {
+ brw_math(p, dst[3],
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2, brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
}
-
-static void emit_linterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas )
+void emit_linterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -199,12 +218,12 @@ static void emit_linterp( struct brw_compile *p,
}
-static void emit_pinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0,
- const struct brw_reg *deltas,
- const struct brw_reg *w)
+void emit_pinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *deltas,
+ const struct brw_reg *w)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -229,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p,
}
-static void emit_cinterp( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask,
- const struct brw_reg *arg0 )
+void emit_cinterp(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
{
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
@@ -251,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p,
}
/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing( struct brw_compile *p,
- const struct brw_reg *dst,
- GLuint mask )
+void emit_frontfacing(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask)
{
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1316,7 +1335,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
/* Generated instructions for calculating triangle interpolants:
*/
case WM_PIXELXY:
- emit_pixel_xy(p, dst, dst_flags);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
@@ -1328,7 +1347,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case WM_PIXELW:
- emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 02b119154fa..d0987362a4e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -671,67 +671,6 @@ unalias3(struct brw_wm_compile *c,
release_tmps(c, mark);
}
-static void emit_pixel_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
-
- struct brw_reg dst0, dst1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- /* Calculate pixel centers by adding 1 or 0 to each of the
- * micro-tile coordinates passed in r1.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 4), 2, 4, 0),
- brw_imm_v(0x10101010));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
- stride(suboffset(r1_uw, 5), 2, 4, 0),
- brw_imm_v(0x11001100));
- }
-}
-
-static void emit_delta_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_reg r1 = brw_vec1_grf(1, 0);
- struct brw_reg dst0, dst1, src0, src1;
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- dst0 = get_dst_reg(c, inst, 0);
- dst1 = get_dst_reg(c, inst, 1);
- src0 = get_src_reg(c, inst, 0, 0);
- src1 = get_src_reg(c, inst, 0, 1);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst0,
- retype(src0, BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
-
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst1,
- retype(src1, BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
-
- }
-}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -829,154 +768,6 @@ static void emit_fb_write(struct brw_wm_compile *c,
fire_fb_write(c, 0, nr, target, eot);
}
-static void emit_pixel_w( struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- if (mask & WRITEMASK_W) {
- struct brw_reg dst, src0, delta0, delta1;
- struct brw_reg interp3;
-
- dst = get_dst_reg(c, inst, 3);
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
-
- interp3 = brw_vec1_grf(src0.nr+1, 4);
- /* Calc 1/w - just linterp wpos[3] optimized by putting the
- * result straight into a message reg.
- */
- brw_LINE(p, brw_null_reg(), interp3, delta0);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
-
- /* Calc w */
- brw_math_16( p, dst,
- BRW_MATH_FUNCTION_INV,
- BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
- BRW_MATH_PRECISION_FULL);
- }
-}
-
-static void emit_linterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1), delta1);
- }
- }
-}
-
-static void emit_cinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, src0;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i],3));
- }
- }
-}
-
-static void emit_pinterp(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
-
- struct brw_reg interp[4];
- struct brw_reg dst, delta0, delta1;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- delta0 = get_src_reg(c, inst, 1, 0);
- delta1 = get_src_reg(c, inst, 1, 1);
- w = get_src_reg(c, inst, 2, 3);
- nr = src0.nr;
-
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
-
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_LINE(p, brw_null_reg(), interp[i], delta0);
- brw_MAC(p, dst, suboffset(interp[i],1),
- delta1);
- brw_MUL(p, dst, dst, w);
- }
- }
-}
-
-/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
-static void emit_frontfacing(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
- struct brw_reg dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
-
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(0.0));
- }
- }
-
- /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
- * us front face
- */
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, brw_imm_f(1.0));
- }
- }
- brw_set_predicate_control_flag_value(p, 0xff);
-}
-
static void emit_arl(struct brw_wm_compile *c,
const struct prog_instruction *inst)
{
@@ -2106,39 +1897,7 @@ static void emit_noise4( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-
-static void emit_wpos_xy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0[2], dst[2];
-
- dst[0] = get_dst_reg(c, inst, 0);
- dst[1] = get_dst_reg(c, inst, 1);
- src0[0] = get_src_reg(c, inst, 0, 0);
- src0[1] = get_src_reg(c, inst, 0, 1);
-
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
- if (mask & WRITEMASK_X) {
- /* X' = X - origin_x */
- brw_ADD(p,
- dst[0],
- retype(src0[0], BRW_REGISTER_TYPE_W),
- brw_imm_d(0 - c->key.origin_x));
- }
-
- if (mask & WRITEMASK_Y) {
- /* Y' = height - (Y - origin_y) = height + origin_y - Y */
- brw_ADD(p,
- dst[1],
- negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
- brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
- }
-}
/* TODO
BIAS on SIMD8 not working yet...
@@ -2378,31 +2137,31 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
switch (inst->Opcode) {
case WM_PIXELXY:
- emit_pixel_xy(c, inst);
+ emit_pixel_xy(c, dst, dst_flags);
break;
case WM_DELTAXY:
- emit_delta_xy(c, inst);
+ emit_delta_xy(p, dst, dst_flags, args[0]);
break;
case WM_PIXELW:
- emit_pixel_w(c, inst);
+ emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
break;
case WM_LINTERP:
- emit_linterp(c, inst);
+ emit_linterp(p, dst, dst_flags, args[0], args[1]);
break;
case WM_PINTERP:
- emit_pinterp(c, inst);
+ emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
break;
case WM_CINTERP:
- emit_cinterp(c, inst);
+ emit_cinterp(p, dst, dst_flags, args[0]);
break;
case WM_WPOSXY:
- emit_wpos_xy(c, inst);
+ emit_wpos_xy(c, dst, dst_flags, args[0]);
break;
case WM_FB_WRITE:
emit_fb_write(c, inst);
break;
case WM_FRONTFACING:
- emit_frontfacing(c, inst);
+ emit_frontfacing(p, dst, dst_flags);
break;
case OPCODE_ADD:
emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);