summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Berry <[email protected]>2012-06-20 13:40:45 -0700
committerPaul Berry <[email protected]>2012-06-22 07:59:34 -0700
commit82d25963a838cfebdeb9b080169979329ee850ea (patch)
tree8cc534b94e7666bfa3b3a299f168921548e2973a
parentd988ea5e810868244eb1a1e7ede32295c9ed9ad4 (diff)
i965: Compute dFdy() correctly for FBOs.
On i965, dFdx() and dFdy() are computed by taking advantage of the fact that each consecutive set of 4 pixels dispatched to the fragment shader always constitutes a contiguous 2x2 block of pixels in a fixed arrangement known as a "sub-span". So we calculate dFdx() by taking the difference between the values computed for the left and right halves of the sub-span, and we calculate dFdy() by taking the difference between the values computed for the top and bottom halves of the sub-span. However, there's a subtlety when FBOs are in use: since FBOs use a coordinate system where the origin is at the upper left, and window system framebuffers use a coordinate system where the origin is at the lower left, the computation of dFdy() needs to be negated for FBOs. This patch modifies the fragment shader back-ends to negate the value of dFdy() when an FBO is in use. It also modifies the code that populates the program key (brw_wm_populate_key() and brw_fs_precompile()) so that they always record in the program key whether we are rendering to an FBO or to a window system framebuffer; this ensures that the fragment shader will get recompiled when switching between FBO and non-FBO use. This will result in unnecessary recompiles of fragment shaders that don't use dFdy(). To fix that, we will need to adapt the GLSL and NV_fragment_program front-ends to record whether or not a given shader uses dFdy(). I plan to implement this in a future patch series; I've left FIXME comments in the code as a reminder. Fixes Piglit test "fbo-deriv". NOTE: This is a candidate for stable release branches. Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp14
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c15
6 files changed, 46 insertions, 9 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 313e720d382..43efd68e2c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1848,6 +1848,13 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
struct brw_context *brw = brw_context(ctx);
struct brw_wm_prog_key key;
+ /* As a temporary measure we assume that all programs use dFdy() (and hence
+ * need to be compiled differently depending on whether we're rendering to
+ * an FBO). FIXME: set this bool correctly based on the contents of the
+ * program.
+ */
+ bool program_uses_dfdy = true;
+
if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
return true;
@@ -1892,6 +1899,9 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
if (fp->Base.InputsRead & FRAG_BIT_WPOS) {
key.drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ if ((fp->Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) {
key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0c6c3e4e98a..2c2cb6cbf7c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -534,7 +534,8 @@ public:
struct brw_reg src);
void generate_discard(fs_inst *inst);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
- void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+ void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ bool negate_value);
void generate_spill(fs_inst *inst, struct brw_reg src);
void generate_unspill(fs_inst *inst, struct brw_reg dst);
void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 522123c1dd8..0881ad76c88 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -441,8 +441,13 @@ fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
brw_ADD(p, dst, src0, negate(src1));
}
+/* The negate_value boolean is used to negate the derivative computation for
+ * FBOs, since they place the origin at the upper left instead of the lower
+ * left.
+ */
void
-fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ bool negate_value)
{
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
BRW_REGISTER_TYPE_F,
@@ -456,7 +461,10 @@ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
- brw_ADD(p, dst, src0, negate(src1));
+ if (negate_value)
+ brw_ADD(p, dst, src1, negate(src0));
+ else
+ brw_ADD(p, dst, src0, negate(src1));
}
void
@@ -902,7 +910,7 @@ fs_visitor::generate_code()
generate_ddx(inst, dst, src[0]);
break;
case FS_OPCODE_DDY:
- generate_ddy(inst, dst, src[0]);
+ generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
break;
case FS_OPCODE_SPILL:
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index e919e5a3627..300e3bb1187 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -420,6 +420,13 @@ static void brw_wm_populate_key( struct brw_context *brw,
GLuint line_aa;
GLuint i;
+ /* As a temporary measure we assume that all programs use dFdy() (and hence
+ * need to be compiled differently depending on whether we're rendering to
+ * an FBO). FIXME: set this bool correctly based on the contents of the
+ * program.
+ */
+ bool program_uses_dfdy = true;
+
memset(key, 0, sizeof(*key));
/* Build the index for table lookup
@@ -519,6 +526,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
*/
if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
key->drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ if ((fp->program.Base.InputsRead & FRAG_BIT_WPOS) || program_uses_dfdy) {
key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 8f1cb8c9d8b..2cde2a0f537 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -346,7 +346,8 @@ void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
bool is_ddx,
- const struct brw_reg *arg0);
+ const struct brw_reg *arg0,
+ bool negate_value);
void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index e27ff3528a8..1258efe08bb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -457,12 +457,16 @@ void emit_frontfacing(struct brw_compile *p,
* between each other. We could probably do it like ddx and swizzle the right
* order later, but bail for now and just produce
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ *
+ * The negate_value boolean is used to negate the d/dy computation for FBOs,
+ * since they place the origin at the upper left instead of the lower left.
*/
void emit_ddxy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
bool is_ddx,
- const struct brw_reg *arg0)
+ const struct brw_reg *arg0,
+ bool negate_value)
{
int i;
struct brw_reg src0, src1;
@@ -498,7 +502,10 @@ void emit_ddxy(struct brw_compile *p,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
}
- brw_ADD(p, dst[i], src0, negate(src1));
+ if (negate_value)
+ brw_ADD(p, dst[i], src1, negate(src0));
+ else
+ brw_ADD(p, dst[i], src0, negate(src1));
}
}
if (mask & SATURATE)
@@ -1746,11 +1753,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_DDX:
- emit_ddxy(p, dst, dst_flags, true, args[0]);
+ emit_ddxy(p, dst, dst_flags, true, args[0], false);
break;
case OPCODE_DDY:
- emit_ddxy(p, dst, dst_flags, false, args[0]);
+ emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo);
break;
case OPCODE_DP2: