summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
author: Chris Forbes <[email protected]> 2013-11-17 20:00:00 +1300
committer: Chris Forbes <[email protected]> 2014-07-13 10:01:24 +1200
commit: 9c0bddf73525677acaf2b136cf54375a6ac2c2b9 (patch)
tree: 9beded29291c31a169e28fc4814fd17f6d9db8ab /src/mesa/drivers/dri/i965
parent: 5ed147c26f800ffb11fb80b92aec9a772982aff6 (diff)
i965/fs: add support for ir_*_interpolate_at_* expressions
SIMD8-only for now.

V5: - Fix style complaints
    - Move prototype to be with other oddball emit functions
    - Use unreachable() instead of assert() where possible
V6: - Describe what is happening with the clamping
    - Add reg_width to make some expressions clearer

Signed-off-by: Chris Forbes <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 150
2 files changed, 150 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1d3f9d0cdf1..6f169dcf4c8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -439,6 +439,8 @@ public:
void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
fs_reg offset);
+ void emit_interpolate_expression(ir_expression *ir);
+
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_reg dst,
fs_reg src,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8e8affabd40..90bf3fa3384 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -344,6 +344,133 @@ fs_visitor::try_emit_mad(ir_expression *ir)
return true;
}
+static int
+pack_pixel_offset(float x)
+{
+ /* Clamp upper end of the range to +7/16. See explanation in non-constant
+ * offset case below. */
+ int n = MIN2((int)(x * 16), 7);
+ return n & 0xf;
+}
+
+void
+fs_visitor::emit_interpolate_expression(ir_expression *ir)
+{
+ /* in SIMD16 mode, the pixel interpolator returns coords interleaved
+ * 8 channels at a time, same as the barycentric coords presented in
+ * the FS payload. this requires a bit of extra work to support.
+ */
+ no16("interpolate_at_* not yet supported in SIMD16 mode.");
+
+ ir_dereference * deref = ir->operands[0]->as_dereference();
+ ir_swizzle * swiz = NULL;
+ if (!deref) {
+ /* the api does not allow a swizzle here, but the varying packing code
+ * may have pushed one into here.
+ */
+ swiz = ir->operands[0]->as_swizzle();
+ assert(swiz);
+ deref = swiz->val->as_dereference();
+ }
+ assert(deref);
+ ir_variable * var = deref->variable_referenced();
+ assert(var);
+
+ /* 1. collect interpolation factors */
+
+ fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1));
+ fs_reg dst_y = dst_x;
+ dst_y.reg_offset++;
+
+ /* for most messages, we need one reg of ignored data; the hardware requires mlen==1
+ * even when there is no payload. in the per-slot offset case, we'll replace this with
+ * the proper source data. */
+ fs_reg src = fs_reg(this, glsl_type::float_type);
+ int mlen = 1; /* one reg unless overridden */
+ int reg_width = dispatch_width / 8;
+ fs_inst *inst;
+
+ switch (ir->operation) {
+ case ir_unop_interpolate_at_centroid:
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
+ break;
+
+ case ir_binop_interpolate_at_sample: {
+ ir_constant *sample_num = ir->operands[1]->as_constant();
+ assert(sample_num || !"nonconstant sample number should have been lowered.");
+
+ unsigned msg_data = sample_num->value.i[0] << 4;
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
+ break;
+ }
+
+ case ir_binop_interpolate_at_offset: {
+ ir_constant *const_offset = ir->operands[1]->as_constant();
+ if (const_offset) {
+ unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
+ (pack_pixel_offset(const_offset->value.f[1]) << 4);
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
+ fs_reg(msg_data));
+ } else {
+ /* pack the operands: hw wants offsets as 4 bit signed ints */
+ ir->operands[1]->accept(this);
+ src = fs_reg(this, glsl_type::ivec2_type);
+ fs_reg src2 = src;
+ for (int i = 0; i < 2; i++) {
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ emit(MUL(temp, this->result, fs_reg(16.0f)));
+ emit(MOV(src2, temp)); /* float to int */
+
+ /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
+ * that we support a maximum offset of +0.5, which isn't representable
+ * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
+ * which is the opposite of what the shader author wanted.
+ *
+ * This is legal due to ARB_gpu_shader5's quantization rules:
+ *
+ * "Not all values of <offset> may be supported; x and y offsets may
+ * be rounded to fixed-point values with the number of fraction bits
+ * given by the implementation-dependent constant
+ * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+ */
+
+ fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
+ inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
+
+ src2.reg_offset++;
+ this->result.reg_offset++;
+ }
+
+ mlen = 2 * reg_width;
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
+ fs_reg(0u));
+ }
+ break;
+ }
+
+ default:
+ unreachable("not reached");
+ }
+
+ inst->mlen = mlen;
+ inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
+ inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
+ INTERP_QUALIFIER_NOPERSPECTIVE;
+
+ /* 2. emit linterp */
+
+ fs_reg res(this, ir->type);
+ this->result = res;
+
+ for (int i = 0; i < ir->type->vector_elements; i++) {
+ int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
+ emit(FS_OPCODE_LINTERP, res,
+ dst_x, dst_y,
+ fs_reg(interp_reg(var->data.location, ch)));
+ res.reg_offset++;
+ }
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
@@ -355,9 +482,22 @@ fs_visitor::visit(ir_expression *ir)
if (try_emit_saturate(ir))
return;
- if (ir->operation == ir_binop_add) {
+
+ /* Deal with the real oddball stuff first */
+ switch (ir->operation) {
+ case ir_binop_add:
if (try_emit_mad(ir))
- return;
+ return;
+ break;
+
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ emit_interpolate_expression(ir);
+ return;
+
+ default:
+ break;
}
for (operand = 0; operand < ir->get_num_operands(); operand++) {
@@ -815,6 +955,12 @@ fs_visitor::visit(ir_expression *ir)
inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
inst->predicate = BRW_PREDICATE_NORMAL;
break;
+
+ case ir_unop_interpolate_at_centroid:
+ case ir_binop_interpolate_at_offset:
+ case ir_binop_interpolate_at_sample:
+ unreachable("already handled above");
+ break;
}
}