diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 150 |
2 files changed, 150 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 1d3f9d0cdf1..6f169dcf4c8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -439,6 +439,8 @@ public:
    void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
                                   fs_reg offset);
 
+   void emit_interpolate_expression(ir_expression *ir); /* handles the ir_*_interpolate_at_* expressions dispatched from visit(ir_expression *) */
+
    bool try_rewrite_rhs_to_dst(ir_assignment *ir,
                                fs_reg dst,
                                fs_reg src,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8e8affabd40..90bf3fa3384 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -344,6 +344,133 @@ fs_visitor::try_emit_mad(ir_expression *ir)
    return true;
 }
 
+static int
+pack_pixel_offset(float x)
+{
+   /* Pack one offset component: scale by 16, truncate to int, clamp the upper end of
+    * the range to +7/16 (see explanation in non-constant offset case below), keep the low 4 bits. */
+   int n = MIN2((int)(x * 16), 7);
+   return n & 0xf;
+}
+
+void
+fs_visitor::emit_interpolate_expression(ir_expression *ir)
+{
+   /* Emits one pixel interpolator message (centroid, sample, shared offset or per-slot
+    * offset) writing dst_x/dst_y, then one FS_OPCODE_LINTERP per result component, and leaves the value in this->result.
+    * In SIMD16 mode, the pixel interpolator returns coords interleaved 8 channels at a time, same as the barycentric coords presented in the FS payload. This requires a bit of extra work to support, hence the no16() call below.
+    */
+   no16("interpolate_at_* not yet supported in SIMD16 mode.");
+
+   ir_dereference * deref = ir->operands[0]->as_dereference();
+   ir_swizzle * swiz = NULL;
+   if (!deref) {
+      /* The API does not allow a swizzle here, but the varying packing code
+       * may have pushed one into here; remember it so the channel selection in step 2 can honor it.
+       */
+      swiz = ir->operands[0]->as_swizzle();
+      assert(swiz);
+      deref = swiz->val->as_dereference();
+   }
+   assert(deref);
+   ir_variable * var = deref->variable_referenced();
+   assert(var);
+
+   /* 1. collect interpolation factors */
+
+   fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1)); /* two-component temporary of the result's base type */
+   fs_reg dst_y = dst_x;
+   dst_y.reg_offset++; /* second component, one reg_offset past dst_x */
+
+   /* For most messages, we need one reg of ignored data; the hardware requires mlen==1
+    * even when there is no payload. In the per-slot offset case, we'll replace this with
+    * the proper source data. */
+   fs_reg src = fs_reg(this, glsl_type::float_type);
+   int mlen = 1; /* one reg unless overridden */
+   int reg_width = dispatch_width / 8;
+   fs_inst *inst;
+
+   switch (ir->operation) {
+   case ir_unop_interpolate_at_centroid:
+      inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
+      break;
+
+   case ir_binop_interpolate_at_sample: {
+      ir_constant *sample_num = ir->operands[1]->as_constant();
+      assert(sample_num || !"nonconstant sample number should have been lowered."); /* NOTE(review): sample_num is dereferenced just below regardless; with asserts compiled out a non-constant operand would presumably be a NULL dereference -- confirm the lowering always runs */
+
+      unsigned msg_data = sample_num->value.i[0] << 4; /* sample number is placed at bit 4 and up of the message data */
+      inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
+      break;
+   }
+
+   case ir_binop_interpolate_at_offset: {
+      ir_constant *const_offset = ir->operands[1]->as_constant();
+      if (const_offset) {
+         unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) | /* x offset in bits 0-3 */
+                            (pack_pixel_offset(const_offset->value.f[1]) << 4); /* y offset in bits 4-7 */
+         inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
+                     fs_reg(msg_data));
+      } else {
+         /* Pack the operands: hw wants offsets as 4 bit signed ints. The offset operand is evaluated first, then converted component by component into an ivec2 payload. */
+         ir->operands[1]->accept(this);
+         src = fs_reg(this, glsl_type::ivec2_type);
+         fs_reg src2 = src;
+         for (int i = 0; i < 2; i++) {
+            fs_reg temp = fs_reg(this, glsl_type::float_type);
+            emit(MUL(temp, this->result, fs_reg(16.0f)));
+            emit(MOV(src2, temp));  /* float to int */
+
+            /* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
+             * that we support a maximum offset of +0.5, which isn't representable
+             * in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
+             * which is the opposite of what the shader author wanted.
+             *
+             * This is legal due to ARB_gpu_shader5's quantization rules:
+             *
+             * "Not all values of <offset> may be supported; x and y offsets may
+             * be rounded to fixed-point values with the number of fraction bits
+             * given by the implementation-dependent constant
+             * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+             */
+
+            fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7)); /* NOTE: this inst is local to the loop body and shadows the outer inst */
+            inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
+
+            src2.reg_offset++; /* step the packed destination and the evaluated operand to the next component */
+            this->result.reg_offset++;
+         }
+
+         mlen = 2 * reg_width; /* the payload is now the packed ivec2 of offsets instead of the ignored reg */
+         inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
+                     fs_reg(0u));
+      }
+      break;
+   }
+
+   default:
+      unreachable("not reached");
+   }
+
+   inst->mlen = mlen;
+   inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
+   inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
+         INTERP_QUALIFIER_NOPERSPECTIVE;
+
+   /* 2. emit linterp: one FS_OPCODE_LINTERP per component of the result, using the factors gathered in step 1 */
+
+   fs_reg res(this, ir->type);
+   this->result = res;
+
+   for (int i = 0; i < ir->type->vector_elements; i++) {
+      int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i; /* with a swizzle, read its mask as an int and take 2 bits per component as the source channel; NOTE(review): depends on how swiz->mask is laid out in memory -- confirm */
+      emit(FS_OPCODE_LINTERP, res,
+           dst_x, dst_y,
+           fs_reg(interp_reg(var->data.location, ch)));
+      res.reg_offset++;
+   }
+}
+
 void
 fs_visitor::visit(ir_expression *ir)
 {
@@ -355,9 +482,22 @@ fs_visitor::visit(ir_expression *ir)
 
    if (try_emit_saturate(ir))
       return;
-   if (ir->operation == ir_binop_add) {
+
+   /* Deal with the real oddball stuff first: these cases may return before the generic operand loop below */
+   switch (ir->operation) {
+   case ir_binop_add:
       if (try_emit_mad(ir))
-	 return;
+         return;
+      break;
+
+   case ir_unop_interpolate_at_centroid:
+   case ir_binop_interpolate_at_offset:
+   case ir_binop_interpolate_at_sample:
+      emit_interpolate_expression(ir);
+      return;
+
+   default:
+      break;
    }
 
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
@@ -815,6 +955,12 @@ fs_visitor::visit(ir_expression *ir)
       inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
       inst->predicate = BRW_PREDICATE_NORMAL;
       break;
+
+   case ir_unop_interpolate_at_centroid:
+   case ir_binop_interpolate_at_offset:
+   case ir_binop_interpolate_at_sample:
+      unreachable("already handled above"); /* the early switch at the top of this function returns for these operations */
+      break;
    }
 }
 
|