diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 175 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 16 |
5 files changed, 270 insertions, 341 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index a4855a09137..afb70a139ae 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -40,6 +40,7 @@ .lower_fdiv = true, \ .lower_flrp64 = true, \ .native_integers = true, \ + .use_interpolated_input_intrinsics = true, \ .vertex_id_zero_based = true static const struct nir_shader_compiler_options scalar_nir_options = { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index fc91bbcfa46..4aae5742278 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1070,21 +1070,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos) bld.MOV(wpos, this->wpos_w); } -static enum brw_barycentric_mode -barycentric_mode(enum glsl_interp_mode mode, - bool is_centroid, bool is_sample) +enum brw_barycentric_mode +brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op) { - unsigned bary; - /* Barycentric modes don't make sense for flat inputs. */ assert(mode != INTERP_MODE_FLAT); - if (is_sample) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; - } else if (is_centroid) { - bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; - } else { + unsigned bary; + switch (op) { + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_at_offset: bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL; + break; + case nir_intrinsic_load_barycentric_centroid: + bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID; + break; + case nir_intrinsic_load_barycentric_sample: + case nir_intrinsic_load_barycentric_at_sample: + bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE; + break; + default: + unreachable("invalid intrinsic"); } if (mode == INTERP_MODE_NOPERSPECTIVE) @@ -1104,107 +1110,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary) return (enum brw_barycentric_mode) ((unsigned) bary - 1); } -void -fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name, - const glsl_type *type, - glsl_interp_mode interpolation_mode, - int *location, bool mod_centroid, - bool mod_sample) -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - - if (type->is_array() || type->is_matrix()) { - const glsl_type *elem_type = glsl_get_array_element(type); - const unsigned length = glsl_get_length(type); - - for (unsigned i = 0; i < length; i++) { - emit_general_interpolation(attr, name, elem_type, interpolation_mode, - location, mod_centroid, mod_sample); - } - } else if (type->is_record()) { - for (unsigned i = 0; i < type->length; i++) { - const glsl_type *field_type = type->fields.structure[i].type; - emit_general_interpolation(attr, name, field_type, interpolation_mode, - location, mod_centroid, mod_sample); - } - } else { - assert(type->is_scalar() || type->is_vector()); - - if (prog_data->urb_setup[*location] == -1) { - /* If there's no incoming setup data for this slot, don't - * emit interpolation for it. - */ - *attr = offset(*attr, bld, type->vector_elements); - (*location)++; - return; - } - - attr->type = brw_type_for_base_type(type->get_scalar_type()); - - if (interpolation_mode == INTERP_MODE_FLAT) { - /* Constant interpolation (flat shading) case. The SF has - * handed us defined values in only the constant offset - * field of the setup reg. - */ - unsigned vector_elements = type->vector_elements; - - /* Data starts at suboffet 3 in 32-bit units (12 bytes), so it is not - * 64-bit aligned and the current implementation fails to read the - * data properly. Instead, when there is a double input varying, - * read it as vector of floats with twice the number of components. - */ - if (attr->type == BRW_REGISTER_TYPE_DF) { - vector_elements *= 2; - attr->type = BRW_REGISTER_TYPE_F; - } - for (unsigned int i = 0; i < vector_elements; i++) { - struct brw_reg interp = interp_reg(*location, i); - interp = suboffset(interp, 3); - interp.type = attr->type; - bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp)); - *attr = offset(*attr, bld, 1); - } - } else { - /* Smooth/noperspective interpolation case. */ - enum brw_barycentric_mode bary = - barycentric_mode(interpolation_mode, mod_centroid, mod_sample); - - for (unsigned int i = 0; i < type->vector_elements; i++) { - fs_reg interp(interp_reg(*location, i)); - if (devinfo->needs_unlit_centroid_workaround && mod_centroid) { - /* Get the pixel/sample mask into f0 so that we know - * which pixels are lit. Then, for each channel that is - * unlit, replace the centroid data with non-centroid - * data. - */ - bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); - - fs_inst *inst; - inst = bld.emit(FS_OPCODE_LINTERP, *attr, - delta_xy[centroid_to_pixel(bary)], interp); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - inst->no_dd_clear = true; - - inst = bld.emit(FS_OPCODE_LINTERP, *attr, - delta_xy[bary], interp); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = false; - inst->no_dd_check = true; - } else { - bld.emit(FS_OPCODE_LINTERP, *attr, delta_xy[bary], interp); - } - if (devinfo->gen < 6 && interpolation_mode == INTERP_MODE_SMOOTH) { - bld.MUL(*attr, *attr, this->pixel_w); - } - *attr = offset(*attr, bld, 1); - } - } - (*location)++; - } -} - fs_reg * fs_visitor::emit_frontfacing_interpolation() { @@ -6330,6 +6235,10 @@ fs_visitor::run_cs() /** * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_barycentric_mode) is needed by the fragment shader. + * + * We examine the load_barycentric intrinsics rather than looking at input + * variables so that we catch interpolateAtCentroid() messages too, which + * also need the BRW_BARYCENTRIC_[NON]PERSPECTIVE_CENTROID mode set up. */ static unsigned brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, @@ -6337,29 +6246,37 @@ brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo, { unsigned barycentric_interp_modes = 0; - nir_foreach_variable(var, &shader->inputs) { - /* Ignore WPOS; it doesn't require interpolation. */ - if (var->data.location == VARYING_SLOT_POS) + nir_foreach_function(f, shader) { + if (!f->impl) continue; - /* Flat inputs don't need barycentric modes. */ - if (var->data.interpolation == INTERP_MODE_FLAT) - continue; + nir_foreach_block(block, f->impl) { + nir_foreach_instr(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; - /* Determine the set (or sets) of barycentric coordinates needed to - * interpolate this variable. Note that when - * brw->needs_unlit_centroid_workaround is set, centroid interpolation - * uses PIXEL interpolation for unlit pixels and CENTROID interpolation - * for lit pixels, so we need both sets of barycentric coordinates. - */ - enum brw_barycentric_mode bary_mode = - barycentric_mode((glsl_interp_mode) var->data.interpolation, - var->data.centroid, var->data.sample); + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_interpolated_input) + continue; + + /* Ignore WPOS; it doesn't require interpolation. */ + if (nir_intrinsic_base(intrin) == VARYING_SLOT_POS) + continue; - barycentric_interp_modes |= 1 << bary_mode; + intrin = nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr); + enum glsl_interp_mode interp = (enum glsl_interp_mode) + nir_intrinsic_interp_mode(intrin); + nir_intrinsic_op bary_op = intrin->intrinsic; + enum brw_barycentric_mode bary = + brw_barycentric_mode(interp, bary_op); - if (var->data.centroid && devinfo->needs_unlit_centroid_workaround) - barycentric_interp_modes |= 1 << centroid_to_pixel(bary_mode); + barycentric_interp_modes |= 1 << bary; + + if (devinfo->needs_unlit_centroid_workaround && + bary_op == nir_intrinsic_load_barycentric_centroid) + barycentric_interp_modes |= 1 << centroid_to_pixel(bary); + } + } } return barycentric_interp_modes; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 7998f514155..574475f071a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -174,11 +174,6 @@ public: fs_reg *emit_samplepos_setup(); fs_reg *emit_sampleid_setup(); fs_reg *emit_samplemaskin_setup(); - void emit_general_interpolation(fs_reg *attr, const char *name, - const glsl_type *type, - glsl_interp_mode interpolation_mode, - int *location, bool mod_centroid, - bool mod_sample); fs_reg *emit_vs_system_value(int location); void emit_interpolation_setup_gen4(); void emit_interpolation_setup_gen6(); @@ -195,7 +190,6 @@ public: bool opt_zero_samples(); void emit_nir_code(); - void nir_setup_inputs(); void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type, unsigned *location); void nir_setup_outputs(); @@ -511,3 +505,6 @@ void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld, uint32_t components); fs_reg setup_imm_df(const brw::fs_builder &bld, double v); + +enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode, + nir_intrinsic_op op); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 44c4bdc63ab..22cba8c32db 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -36,7 +36,6 @@ fs_visitor::emit_nir_code() /* emit the arrays used for inputs and outputs - load/store intrinsics will * be converted to reads/writes of these arrays */ - nir_setup_inputs(); nir_setup_outputs(); nir_setup_uniforms(); nir_emit_system_values(); @@ -50,38 +49,6 @@ fs_visitor::emit_nir_code() } void -fs_visitor::nir_setup_inputs() -{ - if (stage != MESA_SHADER_FRAGMENT) - return; - - nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); - - nir_foreach_variable(var, &nir->inputs) { - fs_reg input = offset(nir_inputs, bld, var->data.driver_location); - - fs_reg reg; - if (var->data.location == VARYING_SLOT_POS) { - emit_fragcoord_interpolation(input); - } else if (var->data.location == VARYING_SLOT_LAYER) { - struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3); - reg.type = BRW_REGISTER_TYPE_D; - bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); - } else if (var->data.location == VARYING_SLOT_VIEWPORT) { - struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3); - reg.type = BRW_REGISTER_TYPE_D; - bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); - } else { - int location = var->data.location; - emit_general_interpolation(&input, var->name, var->type, - (glsl_interp_mode) var->data.interpolation, - &location, var->data.centroid, - var->data.sample); - } - } -} - -void fs_visitor::nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type, unsigned *location) @@ -3141,7 +3108,6 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { assert(stage == MESA_SHADER_FRAGMENT); - const struct brw_wm_prog_key *wm_key = (const struct brw_wm_prog_key *) key; fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -3198,189 +3164,245 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, break; } - case nir_intrinsic_interp_var_at_centroid: - case nir_intrinsic_interp_var_at_sample: - case nir_intrinsic_interp_var_at_offset: { - /* Handle ARB_gpu_shader5 interpolation intrinsics - * - * It's worth a quick word of explanation as to why we handle the full - * variable-based interpolation intrinsic rather than a lowered version - * with like we do for other inputs. We have to do that because the way - * we set up inputs doesn't allow us to use the already setup inputs for - * interpolation. At the beginning of the shader, we go through all of - * the input variables and do the initial interpolation and put it in - * the nir_inputs array based on its location as determined in - * nir_lower_io. If the input isn't used, dead code cleans up and - * everything works fine. However, when we get to the ARB_gpu_shader5 - * interpolation intrinsics, we need to reinterpolate the input - * differently. If we used an intrinsic that just had an index it would - * only give us the offset into the nir_inputs array. However, this is - * useless because that value is post-interpolation and we need - * pre-interpolation. In order to get the actual location of the bits - * we get from the vertex fetching hardware, we need the variable. - */ - fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - const glsl_interp_mode interpolation = - (glsl_interp_mode) instr->variables[0]->var->data.interpolation; + case nir_intrinsic_load_input: { + /* load_input is only used for flat inputs */ + unsigned base = nir_intrinsic_base(instr); + unsigned component = nir_intrinsic_component(instr); + unsigned num_components = instr->num_components; + enum brw_reg_type type = dest.type; - switch (instr->intrinsic) { - case nir_intrinsic_interp_var_at_centroid: - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_CENTROID, - dst_xy, - fs_reg(), /* src */ - brw_imm_ud(0u), - interpolation); - break; + /* Special case fields in the VUE header */ + if (base == VARYING_SLOT_LAYER) + component = 1; + else if (base == VARYING_SLOT_VIEWPORT) + component = 2; - case nir_intrinsic_interp_var_at_sample: { - if (!wm_key->multisample_fbo) { - /* From the ARB_gpu_shader5 specification: - * "If multisample buffers are not available, the input varying - * will be evaluated at the center of the pixel." - */ - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_CENTROID, - dst_xy, - fs_reg(), /* src */ - brw_imm_ud(0u), - interpolation); - break; - } + if (nir_dest_bit_size(instr->dest) == 64) { + /* const_index is in 32-bit type size units that could not be aligned + * with DF. We need to read the double vector as if it was a float + * vector of twice the number of components to fetch the right data. + */ + type = BRW_REGISTER_TYPE_F; + num_components *= 2; + } - nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); + for (unsigned int i = 0; i < num_components; i++) { + struct brw_reg interp = interp_reg(base, component + i); + interp = suboffset(interp, 3); + bld.emit(FS_OPCODE_CINTERP, offset(retype(dest, type), bld, i), + retype(fs_reg(interp), type)); + } - if (const_sample) { - unsigned msg_data = const_sample->i32[0] << 4; + if (nir_dest_bit_size(instr->dest) == 64) { + shuffle_32bit_load_result_to_64bit_data(bld, + dest, + retype(dest, type), + instr->num_components); + } + break; + } + + case nir_intrinsic_load_barycentric_pixel: + case nir_intrinsic_load_barycentric_centroid: + case nir_intrinsic_load_barycentric_sample: + /* Do nothing - load_interpolated_input handling will handle it later. */ + break; + case nir_intrinsic_load_barycentric_at_sample: { + const glsl_interp_mode interpolation = + (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); + + nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]); + + if (const_sample) { + unsigned msg_data = const_sample->i32[0] << 4; + + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SAMPLE, + dest, + fs_reg(), /* src */ + brw_imm_ud(msg_data), + interpolation); + } else { + const fs_reg sample_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_UD); + + if (nir_src_is_dynamically_uniform(instr->src[0])) { + const fs_reg sample_id = bld.emit_uniformize(sample_src); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, + dest, fs_reg(), /* src */ - brw_imm_ud(msg_data), + msg_data, interpolation); } else { - const fs_reg sample_src = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_UD); - - if (nir_src_is_dynamically_uniform(instr->src[0])) { - const fs_reg sample_id = bld.emit_uniformize(sample_src); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0) - .SHL(msg_data, sample_id, brw_imm_ud(4u)); + /* Make a loop that sends a message to the pixel interpolater + * for the sample number in each live channel. If there are + * multiple channels with the same sample number then these + * will be handled simultaneously with a single interation of + * the loop. + */ + bld.emit(BRW_OPCODE_DO); + + /* Get the next live sample number into sample_id_reg */ + const fs_reg sample_id = bld.emit_uniformize(sample_src); + + /* Set the flag register so that we can perform the send + * message on all channels that have the same sample number + */ + bld.CMP(bld.null_reg_ud(), + sample_src, sample_id, + BRW_CONDITIONAL_EQ); + const fs_reg msg_data = vgrf(glsl_type::uint_type); + bld.exec_all().group(1, 0) + .SHL(msg_data, sample_id, brw_imm_ud(4u)); + fs_inst *inst = emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, + dest, fs_reg(), /* src */ msg_data, interpolation); - } else { - /* Make a loop that sends a message to the pixel interpolater - * for the sample number in each live channel. If there are - * multiple channels with the same sample number then these - * will be handled simultaneously with a single interation of - * the loop. - */ - bld.emit(BRW_OPCODE_DO); - - /* Get the next live sample number into sample_id_reg */ - const fs_reg sample_id = bld.emit_uniformize(sample_src); + set_predicate(BRW_PREDICATE_NORMAL, inst); - /* Set the flag register so that we can perform the send - * message on all channels that have the same sample number - */ - bld.CMP(bld.null_reg_ud(), - sample_src, sample_id, - BRW_CONDITIONAL_EQ); - const fs_reg msg_data = vgrf(glsl_type::uint_type); - bld.exec_all().group(1, 0) - .SHL(msg_data, sample_id, brw_imm_ud(4u)); - fs_inst *inst = - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SAMPLE, - dst_xy, - fs_reg(), /* src */ - msg_data, - interpolation); - set_predicate(BRW_PREDICATE_NORMAL, inst); - - /* Continue the loop if there are any live channels left */ - set_predicate_inv(BRW_PREDICATE_NORMAL, - true, /* inverse */ - bld.emit(BRW_OPCODE_WHILE)); - } + /* Continue the loop if there are any live channels left */ + set_predicate_inv(BRW_PREDICATE_NORMAL, + true, /* inverse */ + bld.emit(BRW_OPCODE_WHILE)); } - - break; } + break; + } - case nir_intrinsic_interp_var_at_offset: { - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + case nir_intrinsic_load_barycentric_at_offset: { + const glsl_interp_mode interpolation = + (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); - if (const_offset) { - unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; - unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); - emit_pixel_interpolater_send(bld, - FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, - dst_xy, - fs_reg(), /* src */ - brw_imm_ud(off_x | (off_y << 4)), - interpolation); - } else { - fs_reg src = vgrf(glsl_type::ivec2_type); - fs_reg offset_src = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_F); - for (int i = 0; i < 2; i++) { - fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); - fs_reg itemp = vgrf(glsl_type::int_type); - /* float to int */ - bld.MOV(itemp, temp); - - /* Clamp the upper end of the range to +7/16. - * ARB_gpu_shader5 requires that we support a maximum offset - * of +0.5, which isn't representable in a S0.4 value -- if - * we didn't clamp it, we'd end up with -8/16, which is the - * opposite of what the shader author wanted. - * - * This is legal due to ARB_gpu_shader5's quantization - * rules: - * - * "Not all values of <offset> may be supported; x and y - * offsets may be rounded to fixed-point values with the - * number of fraction bits given by the - * implementation-dependent constant - * FRAGMENT_INTERPOLATION_OFFSET_BITS" - */ - set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); - } + if (const_offset) { + unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf; + unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf; - const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; - emit_pixel_interpolater_send(bld, - opcode, - dst_xy, - src, - brw_imm_ud(0u), - interpolation); + emit_pixel_interpolater_send(bld, + FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, + dest, + fs_reg(), /* src */ + brw_imm_ud(off_x | (off_y << 4)), + interpolation); + } else { + fs_reg src = vgrf(glsl_type::ivec2_type); + fs_reg offset_src = retype(get_nir_src(instr->src[0]), + BRW_REGISTER_TYPE_F); + for (int i = 0; i < 2; i++) { + fs_reg temp = vgrf(glsl_type::float_type); + bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f)); + fs_reg itemp = vgrf(glsl_type::int_type); + /* float to int */ + bld.MOV(itemp, temp); + + /* Clamp the upper end of the range to +7/16. + * ARB_gpu_shader5 requires that we support a maximum offset + * of +0.5, which isn't representable in a S0.4 value -- if + * we didn't clamp it, we'd end up with -8/16, which is the + * opposite of what the shader author wanted. + * + * This is legal due to ARB_gpu_shader5's quantization + * rules: + * + * "Not all values of <offset> may be supported; x and y + * offsets may be rounded to fixed-point values with the + * number of fraction bits given by the + * implementation-dependent constant + * FRAGMENT_INTERPOLATION_OFFSET_BITS" + */ + set_condmod(BRW_CONDITIONAL_L, + bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); } + + const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; + emit_pixel_interpolater_send(bld, + opcode, + dest, + src, + brw_imm_ud(0u), + interpolation); + } + break; + } + + case nir_intrinsic_load_interpolated_input: { + if (nir_intrinsic_base(instr) == VARYING_SLOT_POS) { + emit_fragcoord_interpolation(dest); break; } - default: - unreachable("Invalid intrinsic"); + assert(instr->src[0].ssa && + instr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *bary_intrinsic = + nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr); + nir_intrinsic_op bary_intrin = bary_intrinsic->intrinsic; + enum glsl_interp_mode interp_mode = + (enum glsl_interp_mode) nir_intrinsic_interp_mode(bary_intrinsic); + fs_reg dst_xy; + + if (bary_intrin == nir_intrinsic_load_barycentric_at_offset || + bary_intrin == nir_intrinsic_load_barycentric_at_sample) { + /* Use the result of the PI message */ + dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); + } else { + /* Use the delta_xy values computed from the payload */ + enum brw_barycentric_mode bary = + brw_barycentric_mode(interp_mode, bary_intrin); + + dst_xy = this->delta_xy[bary]; } - for (unsigned j = 0; j < instr->num_components; j++) { - fs_reg src = interp_reg(instr->variables[0]->var->data.location, j); - src.type = dest.type; + for (unsigned int i = 0; i < instr->num_components; i++) { + fs_reg interp = + fs_reg(interp_reg(nir_intrinsic_base(instr), + nir_intrinsic_component(instr) + i)); + interp.type = BRW_REGISTER_TYPE_F; + dest.type = BRW_REGISTER_TYPE_F; - bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src); - dest = offset(dest, bld, 1); + if (devinfo->needs_unlit_centroid_workaround && + bary_intrin == nir_intrinsic_load_barycentric_centroid) { + + /* Get the pixel/sample mask into f0 so that we know which + * pixels are lit. Then, for each channel that is unlit, + * replace the centroid data with non-centroid data. + */ + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + + fs_reg dest_i = offset(dest, bld, i); + fs_reg dst_xy_pixel = + delta_xy[brw_barycentric_mode(interp_mode, + nir_intrinsic_load_barycentric_pixel)]; + + fs_inst *inst; + inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy_pixel, interp); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = true; + inst->no_dd_clear = true; + + inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy, interp); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = false; + inst->no_dd_check = true; + } else if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) { + fs_reg tmp = vgrf(glsl_type::float_type); + bld.emit(FS_OPCODE_LINTERP, tmp, dst_xy, interp); + bld.MUL(offset(dest, bld, i), tmp, this->pixel_w); + } else { + bld.emit(FS_OPCODE_LINTERP, offset(dest, bld, i), dst_xy, interp); + } } break; } + default: nir_emit_intrinsic(bld, instr); break; @@ -3947,26 +3969,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } case nir_intrinsic_load_input: { - fs_reg src; + fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type); unsigned num_components = instr->num_components; enum brw_reg_type type = dest.type; - if (stage == MESA_SHADER_VERTEX) { - src = fs_reg(ATTR, instr->const_index[0], dest.type); - } else { - assert(type_sz(type) >= 4); - if (type == BRW_REGISTER_TYPE_DF) { - /* const_index is in 32-bit type size units that could not be aligned - * with DF. We need to read the double vector as if it was a float - * vector of twice the number of components to fetch the right data. - */ - dest = retype(dest, BRW_REGISTER_TYPE_F); - num_components *= 2; - } - src = offset(retype(nir_inputs, dest.type), bld, - instr->const_index[0]); - } - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); assert(const_offset && "Indirect input loads not allowed"); src = offset(src, bld, const_offset->u32[0]); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 6c3e1d184e4..fe7653137ea 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -30,7 +30,8 @@ static bool is_input(nir_intrinsic_instr *intrin) { return intrin->intrinsic == nir_intrinsic_load_input || - intrin->intrinsic == nir_intrinsic_load_per_vertex_input; + intrin->intrinsic == nir_intrinsic_load_per_vertex_input || + intrin->intrinsic == nir_intrinsic_load_interpolated_input; } static bool @@ -282,9 +283,16 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) void brw_nir_lower_fs_inputs(nir_shader *nir) { - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, VARYING_SLOT_VAR0, - type_size_scalar); - nir_lower_io(nir, nir_var_shader_in, type_size_scalar); + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } + + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); + + add_const_offset_to_base(nir, nir_var_shader_in); } void |