summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_compiler.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 175
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h | 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 410
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c | 16
5 files changed, 270 insertions, 341 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c
index a4855a09137..afb70a139ae 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.c
+++ b/src/mesa/drivers/dri/i965/brw_compiler.c
@@ -40,6 +40,7 @@
.lower_fdiv = true, \
.lower_flrp64 = true, \
.native_integers = true, \
+ .use_interpolated_input_intrinsics = true, \
.vertex_id_zero_based = true
static const struct nir_shader_compiler_options scalar_nir_options = {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index fc91bbcfa46..4aae5742278 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1070,21 +1070,27 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos)
bld.MOV(wpos, this->wpos_w);
}
-static enum brw_barycentric_mode
-barycentric_mode(enum glsl_interp_mode mode,
- bool is_centroid, bool is_sample)
+enum brw_barycentric_mode
+brw_barycentric_mode(enum glsl_interp_mode mode, nir_intrinsic_op op)
{
- unsigned bary;
-
/* Barycentric modes don't make sense for flat inputs. */
assert(mode != INTERP_MODE_FLAT);
- if (is_sample) {
- bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
- } else if (is_centroid) {
- bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
- } else {
+ unsigned bary;
+ switch (op) {
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_at_offset:
bary = BRW_BARYCENTRIC_PERSPECTIVE_PIXEL;
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ bary = BRW_BARYCENTRIC_PERSPECTIVE_CENTROID;
+ break;
+ case nir_intrinsic_load_barycentric_sample:
+ case nir_intrinsic_load_barycentric_at_sample:
+ bary = BRW_BARYCENTRIC_PERSPECTIVE_SAMPLE;
+ break;
+ default:
+ unreachable("invalid intrinsic");
}
if (mode == INTERP_MODE_NOPERSPECTIVE)
@@ -1104,107 +1110,6 @@ centroid_to_pixel(enum brw_barycentric_mode bary)
return (enum brw_barycentric_mode) ((unsigned) bary - 1);
}
-void
-fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
- const glsl_type *type,
- glsl_interp_mode interpolation_mode,
- int *location, bool mod_centroid,
- bool mod_sample)
-{
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
-
- if (type->is_array() || type->is_matrix()) {
- const glsl_type *elem_type = glsl_get_array_element(type);
- const unsigned length = glsl_get_length(type);
-
- for (unsigned i = 0; i < length; i++) {
- emit_general_interpolation(attr, name, elem_type, interpolation_mode,
- location, mod_centroid, mod_sample);
- }
- } else if (type->is_record()) {
- for (unsigned i = 0; i < type->length; i++) {
- const glsl_type *field_type = type->fields.structure[i].type;
- emit_general_interpolation(attr, name, field_type, interpolation_mode,
- location, mod_centroid, mod_sample);
- }
- } else {
- assert(type->is_scalar() || type->is_vector());
-
- if (prog_data->urb_setup[*location] == -1) {
- /* If there's no incoming setup data for this slot, don't
- * emit interpolation for it.
- */
- *attr = offset(*attr, bld, type->vector_elements);
- (*location)++;
- return;
- }
-
- attr->type = brw_type_for_base_type(type->get_scalar_type());
-
- if (interpolation_mode == INTERP_MODE_FLAT) {
- /* Constant interpolation (flat shading) case. The SF has
- * handed us defined values in only the constant offset
- * field of the setup reg.
- */
- unsigned vector_elements = type->vector_elements;
-
- /* Data starts at suboffet 3 in 32-bit units (12 bytes), so it is not
- * 64-bit aligned and the current implementation fails to read the
- * data properly. Instead, when there is a double input varying,
- * read it as vector of floats with twice the number of components.
- */
- if (attr->type == BRW_REGISTER_TYPE_DF) {
- vector_elements *= 2;
- attr->type = BRW_REGISTER_TYPE_F;
- }
- for (unsigned int i = 0; i < vector_elements; i++) {
- struct brw_reg interp = interp_reg(*location, i);
- interp = suboffset(interp, 3);
- interp.type = attr->type;
- bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp));
- *attr = offset(*attr, bld, 1);
- }
- } else {
- /* Smooth/noperspective interpolation case. */
- enum brw_barycentric_mode bary =
- barycentric_mode(interpolation_mode, mod_centroid, mod_sample);
-
- for (unsigned int i = 0; i < type->vector_elements; i++) {
- fs_reg interp(interp_reg(*location, i));
- if (devinfo->needs_unlit_centroid_workaround && mod_centroid) {
- /* Get the pixel/sample mask into f0 so that we know
- * which pixels are lit. Then, for each channel that is
- * unlit, replace the centroid data with non-centroid
- * data.
- */
- bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
-
- fs_inst *inst;
- inst = bld.emit(FS_OPCODE_LINTERP, *attr,
- delta_xy[centroid_to_pixel(bary)], interp);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
- inst->no_dd_clear = true;
-
- inst = bld.emit(FS_OPCODE_LINTERP, *attr,
- delta_xy[bary], interp);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = false;
- inst->no_dd_check = true;
- } else {
- bld.emit(FS_OPCODE_LINTERP, *attr, delta_xy[bary], interp);
- }
- if (devinfo->gen < 6 && interpolation_mode == INTERP_MODE_SMOOTH) {
- bld.MUL(*attr, *attr, this->pixel_w);
- }
- *attr = offset(*attr, bld, 1);
- }
- }
- (*location)++;
- }
-}
-
fs_reg *
fs_visitor::emit_frontfacing_interpolation()
{
@@ -6330,6 +6235,10 @@ fs_visitor::run_cs()
/**
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_barycentric_mode) is needed by the fragment shader.
+ *
+ * We examine the load_barycentric intrinsics rather than looking at input
+ * variables so that we catch interpolateAtCentroid() messages too, which
+ * also need the BRW_BARYCENTRIC_[NON]PERSPECTIVE_CENTROID mode set up.
*/
static unsigned
brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
@@ -6337,29 +6246,37 @@ brw_compute_barycentric_interp_modes(const struct brw_device_info *devinfo,
{
unsigned barycentric_interp_modes = 0;
- nir_foreach_variable(var, &shader->inputs) {
- /* Ignore WPOS; it doesn't require interpolation. */
- if (var->data.location == VARYING_SLOT_POS)
+ nir_foreach_function(f, shader) {
+ if (!f->impl)
continue;
- /* Flat inputs don't need barycentric modes. */
- if (var->data.interpolation == INTERP_MODE_FLAT)
- continue;
+ nir_foreach_block(block, f->impl) {
+ nir_foreach_instr(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
- /* Determine the set (or sets) of barycentric coordinates needed to
- * interpolate this variable. Note that when
- * brw->needs_unlit_centroid_workaround is set, centroid interpolation
- * uses PIXEL interpolation for unlit pixels and CENTROID interpolation
- * for lit pixels, so we need both sets of barycentric coordinates.
- */
- enum brw_barycentric_mode bary_mode =
- barycentric_mode((glsl_interp_mode) var->data.interpolation,
- var->data.centroid, var->data.sample);
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
+ continue;
+
+ /* Ignore WPOS; it doesn't require interpolation. */
+ if (nir_intrinsic_base(intrin) == VARYING_SLOT_POS)
+ continue;
- barycentric_interp_modes |= 1 << bary_mode;
+ intrin = nir_instr_as_intrinsic(intrin->src[0].ssa->parent_instr);
+ enum glsl_interp_mode interp = (enum glsl_interp_mode)
+ nir_intrinsic_interp_mode(intrin);
+ nir_intrinsic_op bary_op = intrin->intrinsic;
+ enum brw_barycentric_mode bary =
+ brw_barycentric_mode(interp, bary_op);
- if (var->data.centroid && devinfo->needs_unlit_centroid_workaround)
- barycentric_interp_modes |= 1 << centroid_to_pixel(bary_mode);
+ barycentric_interp_modes |= 1 << bary;
+
+ if (devinfo->needs_unlit_centroid_workaround &&
+ bary_op == nir_intrinsic_load_barycentric_centroid)
+ barycentric_interp_modes |= 1 << centroid_to_pixel(bary);
+ }
+ }
}
return barycentric_interp_modes;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 7998f514155..574475f071a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,11 +174,6 @@ public:
fs_reg *emit_samplepos_setup();
fs_reg *emit_sampleid_setup();
fs_reg *emit_samplemaskin_setup();
- void emit_general_interpolation(fs_reg *attr, const char *name,
- const glsl_type *type,
- glsl_interp_mode interpolation_mode,
- int *location, bool mod_centroid,
- bool mod_sample);
fs_reg *emit_vs_system_value(int location);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
@@ -195,7 +190,6 @@ public:
bool opt_zero_samples();
void emit_nir_code();
- void nir_setup_inputs();
void nir_setup_single_output_varying(fs_reg *reg, const glsl_type *type,
unsigned *location);
void nir_setup_outputs();
@@ -511,3 +505,6 @@ void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
uint32_t components);
fs_reg setup_imm_df(const brw::fs_builder &bld,
double v);
+
+enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
+ nir_intrinsic_op op);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 44c4bdc63ab..22cba8c32db 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -36,7 +36,6 @@ fs_visitor::emit_nir_code()
/* emit the arrays used for inputs and outputs - load/store intrinsics will
* be converted to reads/writes of these arrays
*/
- nir_setup_inputs();
nir_setup_outputs();
nir_setup_uniforms();
nir_emit_system_values();
@@ -50,38 +49,6 @@ fs_visitor::emit_nir_code()
}
void
-fs_visitor::nir_setup_inputs()
-{
- if (stage != MESA_SHADER_FRAGMENT)
- return;
-
- nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
-
- nir_foreach_variable(var, &nir->inputs) {
- fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
-
- fs_reg reg;
- if (var->data.location == VARYING_SLOT_POS) {
- emit_fragcoord_interpolation(input);
- } else if (var->data.location == VARYING_SLOT_LAYER) {
- struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3);
- reg.type = BRW_REGISTER_TYPE_D;
- bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
- } else if (var->data.location == VARYING_SLOT_VIEWPORT) {
- struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3);
- reg.type = BRW_REGISTER_TYPE_D;
- bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
- } else {
- int location = var->data.location;
- emit_general_interpolation(&input, var->name, var->type,
- (glsl_interp_mode) var->data.interpolation,
- &location, var->data.centroid,
- var->data.sample);
- }
- }
-}
-
-void
fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
const glsl_type *type,
unsigned *location)
@@ -3141,7 +3108,6 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_FRAGMENT);
- const struct brw_wm_prog_key *wm_key = (const struct brw_wm_prog_key *) key;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -3198,189 +3164,245 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
break;
}
- case nir_intrinsic_interp_var_at_centroid:
- case nir_intrinsic_interp_var_at_sample:
- case nir_intrinsic_interp_var_at_offset: {
- /* Handle ARB_gpu_shader5 interpolation intrinsics
- *
- * It's worth a quick word of explanation as to why we handle the full
- * variable-based interpolation intrinsic rather than a lowered version
- * with like we do for other inputs. We have to do that because the way
- * we set up inputs doesn't allow us to use the already setup inputs for
- * interpolation. At the beginning of the shader, we go through all of
- * the input variables and do the initial interpolation and put it in
- * the nir_inputs array based on its location as determined in
- * nir_lower_io. If the input isn't used, dead code cleans up and
- * everything works fine. However, when we get to the ARB_gpu_shader5
- * interpolation intrinsics, we need to reinterpolate the input
- * differently. If we used an intrinsic that just had an index it would
- * only give us the offset into the nir_inputs array. However, this is
- * useless because that value is post-interpolation and we need
- * pre-interpolation. In order to get the actual location of the bits
- * we get from the vertex fetching hardware, we need the variable.
- */
- fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
- const glsl_interp_mode interpolation =
- (glsl_interp_mode) instr->variables[0]->var->data.interpolation;
+ case nir_intrinsic_load_input: {
+ /* load_input is only used for flat inputs */
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned num_components = instr->num_components;
+ enum brw_reg_type type = dest.type;
- switch (instr->intrinsic) {
- case nir_intrinsic_interp_var_at_centroid:
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(0u),
- interpolation);
- break;
+ /* Special case fields in the VUE header */
+ if (base == VARYING_SLOT_LAYER)
+ component = 1;
+ else if (base == VARYING_SLOT_VIEWPORT)
+ component = 2;
- case nir_intrinsic_interp_var_at_sample: {
- if (!wm_key->multisample_fbo) {
- /* From the ARB_gpu_shader5 specification:
- * "If multisample buffers are not available, the input varying
- * will be evaluated at the center of the pixel."
- */
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(0u),
- interpolation);
- break;
- }
+ if (nir_dest_bit_size(instr->dest) == 64) {
+ /* const_index is in 32-bit type size units that could not be aligned
+ * with DF. We need to read the double vector as if it was a float
+ * vector of twice the number of components to fetch the right data.
+ */
+ type = BRW_REGISTER_TYPE_F;
+ num_components *= 2;
+ }
- nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+ for (unsigned int i = 0; i < num_components; i++) {
+ struct brw_reg interp = interp_reg(base, component + i);
+ interp = suboffset(interp, 3);
+ bld.emit(FS_OPCODE_CINTERP, offset(retype(dest, type), bld, i),
+ retype(fs_reg(interp), type));
+ }
- if (const_sample) {
- unsigned msg_data = const_sample->i32[0] << 4;
+ if (nir_dest_bit_size(instr->dest) == 64) {
+ shuffle_32bit_load_result_to_64bit_data(bld,
+ dest,
+ retype(dest, type),
+ instr->num_components);
+ }
+ break;
+ }
+
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid:
+ case nir_intrinsic_load_barycentric_sample:
+ /* Do nothing - load_interpolated_input handling will handle it later. */
+ break;
+ case nir_intrinsic_load_barycentric_at_sample: {
+ const glsl_interp_mode interpolation =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
+
+ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i32[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dest,
+ fs_reg(), /* src */
+ brw_imm_ud(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
+ dest,
fs_reg(), /* src */
- brw_imm_ud(msg_data),
+ msg_data,
interpolation);
} else {
- const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_UD);
-
- if (nir_src_is_dynamically_uniform(instr->src[0])) {
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0)
- .SHL(msg_data, sample_id, brw_imm_ud(4u));
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single iteration of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
+ fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
+ dest,
fs_reg(), /* src */
msg_data,
interpolation);
- } else {
- /* Make a loop that sends a message to the pixel interpolater
- * for the sample number in each live channel. If there are
- * multiple channels with the same sample number then these
- * will be handled simultaneously with a single interation of
- * the loop.
- */
- bld.emit(BRW_OPCODE_DO);
-
- /* Get the next live sample number into sample_id_reg */
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
- /* Set the flag register so that we can perform the send
- * message on all channels that have the same sample number
- */
- bld.CMP(bld.null_reg_ud(),
- sample_src, sample_id,
- BRW_CONDITIONAL_EQ);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0)
- .SHL(msg_data, sample_id, brw_imm_ud(4u));
- fs_inst *inst =
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
- fs_reg(), /* src */
- msg_data,
- interpolation);
- set_predicate(BRW_PREDICATE_NORMAL, inst);
-
- /* Continue the loop if there are any live channels left */
- set_predicate_inv(BRW_PREDICATE_NORMAL,
- true, /* inverse */
- bld.emit(BRW_OPCODE_WHILE));
- }
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
}
-
- break;
}
+ break;
+ }
- case nir_intrinsic_interp_var_at_offset: {
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ case nir_intrinsic_load_barycentric_at_offset: {
+ const glsl_interp_mode interpolation =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
- if (const_offset) {
- unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(off_x | (off_y << 4)),
- interpolation);
- } else {
- fs_reg src = vgrf(glsl_type::ivec2_type);
- fs_reg offset_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_F);
- for (int i = 0; i < 2; i++) {
- fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
- fs_reg itemp = vgrf(glsl_type::int_type);
- /* float to int */
- bld.MOV(itemp, temp);
-
- /* Clamp the upper end of the range to +7/16.
- * ARB_gpu_shader5 requires that we support a maximum offset
- * of +0.5, which isn't representable in a S0.4 value -- if
- * we didn't clamp it, we'd end up with -8/16, which is the
- * opposite of what the shader author wanted.
- *
- * This is legal due to ARB_gpu_shader5's quantization
- * rules:
- *
- * "Not all values of <offset> may be supported; x and y
- * offsets may be rounded to fixed-point values with the
- * number of fraction bits given by the
- * implementation-dependent constant
- * FRAGMENT_INTERPOLATION_OFFSET_BITS"
- */
- set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
- }
+ if (const_offset) {
+ unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
- const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
- emit_pixel_interpolater_send(bld,
- opcode,
- dst_xy,
- src,
- brw_imm_ud(0u),
- interpolation);
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dest,
+ fs_reg(), /* src */
+ brw_imm_ud(off_x | (off_y << 4)),
+ interpolation);
+ } else {
+ fs_reg src = vgrf(glsl_type::ivec2_type);
+ fs_reg offset_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_F);
+ for (int i = 0; i < 2; i++) {
+ fs_reg temp = vgrf(glsl_type::float_type);
+ bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
+ fs_reg itemp = vgrf(glsl_type::int_type);
+ /* float to int */
+ bld.MOV(itemp, temp);
+
+ /* Clamp the upper end of the range to +7/16.
+ * ARB_gpu_shader5 requires that we support a maximum offset
+ * of +0.5, which isn't representable in a S0.4 value -- if
+ * we didn't clamp it, we'd end up with -8/16, which is the
+ * opposite of what the shader author wanted.
+ *
+ * This is legal due to ARB_gpu_shader5's quantization
+ * rules:
+ *
+ * "Not all values of <offset> may be supported; x and y
+ * offsets may be rounded to fixed-point values with the
+ * number of fraction bits given by the
+ * implementation-dependent constant
+ * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+ */
+ set_condmod(BRW_CONDITIONAL_L,
+ bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
}
+
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dest,
+ src,
+ brw_imm_ud(0u),
+ interpolation);
+ }
+ break;
+ }
+
+ case nir_intrinsic_load_interpolated_input: {
+ if (nir_intrinsic_base(instr) == VARYING_SLOT_POS) {
+ emit_fragcoord_interpolation(dest);
break;
}
- default:
- unreachable("Invalid intrinsic");
+ assert(instr->src[0].ssa &&
+ instr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *bary_intrinsic =
+ nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
+ nir_intrinsic_op bary_intrin = bary_intrinsic->intrinsic;
+ enum glsl_interp_mode interp_mode =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(bary_intrinsic);
+ fs_reg dst_xy;
+
+ if (bary_intrin == nir_intrinsic_load_barycentric_at_offset ||
+ bary_intrin == nir_intrinsic_load_barycentric_at_sample) {
+ /* Use the result of the PI message */
+ dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F);
+ } else {
+ /* Use the delta_xy values computed from the payload */
+ enum brw_barycentric_mode bary =
+ brw_barycentric_mode(interp_mode, bary_intrin);
+
+ dst_xy = this->delta_xy[bary];
}
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
- src.type = dest.type;
+ for (unsigned int i = 0; i < instr->num_components; i++) {
+ fs_reg interp =
+ fs_reg(interp_reg(nir_intrinsic_base(instr),
+ nir_intrinsic_component(instr) + i));
+ interp.type = BRW_REGISTER_TYPE_F;
+ dest.type = BRW_REGISTER_TYPE_F;
- bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
- dest = offset(dest, bld, 1);
+ if (devinfo->needs_unlit_centroid_workaround &&
+ bary_intrin == nir_intrinsic_load_barycentric_centroid) {
+
+ /* Get the pixel/sample mask into f0 so that we know which
+ * pixels are lit. Then, for each channel that is unlit,
+ * replace the centroid data with non-centroid data.
+ */
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+
+ fs_reg dest_i = offset(dest, bld, i);
+ fs_reg dst_xy_pixel =
+ delta_xy[brw_barycentric_mode(interp_mode,
+ nir_intrinsic_load_barycentric_pixel)];
+
+ fs_inst *inst;
+ inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy_pixel, interp);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = true;
+ inst->no_dd_clear = true;
+
+ inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy, interp);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = false;
+ inst->no_dd_check = true;
+ } else if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) {
+ fs_reg tmp = vgrf(glsl_type::float_type);
+ bld.emit(FS_OPCODE_LINTERP, tmp, dst_xy, interp);
+ bld.MUL(offset(dest, bld, i), tmp, this->pixel_w);
+ } else {
+ bld.emit(FS_OPCODE_LINTERP, offset(dest, bld, i), dst_xy, interp);
+ }
}
break;
}
+
default:
nir_emit_intrinsic(bld, instr);
break;
@@ -3947,26 +3969,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
case nir_intrinsic_load_input: {
- fs_reg src;
+ fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
unsigned num_components = instr->num_components;
enum brw_reg_type type = dest.type;
- if (stage == MESA_SHADER_VERTEX) {
- src = fs_reg(ATTR, instr->const_index[0], dest.type);
- } else {
- assert(type_sz(type) >= 4);
- if (type == BRW_REGISTER_TYPE_DF) {
- /* const_index is in 32-bit type size units that could not be aligned
- * with DF. We need to read the double vector as if it was a float
- * vector of twice the number of components to fetch the right data.
- */
- dest = retype(dest, BRW_REGISTER_TYPE_F);
- num_components *= 2;
- }
- src = offset(retype(nir_inputs, dest.type), bld,
- instr->const_index[0]);
- }
-
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "Indirect input loads not allowed");
src = offset(src, bld, const_offset->u32[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 6c3e1d184e4..fe7653137ea 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -30,7 +30,8 @@ static bool
is_input(nir_intrinsic_instr *intrin)
{
return intrin->intrinsic == nir_intrinsic_load_input ||
- intrin->intrinsic == nir_intrinsic_load_per_vertex_input;
+ intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
+ intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}
static bool
@@ -282,9 +283,16 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
void
brw_nir_lower_fs_inputs(nir_shader *nir)
{
- nir_assign_var_locations(&nir->inputs, &nir->num_inputs, VARYING_SLOT_VAR0,
- type_size_scalar);
- nir_lower_io(nir, nir_var_shader_in, type_size_scalar);
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+ var->data.driver_location = var->data.location;
+ }
+
+ nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
+
+ /* This pass needs actual constants */
+ nir_opt_constant_folding(nir);
+
+ add_const_offset_to_base(nir, nir_var_shader_in);
}
void