diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 15 |
-rw-r--r-- | src/intel/compiler/brw_fs.h | 5 |
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 53 |
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 15 |
4 files changed, 30 insertions, 58 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index f10df1dcbeb..97f47ab92c2 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6766,6 +6766,21 @@ fs_visitor::lower_barycentrics() const fs_builder ubld = ibld.exec_all().group(8, 0); switch (inst->opcode) { + case FS_OPCODE_LINTERP : { + assert(inst->exec_size == 16); + const fs_reg tmp = ibld.vgrf(inst->src[0].type, 2); + fs_reg srcs[4]; + + for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) + srcs[i] = horiz_offset(offset(inst->src[0], ibld, i % 2), + 8 * (i / 2)); + + ubld.LOAD_PAYLOAD(tmp, srcs, ARRAY_SIZE(srcs), ARRAY_SIZE(srcs)); + + inst->src[0] = tmp; + progress = true; + break; + } case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index d84f99db036..a682fac9aa6 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -571,13 +571,14 @@ namespace brw { return fs_reg(); const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - const brw::fs_builder hbld = bld.exec_all().group(16, 0); + const brw::fs_builder hbld = bld.exec_all().group(8, 0); const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); fs_reg *const components = new fs_reg[2 * m]; for (unsigned c = 0; c < 2; c++) { for (unsigned g = 0; g < m; g++) - components[c * m + g] = offset(brw_vec8_grf(regs[g], 0), hbld, c); + components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0), + hbld, c + 2 * (g % 2)); } hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ffaf90764f5..3bed5406576 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3313,44 +3313,6 @@ alloc_frag_output(fs_visitor *v, unsigned location) unreachable("Invalid location"); } -/* Annoyingly, we get the barycentrics into the shader in a 
layout that's - * optimized for PLN but it doesn't work nearly as well as one would like for - * manual interpolation. - */ -static void -shuffle_from_pln_layout(const fs_builder &bld, fs_reg dest, fs_reg pln_data) -{ - dest.type = BRW_REGISTER_TYPE_F; - pln_data.type = BRW_REGISTER_TYPE_F; - const fs_reg dest_u = offset(dest, bld, 0); - const fs_reg dest_v = offset(dest, bld, 1); - - for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) { - const fs_builder gbld = bld.group(8, g); - gbld.MOV(horiz_offset(dest_u, g * 8), - byte_offset(pln_data, (g * 2 + 0) * REG_SIZE)); - gbld.MOV(horiz_offset(dest_v, g * 8), - byte_offset(pln_data, (g * 2 + 1) * REG_SIZE)); - } -} - -static void -shuffle_to_pln_layout(const fs_builder &bld, fs_reg pln_data, fs_reg src) -{ - pln_data.type = BRW_REGISTER_TYPE_F; - src.type = BRW_REGISTER_TYPE_F; - const fs_reg src_u = offset(src, bld, 0); - const fs_reg src_v = offset(src, bld, 1); - - for (unsigned g = 0; g < bld.dispatch_width() / 8; g++) { - const fs_builder gbld = bld.group(8, g); - gbld.MOV(byte_offset(pln_data, (g * 2 + 0) * REG_SIZE), - horiz_offset(src_u, g * 8)); - gbld.MOV(byte_offset(pln_data, (g * 2 + 1) * REG_SIZE), - horiz_offset(src_v, g * 8)); - } -} - void fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -3565,8 +3527,9 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); enum brw_barycentric_mode bary = brw_barycentric_mode(interp_mode, instr->intrinsic); - - shuffle_from_pln_layout(bld, dest, this->delta_xy[bary]); + const fs_reg srcs[] = { offset(this->delta_xy[bary], bld, 0), + offset(this->delta_xy[bary], bld, 1) }; + bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); break; } @@ -3711,18 +3674,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, if (bary_intrin == nir_intrinsic_load_barycentric_at_offset || bary_intrin == nir_intrinsic_load_barycentric_at_sample) { - /* Use the result of the PI 
message. Because the load_barycentric - * intrinsics return a regular vec2 and we need it in PLN layout, we - * have to do a translation. Fortunately, copy-prop cleans this up - * reliably. - */ - dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2); - shuffle_to_pln_layout(bld, dst_xy, get_nir_src(instr->src[0])); + /* Use the result of the PI message. */ + dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F); } else { /* Use the delta_xy values computed from the payload */ enum brw_barycentric_mode bary = brw_barycentric_mode(interp_mode, bary_intrin); - dst_xy = this->delta_xy[bary]; } diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 476a9c64a5b..81d0e466cc7 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -176,11 +176,11 @@ fs_visitor::emit_interpolation_setup_gen4() const fs_reg xstart(negate(brw_vec1_grf(1, 0))); const fs_reg ystart(negate(brw_vec1_grf(1, 1))); - if (devinfo->has_pln && dispatch_width == 16) { - for (unsigned i = 0; i < 2; i++) { - abld.half(i).ADD(half(offset(delta_xy, abld, i), 0), + if (devinfo->has_pln) { + for (unsigned i = 0; i < dispatch_width / 8; i++) { + abld.half(i).ADD(half(offset(delta_xy, abld, 0), i), half(this->pixel_x, i), xstart); - abld.half(i).ADD(half(offset(delta_xy, abld, i), 1), + abld.half(i).ADD(half(offset(delta_xy, abld, 1), i), half(this->pixel_y, i), ystart); } } else { @@ -358,11 +358,10 @@ fs_visitor::emit_interpolation_setup_gen6() for (unsigned c = 0; c < 2; c++) { for (unsigned q = 0; q < dispatch_width / 8; q++) { - const unsigned idx = c + (q & 2) + (q & 1) * dispatch_width / 8; set_predicate(BRW_PREDICATE_NORMAL, - bld.half(q).SEL(horiz_offset(delta_xy[i], idx * 8), - horiz_offset(centroid_delta_xy, idx * 8), - horiz_offset(pixel_delta_xy, idx * 8))); + bld.half(q).SEL(half(offset(delta_xy[i], bld, c), q), + half(offset(centroid_delta_xy, bld, c), q), + half(offset(pixel_delta_xy, bld, c), q))); } } } |