aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Schürmann <[email protected]>2020-01-23 19:12:55 +0100
committerMarge Bot <[email protected]>2020-01-29 18:45:23 +0000
commit6f718edcedd713beb547cda48aa8dc63a0af4c35 (patch)
tree5d50ff4d3ea67c3acc41be14cb9436d8f65c13a7
parent901f06e9ad6ba53a9e0bb400f5fa629b491c253f (diff)
aco: simplify gathering of MIMG address components
This patch has a slight effect on pipelinedb: Totals from affected shaders: SGPRS: 23616 -> 21504 (-8.94 %) VGPRS: 15088 -> 14444 (-4.27 %) Spilled SGPRs: 0 -> 0 (0.00 %) Spilled VGPRs: 0 -> 0 (0.00 %) Private memory VGPRs: 0 -> 0 (0.00 %) Scratch size: 0 -> 0 (0.00 %) dwords per thread Code Size: 662660 -> 664600 (0.29 %) bytes LDS: 49 -> 49 (0.00 %) blocks Max Waves: 3079 -> 3204 (4.06 %) Reviewed-by: Rhys Perry <[email protected]> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3602> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3602>
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp208
1 files changed, 79 insertions, 129 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index c1c68334414..6aab75eb774 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6972,22 +6972,20 @@ void build_cube_select(isel_context *ctx, Temp ma, Temp id, Temp deriv,
*out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp);
}
-void prepare_cube_coords(isel_context *ctx, Temp* coords, Temp* ddx, Temp* ddy, bool is_deriv, bool is_array)
+void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx, Temp* ddy, bool is_deriv, bool is_array)
{
Builder bld(ctx->program, ctx->block);
- Temp coord_args[4], ma, tc, sc, id;
- for (unsigned i = 0; i < (is_array ? 4 : 3); i++)
- coord_args[i] = emit_extract_vector(ctx, *coords, i, v1);
+ Temp ma, tc, sc, id;
if (is_array) {
- coord_args[3] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coord_args[3]);
+ coords[3] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coords[3]);
// see comment in ac_prepare_cube_coords()
if (ctx->options->chip_class <= GFX8)
- coord_args[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), coord_args[3]);
+ coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), coords[3]);
}
- ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coord_args[0], coord_args[1], coord_args[2]);
+ ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
aco_ptr<VOP3A_instruction> vop3a{create_instruction<VOP3A_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
vop3a->operands[0] = Operand(ma);
@@ -6996,15 +6994,15 @@ void prepare_cube_coords(isel_context *ctx, Temp* coords, Temp* ddx, Temp* ddy,
vop3a->definitions[0] = Definition(invma);
ctx->block->instructions.emplace_back(std::move(vop3a));
- sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coord_args[0], coord_args[1], coord_args[2]);
+ sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
if (!is_deriv)
sc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), sc, invma, Operand(0x3fc00000u/*1.5*/));
- tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coord_args[0], coord_args[1], coord_args[2]);
+ tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
if (!is_deriv)
tc = bld.vop2(aco_opcode::v_madak_f32, bld.def(v1), tc, invma, Operand(0x3fc00000u/*1.5*/));
- id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coord_args[0], coord_args[1], coord_args[2]);
+ id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
if (is_deriv) {
sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma);
@@ -7033,27 +7031,11 @@ void prepare_cube_coords(isel_context *ctx, Temp* coords, Temp* ddx, Temp* ddy,
}
if (is_array)
- id = bld.vop2(aco_opcode::v_madmk_f32, bld.def(v1), coord_args[3], id, Operand(0x41000000u/*8.0*/));
- *coords = bld.pseudo(aco_opcode::p_create_vector, bld.def(v3), sc, tc, id);
-
-}
-
-Temp apply_round_slice(isel_context *ctx, Temp coords, unsigned idx)
-{
- Temp coord_vec[3];
- for (unsigned i = 0; i < coords.size(); i++)
- coord_vec[i] = emit_extract_vector(ctx, coords, i, v1);
-
- Builder bld(ctx->program, ctx->block);
- coord_vec[idx] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coord_vec[idx]);
-
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, coords.size(), 1)};
- for (unsigned i = 0; i < coords.size(); i++)
- vec->operands[i] = Operand(coord_vec[i]);
- Temp res = bld.tmp(RegType::vgpr, coords.size());
- vec->definitions[0] = Definition(res);
- ctx->block->instructions.emplace_back(std::move(vec));
- return res;
+ id = bld.vop2(aco_opcode::v_madmk_f32, bld.def(v1), coords[3], id, Operand(0x41000000u/*8.0*/));
+ coords.resize(3);
+ coords[0] = sc;
+ coords[1] = tc;
+ coords[2] = id;
}
void get_const_vec(nir_ssa_def *vec, nir_const_value *cv[4])
@@ -7075,8 +7057,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
Builder bld(ctx->program, ctx->block);
bool has_bias = false, has_lod = false, level_zero = false, has_compare = false,
has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false, has_sample_index = false;
- Temp resource, sampler, fmask_ptr, bias = Temp(), coords, compare = Temp(), sample_index = Temp(),
- lod = Temp(), offset = Temp(), ddx = Temp(), ddy = Temp(), derivs = Temp();
+ Temp resource, sampler, fmask_ptr, bias = Temp(), compare = Temp(), sample_index = Temp(),
+ lod = Temp(), offset = Temp(), ddx = Temp(), ddy = Temp();
+ std::vector<Temp> coords;
+ std::vector<Temp> derivs;
nir_const_value *sample_index_cv = NULL;
nir_const_value *const_offset[4] = {NULL, NULL, NULL, NULL};
enum glsl_base_type stype;
@@ -7089,9 +7073,12 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
- case nir_tex_src_coord:
- coords = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[i].src.ssa));
+ case nir_tex_src_coord: {
+ Temp coord = get_ssa_temp(ctx, instr->src[i].src.ssa);
+ for (unsigned i = 0; i < coord.size(); i++)
+ coords.emplace_back(emit_extract_vector(ctx, coord, i, v1));
break;
+ }
case nir_tex_src_bias:
if (instr->op == nir_texop_txb) {
bias = get_ssa_temp(ctx, instr->src[i].src.ssa);
@@ -7139,7 +7126,7 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
break;
}
}
-// TODO: all other cases: structure taken from ac_nir_to_llvm.c
+
if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
return get_buffer_size(ctx, resource, get_ssa_temp(ctx, &instr->dest.ssa), true);
@@ -7219,15 +7206,19 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->coord_components)
- prepare_cube_coords(ctx, &coords, &ddx, &ddy, instr->op == nir_texop_txd, instr->is_array && instr->op != nir_texop_lod);
+ prepare_cube_coords(ctx, coords, &ddx, &ddy, instr->op == nir_texop_txd, instr->is_array && instr->op != nir_texop_lod);
/* pack derivatives */
if (has_ddx || has_ddy) {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->chip_class == GFX9) {
- derivs = bld.pseudo(aco_opcode::p_create_vector, bld.def(v4),
- ddx, Operand(0u), ddy, Operand(0u));
+ assert(has_ddx && has_ddy && ddy.size() == 1 && ddy.size() == 1);
+ Temp zero = bld.copy(bld.def(v1), Operand(0u));
+ derivs = {ddy, zero, ddy, zero};
} else {
- derivs = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, ddx.size() + ddy.size()), ddx, ddy);
+ for (unsigned i = 0; has_ddx && i < ddx.size(); i++)
+ derivs.emplace_back(emit_extract_vector(ctx, ddx, i, v1));
+ for (unsigned i = 0; has_ddy && i < ddy.size(); i++)
+ derivs.emplace_back(emit_extract_vector(ctx, ddy, i, v1));
}
has_derivs = true;
}
@@ -7236,7 +7227,7 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
instr->is_array &&
instr->op != nir_texop_txf)
- coords = apply_round_slice(ctx, coords, 1);
+ coords[1] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coords[1]);
if (instr->coord_components > 2 &&
(instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
@@ -7248,21 +7239,16 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->op != nir_texop_txf_ms &&
instr->op != nir_texop_fragment_fetch &&
instr->op != nir_texop_fragment_mask_fetch)
- coords = apply_round_slice(ctx, coords, 2);
+ coords[2] = bld.vop1(aco_opcode::v_rndne_f32, bld.def(v1), coords[2]);
if (ctx->options->chip_class == GFX9 &&
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
instr->op != nir_texop_lod && instr->coord_components) {
assert(coords.size() > 0 && coords.size() < 3);
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, coords.size() + 1, 1)};
- vec->operands[0] = Operand(emit_extract_vector(ctx, coords, 0, v1));
- vec->operands[1] = instr->op == nir_texop_txf ? Operand((uint32_t) 0) : Operand((uint32_t) 0x3f000000);
- if (coords.size() > 1)
- vec->operands[2] = Operand(emit_extract_vector(ctx, coords, 1, v1));
- coords = bld.tmp(RegType::vgpr, coords.size() + 1);
- vec->definitions[0] = Definition(coords);
- ctx->block->instructions.emplace_back(std::move(vec));
+ coords.insert(std::next(coords.begin()), bld.copy(bld.def(v1), instr->op == nir_texop_txf ?
+ Operand((uint32_t) 0) :
+ Operand((uint32_t) 0x3f000000)));
}
bool da = should_declare_array(ctx, instr->sampler_dim, instr->is_array);
@@ -7279,31 +7265,14 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
Operand op(sample_index);
if (sample_index_cv)
op = Operand(sample_index_cv->u32);
- std::vector<Temp> fmask_load_address;
- for (unsigned i = 0; i < coords.size(); i++)
- fmask_load_address.emplace_back(emit_extract_vector(ctx, coords, i, v1));
- sample_index = adjust_sample_index_using_fmask(ctx, da, fmask_load_address, op, fmask_ptr);
+ sample_index = adjust_sample_index_using_fmask(ctx, da, coords, op, fmask_ptr);
}
if (has_offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
- Temp split_coords[coords.size()];
- emit_split_vector(ctx, coords, coords.size());
- for (unsigned i = 0; i < coords.size(); i++)
- split_coords[i] = emit_extract_vector(ctx, coords, i, v1);
-
- unsigned i = 0;
- for (; i < std::min(offset.size(), instr->coord_components); i++) {
+ for (unsigned i = 0; i < std::min(offset.size(), instr->coord_components); i++) {
Temp off = emit_extract_vector(ctx, offset, i, v1);
- split_coords[i] = bld.vadd32(bld.def(v1), split_coords[i], off);
+ coords[i] = bld.vadd32(bld.def(v1), coords[i], off);
}
-
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, coords.size(), 1)};
- for (unsigned i = 0; i < coords.size(); i++)
- vec->operands[i] = Operand(split_coords[i]);
- coords = bld.tmp(coords.regClass());
- vec->definitions[0] = Definition(coords);
- ctx->block->instructions.emplace_back(std::move(vec));
-
has_offset = false;
}
@@ -7404,12 +7373,9 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0xbf000000/*-0.5*/), half_texel[i]);
}
- Temp orig_coords[2] = {
- emit_extract_vector(ctx, coords, 0, v1),
- emit_extract_vector(ctx, coords, 1, v1)};
Temp new_coords[2] = {
- bld.vop2(aco_opcode::v_add_f32, bld.def(v1), orig_coords[0], half_texel[0]),
- bld.vop2(aco_opcode::v_add_f32, bld.def(v1), orig_coords[1], half_texel[1])
+ bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]),
+ bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[1], half_texel[1])
};
if (tg4_integer_cube_workaround) {
@@ -7458,63 +7424,14 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
ctx->block->instructions.emplace_back(std::move(vec));
new_coords[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- new_coords[0], orig_coords[0], tg4_compare_cube_wa64);
+ new_coords[0], coords[0], tg4_compare_cube_wa64);
new_coords[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
- new_coords[1], orig_coords[1], tg4_compare_cube_wa64);
- }
-
- if (coords.size() == 3) {
- coords = bld.pseudo(aco_opcode::p_create_vector, bld.def(v3),
- new_coords[0], new_coords[1],
- emit_extract_vector(ctx, coords, 2, v1));
- } else {
- assert(coords.size() == 2);
- coords = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2),
- new_coords[0], new_coords[1]);
- }
- }
-
- std::vector<Operand> args;
- if (has_offset)
- args.emplace_back(Operand(offset));
- if (has_bias)
- args.emplace_back(Operand(bias));
- if (has_compare)
- args.emplace_back(Operand(compare));
- if (has_derivs)
- args.emplace_back(Operand(derivs));
- args.emplace_back(Operand(coords));
- if (has_sample_index)
- args.emplace_back(Operand(sample_index));
- if (has_lod)
- args.emplace_back(lod);
-
- Temp arg;
- if (args.size() > 1) {
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, args.size(), 1)};
- unsigned size = 0;
- for (unsigned i = 0; i < args.size(); i++) {
- size += args[i].size();
- vec->operands[i] = args[i];
+ new_coords[1], coords[1], tg4_compare_cube_wa64);
}
- RegClass rc = RegClass(RegType::vgpr, size);
- Temp tmp = bld.tmp(rc);
- vec->definitions[0] = Definition(tmp);
- ctx->block->instructions.emplace_back(std::move(vec));
- arg = tmp;
- } else {
- assert(args[0].isTemp());
- arg = as_vgpr(ctx, args[0].getTemp());
+ coords[0] = new_coords[0];
+ coords[1] = new_coords[1];
}
- /* we don't need the bias, sample index, compare value or offset to be
- * computed in WQM but if the p_create_vector copies the coordinates, then it
- * needs to be in WQM */
- if (!(has_ddx && has_ddy) && !has_lod && !level_zero &&
- instr->sampler_dim != GLSL_SAMPLER_DIM_MS &&
- instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
- arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true);
-
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
//FIXME: if (ctx->abi->gfx9_stride_size_workaround) return ac_build_buffer_load_format_gfx9_safe()
@@ -7542,7 +7459,7 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(resource);
- mubuf->operands[1] = Operand(coords);
+ mubuf->operands[1] = Operand(coords[0]);
mubuf->operands[2] = Operand((uint32_t) 0);
mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true;
@@ -7553,6 +7470,30 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
return;
}
+ /* gather MIMG address components */
+ std::vector<Temp> args;
+ if (has_offset)
+ args.emplace_back(offset);
+ if (has_bias)
+ args.emplace_back(bias);
+ if (has_compare)
+ args.emplace_back(compare);
+ if (has_derivs)
+ args.insert(args.end(), derivs.begin(), derivs.end());
+
+ args.insert(args.end(), coords.begin(), coords.end());
+ if (has_sample_index)
+ args.emplace_back(sample_index);
+ if (has_lod)
+ args.emplace_back(lod);
+
+ Temp arg = bld.tmp(RegClass(RegType::vgpr, args.size()));
+ aco_ptr<Instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, args.size(), 1)};
+ vec->definitions[0] = Definition(arg);
+ for (unsigned i = 0; i < args.size(); i++)
+ vec->operands[i] = Operand(args[i]);
+ ctx->block->instructions.emplace_back(std::move(vec));
+
if (instr->op == nir_texop_txf ||
instr->op == nir_texop_txf_ms ||
@@ -7648,6 +7589,15 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
opcode = aco_opcode::image_get_lod;
}
+ /* we don't need the bias, sample index, compare value or offset to be
+ * computed in WQM but if the p_create_vector copies the coordinates, then it
+ * needs to be in WQM */
+ if (ctx->stage == fragment_fs &&
+ !has_derivs && !has_lod && !level_zero &&
+ instr->sampler_dim != GLSL_SAMPLER_DIM_MS &&
+ instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
+ arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true);
+
tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(sampler);