diff options
-rw-r--r-- | src/gallium/drivers/vc4/Makefile.sources | 1 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c | 172 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 101 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_uniforms.c | 18 |
5 files changed, 295 insertions, 15 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 6fb40c20562..24b577ae9f3 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -21,6 +21,7 @@ C_SOURCES := \ vc4_job.c \ vc4_nir_lower_blend.c \ vc4_nir_lower_io.c \ + vc4_nir_lower_txf_ms.c \ vc4_opt_algebraic.c \ vc4_opt_constant_folding.c \ vc4_opt_copy_propagation.c \ diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c new file mode 100644 index 00000000000..54873e6186a --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vc4_qir.h" +#include "kernel/vc4_packet.h" +#include "tgsi/tgsi_info.h" +#include "glsl/nir/nir_builder.h" + +/** @file vc4_nir_lower_txf_ms.c + * Walks the NIR generated by TGSI-to-NIR to lower its nir_texop_txf_ms + * coordinates to do the math necessary and use a plain nir_texop_txf instead. + * + * MSAA textures are laid out as 32x32-aligned blocks of RGBA8888 or Z24S8. + * We can't load them through the normal sampler path because of the lack of + * linear support in the hardware. So, we treat MSAA textures as a giant UBO + * and do the math in the shader. + */ + +static void +vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b, + nir_tex_instr *txf_ms) +{ + if (txf_ms->op != nir_texop_txf_ms) + return; + + b->cursor = nir_before_instr(&txf_ms->instr); + + nir_tex_instr *txf = nir_tex_instr_create(c->s, 1); + txf->op = nir_texop_txf; + txf->sampler = txf_ms->sampler; + txf->sampler_index = txf_ms->sampler_index; + txf->coord_components = txf_ms->coord_components; + txf->is_shadow = txf_ms->is_shadow; + txf->is_new_style_shadow = txf_ms->is_new_style_shadow; + + nir_ssa_def *coord = NULL, *sample_index = NULL; + for (int i = 0; i < txf_ms->num_srcs; i++) { + assert(txf_ms->src[i].src.is_ssa); + + switch (txf_ms->src[i].src_type) { + case nir_tex_src_coord: + coord = txf_ms->src[i].src.ssa; + break; + case nir_tex_src_ms_index: + sample_index = txf_ms->src[i].src.ssa; + break; + default: + unreachable("Unknown txf_ms src\n"); + } + } + assert(coord); + assert(sample_index); + + nir_ssa_def *x = nir_channel(b, coord, 0); + nir_ssa_def *y = nir_channel(b, coord, 1); + + uint32_t tile_w = 32; + uint32_t tile_h = 32; + uint32_t tile_w_shift = 5; + uint32_t tile_h_shift = 5; + uint32_t tile_size = (tile_h * tile_w * + VC4_MAX_SAMPLES * sizeof(uint32_t)); + unsigned unit = txf_ms->sampler_index; + uint32_t w = align(c->key->tex[unit].msaa_width, tile_w); + uint32_t w_tiles = w / tile_w; + + nir_ssa_def *x_tile = nir_ushr(b, x, 
nir_imm_int(b, tile_w_shift)); + nir_ssa_def *y_tile = nir_ushr(b, y, nir_imm_int(b, tile_h_shift)); + nir_ssa_def *tile_addr = nir_iadd(b, + nir_imul(b, x_tile, + nir_imm_int(b, tile_size)), + nir_imul(b, y_tile, + nir_imm_int(b, (w_tiles * + tile_size)))); + nir_ssa_def *x_subspan = nir_iand(b, x, + nir_imm_int(b, (tile_w - 1) & ~1)); + nir_ssa_def *y_subspan = nir_iand(b, y, + nir_imm_int(b, (tile_h - 1) & ~1)); + nir_ssa_def *subspan_addr = nir_iadd(b, + nir_imul(b, x_subspan, + nir_imm_int(b, 2 * VC4_MAX_SAMPLES * sizeof(uint32_t))), + nir_imul(b, y_subspan, + nir_imm_int(b, + tile_w * + VC4_MAX_SAMPLES * + sizeof(uint32_t)))); + + nir_ssa_def *pixel_addr = nir_ior(b, + nir_iand(b, + nir_ishl(b, x, + nir_imm_int(b, 2)), + nir_imm_int(b, (1 << 2))), + nir_iand(b, + nir_ishl(b, y, + nir_imm_int(b, 3)), + nir_imm_int(b, (1 << 3)))); + + nir_ssa_def *sample_addr = nir_ishl(b, sample_index, nir_imm_int(b, 4)); + + nir_ssa_def *addr = nir_iadd(b, + nir_ior(b, sample_addr, pixel_addr), + nir_iadd(b, subspan_addr, tile_addr)); + + txf->src[0].src_type = nir_tex_src_coord; + txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0))); + nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL); + nir_builder_instr_insert(b, &txf->instr); + nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa, + nir_src_for_ssa(&txf->dest.ssa)); + nir_instr_remove(&txf_ms->instr); +} + +static bool +vc4_nir_lower_txf_ms_block(nir_block *block, void *arg) +{ + struct vc4_compile *c = arg; + nir_function_impl *impl = + nir_cf_node_get_function(&block->cf_node); + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_tex) { + vc4_nir_lower_txf_ms_instr(c, &b, + nir_instr_as_tex(instr)); + } + } + + return true; +} + +static bool +vc4_nir_lower_txf_ms_impl(struct vc4_compile *c, nir_function_impl *impl) +{ + nir_foreach_block(impl, vc4_nir_lower_txf_ms_block, c); + + nir_metadata_preserve(impl, + nir_metadata_block_index | 
+ nir_metadata_dominance); + + return true; +} + +void +vc4_nir_lower_txf_ms(struct vc4_compile *c) +{ + nir_foreach_overload(c->s, overload) { + if (overload->impl) + vc4_nir_lower_txf_ms_impl(c, overload->impl); + } +} diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index dda2d84b5b3..31968bb5db9 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -294,6 +294,76 @@ ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1) qir_uniform_ui(c, 24))); } +static struct qreg +ntq_scale_depth_texture(struct vc4_compile *c, struct qreg src) +{ + struct qreg depthf = qir_ITOF(c, qir_SHR(c, src, + qir_uniform_ui(c, 8))); + return qir_FMUL(c, depthf, qir_uniform_f(c, 1.0f/0xffffff)); +} + +/** + * Emits a lowered TXF_MS from an MSAA texture. + * + * The addressing math has been lowered in NIR, and now we just need to read + * it like a UBO. + */ +static void +ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr) +{ + uint32_t tile_width = 32; + uint32_t tile_height = 32; + uint32_t tile_size = (tile_height * tile_width * + VC4_MAX_SAMPLES * sizeof(uint32_t)); + + unsigned unit = instr->sampler_index; + uint32_t w = align(c->key->tex[unit].msaa_width, tile_width); + uint32_t w_tiles = w / tile_width; + uint32_t h = align(c->key->tex[unit].msaa_height, tile_height); + uint32_t h_tiles = h / tile_height; + uint32_t size = w_tiles * h_tiles * tile_size; + + struct qreg addr; + assert(instr->num_srcs == 1); + assert(instr->src[0].src_type == nir_tex_src_coord); + addr = ntq_get_src(c, instr->src[0].src, 0); + + /* Perform the clamping required by kernel validation. 
*/ + addr = qir_MAX(c, addr, qir_uniform_ui(c, 0)); + addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4)); + + qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit)); + + struct qreg tex = qir_TEX_RESULT(c); + c->num_texture_samples++; + + struct qreg texture_output[4]; + enum pipe_format format = c->key->tex[unit].format; + if (util_format_is_depth_or_stencil(format)) { + struct qreg scaled = ntq_scale_depth_texture(c, tex); + for (int i = 0; i < 4; i++) + texture_output[i] = scaled; + } else { + struct qreg tex_result_unpacked[4]; + for (int i = 0; i < 4; i++) + tex_result_unpacked[i] = qir_UNPACK_8_F(c, tex, i); + + const uint8_t *format_swiz = + vc4_get_format_swizzle(c->key->tex[unit].format); + for (int i = 0; i < 4; i++) { + texture_output[i] = + get_swizzled_channel(c, tex_result_unpacked, + format_swiz[i]); + } + } + + struct qreg *dest = ntq_get_dest(c, &instr->dest); + for (int i = 0; i < 4; i++) { + dest[i] = get_swizzled_channel(c, texture_output, + c->key->tex[unit].swizzle[i]); + } +} + static void ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) { @@ -301,6 +371,11 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) bool is_txb = false, is_txl = false, has_proj = false; unsigned unit = instr->sampler_index; + if (instr->op == nir_texop_txf) { + ntq_emit_txf(c, instr); + return; + } + for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { case nir_tex_src_coord: @@ -396,11 +471,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg unpacked[4]; if (util_format_is_depth_or_stencil(format)) { - struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex, - qir_uniform_ui(c, 8))); - struct qreg normalized = qir_FMUL(c, depthf, - qir_uniform_f(c, 1.0f/0xffffff)); - + struct qreg normalized = ntq_scale_depth_texture(c, tex); struct qreg depth_output; struct qreg one = qir_uniform_f(c, 1.0f); @@ -1712,6 +1783,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, 
nir_lower_clip_vs(c->s, c->key->ucp_enables); vc4_nir_lower_io(c); + vc4_nir_lower_txf_ms(c); nir_lower_idiv(c->s); nir_lower_load_const_to_scalar(c->s); @@ -1947,12 +2019,19 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key, struct pipe_sampler_state *sampler_state = texstate->samplers[i]; - if (sampler) { - key->tex[i].format = sampler->format; - key->tex[i].swizzle[0] = sampler->swizzle_r; - key->tex[i].swizzle[1] = sampler->swizzle_g; - key->tex[i].swizzle[2] = sampler->swizzle_b; - key->tex[i].swizzle[3] = sampler->swizzle_a; + if (!sampler) + continue; + + key->tex[i].format = sampler->format; + key->tex[i].swizzle[0] = sampler->swizzle_r; + key->tex[i].swizzle[1] = sampler->swizzle_g; + key->tex[i].swizzle[2] = sampler->swizzle_b; + key->tex[i].swizzle[3] = sampler->swizzle_a; + + if (sampler->texture->nr_samples > 1) { /* 0 or 1: not MSAA */ + key->tex[i].msaa_width = sampler->texture->width0; + key->tex[i].msaa_height = sampler->texture->height0; + } else { key->tex[i].compare_mode = sampler_state->compare_mode; key->tex[i].compare_func = sampler_state->compare_func; key->tex[i].wrap_s = sampler_state->wrap_s; diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4e406d60d72..d53095ed222 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -233,6 +233,8 @@ enum quniform_contents { /** A reference to a texture config parameter 2 cubemap stride uniform */ QUNIFORM_TEXTURE_CONFIG_P2, + QUNIFORM_TEXTURE_MSAA_ADDR, + QUNIFORM_UBO_ADDR, QUNIFORM_TEXRECT_SCALE_X, @@ -287,11 +289,18 @@ struct vc4_key { struct vc4_uncompiled_shader *shader_state; struct { enum pipe_format format; - unsigned compare_mode:1; - unsigned compare_func:3; - unsigned wrap_s:3; - unsigned wrap_t:3; uint8_t swizzle[4]; + union { + struct { + unsigned compare_mode:1; + unsigned compare_func:3; + unsigned wrap_s:3; + unsigned wrap_t:3; + }; + struct { + uint16_t msaa_width, msaa_height; + }; + }; } 
tex[VC4_MAX_TEXTURE_SAMPLERS]; uint8_t ucp_enables; }; @@ -490,6 +499,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b, enum quniform_contents contents); nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b, nir_ssa_def **srcs, int swiz); +void vc4_nir_lower_txf_ms(struct vc4_compile *c); void qir_lower_uniforms(struct vc4_compile *c); void qpu_schedule_instructions(struct vc4_compile *c); diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c index 5dfdd73f7bd..262531f1bd7 100644 --- a/src/gallium/drivers/vc4/vc4_uniforms.c +++ b/src/gallium/drivers/vc4/vc4_uniforms.c @@ -71,6 +71,18 @@ write_texture_p2(struct vc4_context *vc4, VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD)); } +static void +write_texture_msaa_addr(struct vc4_context *vc4, + struct vc4_cl_out **uniforms, + struct vc4_texture_stateobj *texstate, + uint32_t unit) +{ + struct pipe_sampler_view *texture = texstate->textures[unit]; + struct vc4_resource *rsc = vc4_resource(texture->texture); + + cl_aligned_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, 0); +} + #define SWIZ(x,y,z,w) { \ UTIL_FORMAT_SWIZZLE_##x, \ @@ -244,6 +256,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0); break; + case QUNIFORM_TEXTURE_MSAA_ADDR: + write_texture_msaa_addr(vc4, &uniforms, + texstate, uinfo->data[i]); + break; + case QUNIFORM_TEXTURE_BORDER_COLOR: write_texture_border_color(vc4, &uniforms, texstate, uinfo->data[i]); @@ -349,6 +366,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader) case QUNIFORM_TEXTURE_CONFIG_P1: case QUNIFORM_TEXTURE_CONFIG_P2: case QUNIFORM_TEXTURE_BORDER_COLOR: + case QUNIFORM_TEXTURE_MSAA_ADDR: case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: dirty |= VC4_DIRTY_TEXSTATE; |