summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/vc4/Makefile.sources 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c 172
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 101
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 18
-rw-r--r-- src/gallium/drivers/vc4/vc4_uniforms.c 18
5 files changed, 295 insertions, 15 deletions
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 6fb40c20562..24b577ae9f3 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -21,6 +21,7 @@ C_SOURCES := \
vc4_job.c \
vc4_nir_lower_blend.c \
vc4_nir_lower_io.c \
+ vc4_nir_lower_txf_ms.c \
vc4_opt_algebraic.c \
vc4_opt_constant_folding.c \
vc4_opt_copy_propagation.c \
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
new file mode 100644
index 00000000000..54873e6186a
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_qir.h"
+#include "kernel/vc4_packet.h"
+#include "tgsi/tgsi_info.h"
+#include "glsl/nir/nir_builder.h"
+
+/** @file vc4_nir_lower_txf_ms.c
+ * Walks the NIR generated by TGSI-to-NIR to lower its nir_texop_txf_ms
+ * coordinates to do the math necessary and use a plain nir_texop_txf instead.
+ *
+ * MSAA textures are laid out as 32x32-aligned blocks of RGBA8888 or Z24S8.
+ * We can't load them through the normal sampler path because of the lack of
+ * linear support in the hardware. So, we treat MSAA textures as a giant UBO
+ * and do the math in the shader.
+ */
+
+/**
+ * Replaces a single nir_texop_txf_ms instruction with a nir_texop_txf whose
+ * coordinate is a byte offset computed in the shader.
+ *
+ * Instructions with any other texture op are left untouched. The offset is
+ * built from the integer x/y coordinate and the sample index, using 32x32
+ * pixel tiles and the tile-aligned width taken from
+ * c->key->tex[unit].msaa_width.
+ *
+ * \param c       compile context; supplies the shader (c->s) and the key.
+ * \param b       builder used to emit the address math.
+ * \param txf_ms  texture instruction to examine and possibly replace.
+ */
+static void
+vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b,
+ nir_tex_instr *txf_ms)
+{
+ if (txf_ms->op != nir_texop_txf_ms)
+ return;
+
+ /* All new instructions go immediately before the one being replaced. */
+ b->cursor = nir_before_instr(&txf_ms->instr);
+
+ /* The replacement has a single source (the computed address) and
+ * inherits the sampler binding and shadow flags of the original.
+ */
+ nir_tex_instr *txf = nir_tex_instr_create(c->s, 1);
+ txf->op = nir_texop_txf;
+ txf->sampler = txf_ms->sampler;
+ txf->sampler_index = txf_ms->sampler_index;
+ txf->coord_components = txf_ms->coord_components;
+ txf->is_shadow = txf_ms->is_shadow;
+ txf->is_new_style_shadow = txf_ms->is_new_style_shadow;
+
+ /* Pick out the coordinate and the sample index; any other source type
+ * on a txf_ms is treated as unreachable.
+ */
+ nir_ssa_def *coord = NULL, *sample_index = NULL;
+ for (int i = 0; i < txf_ms->num_srcs; i++) {
+ assert(txf_ms->src[i].src.is_ssa);
+
+ switch (txf_ms->src[i].src_type) {
+ case nir_tex_src_coord:
+ coord = txf_ms->src[i].src.ssa;
+ break;
+ case nir_tex_src_ms_index:
+ sample_index = txf_ms->src[i].src.ssa;
+ break;
+ default:
+ unreachable("Unknown txf_ms src\n");
+ }
+ }
+ assert(coord);
+ assert(sample_index);
+
+ nir_ssa_def *x = nir_channel(b, coord, 0);
+ nir_ssa_def *y = nir_channel(b, coord, 1);
+
+ /* Tiles are 32x32 pixels; tile_size is the byte size of one tile with
+ * VC4_MAX_SAMPLES 32-bit values per pixel. The width is rounded up to
+ * a whole number of tiles.
+ */
+ uint32_t tile_w = 32;
+ uint32_t tile_h = 32;
+ uint32_t tile_w_shift = 5;
+ uint32_t tile_h_shift = 5;
+ uint32_t tile_size = (tile_h * tile_w *
+ VC4_MAX_SAMPLES * sizeof(uint32_t));
+ unsigned unit = txf_ms->sampler_index;
+ uint32_t w = align(c->key->tex[unit].msaa_width, tile_w);
+ uint32_t w_tiles = w / tile_w;
+
+ /* Byte offset of the tile containing (x, y): tiles are laid out
+ * row-major with w_tiles tiles per row.
+ */
+ nir_ssa_def *x_tile = nir_ushr(b, x, nir_imm_int(b, tile_w_shift));
+ nir_ssa_def *y_tile = nir_ushr(b, y, nir_imm_int(b, tile_h_shift));
+ nir_ssa_def *tile_addr = nir_iadd(b,
+ nir_imul(b, x_tile,
+ nir_imm_int(b, tile_size)),
+ nir_imul(b, y_tile,
+ nir_imm_int(b, (w_tiles *
+ tile_size))));
+ /* Offset within the tile of the 2x2 pixel group: the low bit of x and
+ * y is masked off here and handled by pixel_addr below.
+ */
+ nir_ssa_def *x_subspan = nir_iand(b, x,
+ nir_imm_int(b, (tile_w - 1) & ~1));
+ nir_ssa_def *y_subspan = nir_iand(b, y,
+ nir_imm_int(b, (tile_h - 1) & ~1));
+ nir_ssa_def *subspan_addr = nir_iadd(b,
+ nir_imul(b, x_subspan,
+ nir_imm_int(b, 2 * VC4_MAX_SAMPLES * sizeof(uint32_t))),
+ nir_imul(b, y_subspan,
+ nir_imm_int(b,
+ tile_w *
+ VC4_MAX_SAMPLES *
+ sizeof(uint32_t))));
+
+ /* Offset of the pixel within its 2x2 group: bit 0 of x becomes address
+ * bit 2 and bit 0 of y becomes address bit 3.
+ */
+ nir_ssa_def *pixel_addr = nir_ior(b,
+ nir_iand(b,
+ nir_ishl(b, x,
+ nir_imm_int(b, 2)),
+ nir_imm_int(b, (1 << 2))),
+ nir_iand(b,
+ nir_ishl(b, y,
+ nir_imm_int(b, 3)),
+ nir_imm_int(b, (1 << 3))));
+
+ /* Each sample index steps the address by 16 bytes. */
+ nir_ssa_def *sample_addr = nir_ishl(b, sample_index, nir_imm_int(b, 4));
+
+ nir_ssa_def *addr = nir_iadd(b,
+ nir_ior(b, sample_addr, pixel_addr),
+ nir_iadd(b, subspan_addr, tile_addr));
+
+ /* The address goes in the first coordinate channel; the second channel
+ * is zero.
+ */
+ txf->src[0].src_type = nir_tex_src_coord;
+ txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0)));
+ nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL);
+ nir_builder_instr_insert(b, &txf->instr);
+ /* Redirect users of the old result to the new txf, then drop the
+ * txf_ms.
+ */
+ nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa,
+ nir_src_for_ssa(&txf->dest.ssa));
+ nir_instr_remove(&txf_ms->instr);
+}
+
+/**
+ * Per-block callback: runs the txf_ms lowering on every texture instruction
+ * in the block. \p arg is the struct vc4_compile being processed. Always
+ * returns true.
+ */
+static bool
+vc4_nir_lower_txf_ms_block(nir_block *block, void *arg)
+{
+        struct vc4_compile *compile = arg;
+
+        nir_builder builder;
+        nir_builder_init(&builder,
+                         nir_cf_node_get_function(&block->cf_node));
+
+        nir_foreach_instr_safe(block, instr) {
+                /* Only texture instructions are candidates for lowering. */
+                if (instr->type != nir_instr_type_tex)
+                        continue;
+
+                vc4_nir_lower_txf_ms_instr(compile, &builder,
+                                           nir_instr_as_tex(instr));
+        }
+
+        return true;
+}
+
+/**
+ * Lowers every block of one function implementation, then marks block
+ * indices and dominance as preserved metadata. Always returns true.
+ */
+static bool
+vc4_nir_lower_txf_ms_impl(struct vc4_compile *c, nir_function_impl *impl)
+{
+        const nir_metadata preserved = nir_metadata_block_index |
+                                       nir_metadata_dominance;
+
+        nir_foreach_block(impl, vc4_nir_lower_txf_ms_block, c);
+        nir_metadata_preserve(impl, preserved);
+
+        return true;
+}
+
+/**
+ * Entry point: applies the txf_ms lowering to every function overload in
+ * c->s that has an implementation.
+ */
+void
+vc4_nir_lower_txf_ms(struct vc4_compile *c)
+{
+        nir_foreach_overload(c->s, overload) {
+                /* Overloads without a body have nothing to lower. */
+                if (!overload->impl)
+                        continue;
+
+                vc4_nir_lower_txf_ms_impl(c, overload->impl);
+        }
+}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index dda2d84b5b3..31968bb5db9 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -294,6 +294,76 @@ ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
qir_uniform_ui(c, 24)));
}
+/**
+ * Shifts the raw texel value right by 8 bits, converts the result to float
+ * and scales it by 1/0xffffff.
+ */
+static struct qreg
+ntq_scale_depth_texture(struct vc4_compile *c, struct qreg src)
+{
+        struct qreg depth_bits = qir_SHR(c, src, qir_uniform_ui(c, 8));
+        struct qreg depth_float = qir_ITOF(c, depth_bits);
+
+        return qir_FMUL(c, depth_float, qir_uniform_f(c, 1.0f/0xffffff));
+}
+
+/**
+ * Emits a lowered TXF_MS from an MSAA texture.
+ *
+ * The addressing math has been lowered in NIR, and now we just need to read
+ * it like a UBO.
+ *
+ * The instruction must have exactly one source, of type nir_tex_src_coord;
+ * component 0 of that source is used as the byte address. The fetched
+ * value is converted according to the texture format in the shader key and
+ * written to the instruction's destination after the key's swizzle.
+ */
+static void
+ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
+{
+ /* Same 32x32 tile geometry as the NIR lowering; size is the byte size
+ * of the whole tile-aligned surface.
+ */
+ uint32_t tile_width = 32;
+ uint32_t tile_height = 32;
+ uint32_t tile_size = (tile_height * tile_width *
+ VC4_MAX_SAMPLES * sizeof(uint32_t));
+
+ unsigned unit = instr->sampler_index;
+ uint32_t w = align(c->key->tex[unit].msaa_width, tile_width);
+ uint32_t w_tiles = w / tile_width;
+ uint32_t h = align(c->key->tex[unit].msaa_height, tile_height);
+ uint32_t h_tiles = h / tile_height;
+ uint32_t size = w_tiles * h_tiles * tile_size;
+
+ struct qreg addr;
+ assert(instr->num_srcs == 1);
+ assert(instr->src[0].src_type == nir_tex_src_coord);
+ addr = ntq_get_src(c, instr->src[0].src, 0);
+
+ /* Perform the clamping required by kernel validation. */
+ /* The address is limited to the range [0, size - 4]. */
+ addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
+ addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
+
+ /* Direct fetch at addr, paired with the QUNIFORM_TEXTURE_MSAA_ADDR
+ * uniform for this unit.
+ */
+ qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
+
+ struct qreg tex = qir_TEX_RESULT(c);
+ c->num_texture_samples++;
+
+ struct qreg texture_output[4];
+ enum pipe_format format = c->key->tex[unit].format;
+ if (util_format_is_depth_or_stencil(format)) {
+ /* Depth/stencil: the scaled depth is replicated to all four
+ * channels.
+ */
+ struct qreg scaled = ntq_scale_depth_texture(c, tex);
+ for (int i = 0; i < 4; i++)
+ texture_output[i] = scaled;
+ } else {
+ /* Color: unpack the four channels with qir_UNPACK_8_F, then
+ * reorder them with the format's swizzle.
+ */
+ struct qreg tex_result_unpacked[4];
+ for (int i = 0; i < 4; i++)
+ tex_result_unpacked[i] = qir_UNPACK_8_F(c, tex, i);
+
+ const uint8_t *format_swiz =
+ vc4_get_format_swizzle(c->key->tex[unit].format);
+ for (int i = 0; i < 4; i++) {
+ texture_output[i] =
+ get_swizzled_channel(c, tex_result_unpacked,
+ format_swiz[i]);
+ }
+ }
+
+ /* Apply the swizzle stored in the key for this unit on top of the
+ * format conversion.
+ */
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
+ for (int i = 0; i < 4; i++) {
+ dest[i] = get_swizzled_channel(c, texture_output,
+ c->key->tex[unit].swizzle[i]);
+ }
+}
+
static void
ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
{
@@ -301,6 +371,11 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
bool is_txb = false, is_txl = false, has_proj = false;
unsigned unit = instr->sampler_index;
+ if (instr->op == nir_texop_txf) {
+ ntq_emit_txf(c, instr);
+ return;
+ }
+
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_coord:
@@ -396,11 +471,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
struct qreg unpacked[4];
if (util_format_is_depth_or_stencil(format)) {
- struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
- qir_uniform_ui(c, 8)));
- struct qreg normalized = qir_FMUL(c, depthf,
- qir_uniform_f(c, 1.0f/0xffffff));
-
+ struct qreg normalized = ntq_scale_depth_texture(c, tex);
struct qreg depth_output;
struct qreg one = qir_uniform_f(c, 1.0f);
@@ -1712,6 +1783,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
nir_lower_clip_vs(c->s, c->key->ucp_enables);
vc4_nir_lower_io(c);
+ vc4_nir_lower_txf_ms(c);
nir_lower_idiv(c->s);
nir_lower_load_const_to_scalar(c->s);
@@ -1947,12 +2019,19 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
struct pipe_sampler_state *sampler_state =
texstate->samplers[i];
- if (sampler) {
- key->tex[i].format = sampler->format;
- key->tex[i].swizzle[0] = sampler->swizzle_r;
- key->tex[i].swizzle[1] = sampler->swizzle_g;
- key->tex[i].swizzle[2] = sampler->swizzle_b;
- key->tex[i].swizzle[3] = sampler->swizzle_a;
+ if (!sampler)
+ continue;
+
+ key->tex[i].format = sampler->format;
+ key->tex[i].swizzle[0] = sampler->swizzle_r;
+ key->tex[i].swizzle[1] = sampler->swizzle_g;
+ key->tex[i].swizzle[2] = sampler->swizzle_b;
+ key->tex[i].swizzle[3] = sampler->swizzle_a;
+
+ if (sampler->texture->nr_samples) {
+ key->tex[i].msaa_width = sampler->texture->width0;
+ key->tex[i].msaa_height = sampler->texture->height0;
+ } else if (sampler){
key->tex[i].compare_mode = sampler_state->compare_mode;
key->tex[i].compare_func = sampler_state->compare_func;
key->tex[i].wrap_s = sampler_state->wrap_s;
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 4e406d60d72..d53095ed222 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -233,6 +233,8 @@ enum quniform_contents {
/** A reference to a texture config parameter 2 cubemap stride uniform */
QUNIFORM_TEXTURE_CONFIG_P2,
+ QUNIFORM_TEXTURE_MSAA_ADDR,
+
QUNIFORM_UBO_ADDR,
QUNIFORM_TEXRECT_SCALE_X,
@@ -287,11 +289,18 @@ struct vc4_key {
struct vc4_uncompiled_shader *shader_state;
struct {
enum pipe_format format;
- unsigned compare_mode:1;
- unsigned compare_func:3;
- unsigned wrap_s:3;
- unsigned wrap_t:3;
uint8_t swizzle[4];
+ union {
+ struct {
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ };
+ struct {
+ uint16_t msaa_width, msaa_height;
+ };
+ };
} tex[VC4_MAX_TEXTURE_SAMPLERS];
uint8_t ucp_enables;
};
@@ -490,6 +499,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
enum quniform_contents contents);
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
nir_ssa_def **srcs, int swiz);
+void vc4_nir_lower_txf_ms(struct vc4_compile *c);
void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
index 5dfdd73f7bd..262531f1bd7 100644
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -71,6 +71,18 @@ write_texture_p2(struct vc4_context *vc4,
VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
}
+/**
+ * Writes the uniform for QUNIFORM_TEXTURE_MSAA_ADDR: a cl_aligned_reloc()
+ * against the BO of the texture bound at \p unit, at offset 0.
+ */
+static void
+write_texture_msaa_addr(struct vc4_context *vc4,
+                        struct vc4_cl_out **uniforms,
+                        struct vc4_texture_stateobj *texstate,
+                        uint32_t unit)
+{
+        struct pipe_sampler_view *view = texstate->textures[unit];
+        struct vc4_resource *resource = vc4_resource(view->texture);
+
+        cl_aligned_reloc(vc4, &vc4->uniforms, uniforms, resource->bo, 0);
+}
+
#define SWIZ(x,y,z,w) { \
UTIL_FORMAT_SWIZZLE_##x, \
@@ -244,6 +256,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
break;
+ case QUNIFORM_TEXTURE_MSAA_ADDR:
+ write_texture_msaa_addr(vc4, &uniforms,
+ texstate, uinfo->data[i]);
+ break;
+
case QUNIFORM_TEXTURE_BORDER_COLOR:
write_texture_border_color(vc4, &uniforms,
texstate, uinfo->data[i]);
@@ -349,6 +366,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
case QUNIFORM_TEXTURE_CONFIG_P1:
case QUNIFORM_TEXTURE_CONFIG_P2:
case QUNIFORM_TEXTURE_BORDER_COLOR:
+ case QUNIFORM_TEXTURE_MSAA_ADDR:
case QUNIFORM_TEXRECT_SCALE_X:
case QUNIFORM_TEXRECT_SCALE_Y:
dirty |= VC4_DIRTY_TEXSTATE;