author     Eric Anholt <[email protected]>   2015-07-31 20:58:57 -0700
committer  Eric Anholt <[email protected]>   2015-10-20 12:47:27 +0100
commit     921feb8782bdc3c459922858bee6d55919467436 (patch)
tree       78de8ee7d805489f662193278744a5607d724d3a /src
parent     85b946478c326df853926ed18bfbd898c0a514ef (diff)
vc4: Switch our vertex attr lowering to being NIR-based.
This exposes more information to NIR's optimization, and should be
particularly useful when we do range-based optimization.

total uniforms in shared programs: 32066 -> 32065 (-0.00%)
uniforms in affected programs: 21 -> 20 (-4.76%)
total instructions in shared programs: 93104 -> 92630 (-0.51%)
instructions in affected programs: 31901 -> 31427 (-1.49%)
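For reference, here is a minimal standalone C sketch of the snorm8 trick that
both the old QIR path and the new NIR-based path rely on: XOR the byte with
0x80 so it can be unpacked as unorm8, then remap [0, 1] to [-1, 1] with
x * 2 - 1. This is illustrative scalar code only, not driver code; the helper
names are made up for the example.

#include <stdint.h>
#include <stdio.h>

/* Plain unorm8 unpack, the scalar analogue of vc4_nir_unpack_8f(). */
static float
unorm8_to_float(uint8_t v)
{
        return v / 255.0f;
}

/* Scalar analogue of the lowered snorm8 path: nir_ixor() with 0x80808080,
 * unorm unpack, then fmul/fsub to remap into [-1, 1].
 */
static float
snorm8_via_xor(uint8_t byte)
{
        uint8_t biased = byte ^ 0x80;
        return unorm8_to_float(biased) * 2.0f - 1.0f;
}

int
main(void)
{
        /* Endpoints map exactly; intermediate values differ from the exact
         * s/127 conversion by less than 1/127.
         */
        printf("%f %f %f\n",
               snorm8_via_xor((uint8_t)-128),  /* -1.000000 */
               snorm8_via_xor(0),              /*  0.003922 */
               snorm8_via_xor(127));           /*  1.000000 */
        return 0;
}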
Diffstat (limited to 'src')
-rw-r--r--   src/gallium/drivers/vc4/vc4_nir_lower_io.c   233
-rw-r--r--   src/gallium/drivers/vc4/vc4_program.c        110
2 files changed, 200 insertions, 143 deletions
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index 761e2c819c5..caf706aa2a6 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -23,6 +23,7 @@
#include "vc4_qir.h"
#include "glsl/nir/nir_builder.h"
+#include "util/u_format.h"
/**
* Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
@@ -50,14 +51,182 @@ replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
nir_instr_remove(&intr->instr);
}
+static nir_ssa_def *
+vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_ubitfield_extract(b,
+ src,
+ nir_imm_int(b, 8 * chan),
+ nir_imm_int(b, 8));
+}
+
+/** Returns the 16 bit field as a sign-extended 32-bit value. */
+static nir_ssa_def *
+vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_ibitfield_extract(b,
+ src,
+ nir_imm_int(b, 16 * chan),
+ nir_imm_int(b, 16));
+}
+
+/** Returns the 16 bit field as an unsigned 32 bit value. */
+static nir_ssa_def *
+vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ if (chan == 0) {
+ return nir_iand(b, src, nir_imm_int(b, 0xffff));
+ } else {
+ return nir_ushr(b, src, nir_imm_int(b, 16));
+ }
+}
+
+static nir_ssa_def *
+vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
+{
+ return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
+}
+
+static nir_ssa_def *
+vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
+ nir_builder *b,
+ nir_ssa_def **vpm_reads,
+ uint8_t swiz,
+ const struct util_format_description *desc)
+{
+ const struct util_format_channel_description *chan =
+ &desc->channel[swiz];
+ nir_ssa_def *temp;
+
+ if (swiz > UTIL_FORMAT_SWIZZLE_W) {
+ return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
+ } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
+ return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
+ } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (chan->normalized) {
+ return nir_fmul(b,
+ nir_i2f(b, vpm_reads[swiz]),
+ nir_imm_float(b,
+ 1.0 / 0x7fffffff));
+ } else {
+ return nir_i2f(b, vpm_reads[swiz]);
+ }
+ } else if (chan->size == 8 &&
+ (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
+ nir_ssa_def *vpm = vpm_reads[0];
+ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
+ if (chan->normalized) {
+ return nir_fsub(b, nir_fmul(b,
+ vc4_nir_unpack_8f(b, temp, swiz),
+ nir_imm_float(b, 2.0)),
+ nir_imm_float(b, 1.0));
+ } else {
+ return nir_fadd(b,
+ nir_i2f(b,
+ vc4_nir_unpack_8i(b, temp,
+ swiz)),
+ nir_imm_float(b, -128.0));
+ }
+ } else {
+ if (chan->normalized) {
+ return vc4_nir_unpack_8f(b, vpm, swiz);
+ } else {
+ return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
+ }
+ }
+ } else if (chan->size == 16 &&
+ (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
+ nir_ssa_def *vpm = vpm_reads[swiz / 2];
+
+ /* Note that UNPACK_16F eats a half float, not ints, so we use
+ * UNPACK_16_I for all of these.
+ */
+ if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+ temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
+ if (chan->normalized) {
+ return nir_fmul(b, temp,
+ nir_imm_float(b, 1/32768.0f));
+ } else {
+ return temp;
+ }
+ } else {
+ temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
+ if (chan->normalized) {
+ return nir_fmul(b, temp,
+ nir_imm_float(b, 1 / 65535.0));
+ } else {
+ return temp;
+ }
+ }
+ } else {
+ return NULL;
+ }
+}
+
+static void
+vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+
+ int attr = intr->const_index[0];
+ enum pipe_format format = c->vs_key->attr_formats[attr];
+ uint32_t attr_size = util_format_get_blocksize(format);
+
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ /* Generate dword loads for the VPM values (Since these intrinsics may
+ * be reordered, the actual reads will be generated at the top of the
+ * shader by ntq_setup_inputs()).
+ */
+ nir_ssa_def *vpm_reads[4];
+ for (int i = 0; i < align(attr_size, 4) / 4; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s,
+ nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ vpm_reads[i] = &intr_comp->dest.ssa;
+ }
+
+ bool format_warned = false;
+ const struct util_format_description *desc =
+ util_format_description(format);
+
+ nir_ssa_def *dests[4];
+ for (int i = 0; i < 4; i++) {
+ uint8_t swiz = desc->swizzle[i];
+ dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
+ desc);
+
+ if (!dests[i]) {
+ if (!format_warned) {
+ fprintf(stderr,
+ "vtx element %d unsupported type: %s\n",
+ attr, util_format_name(format));
+ format_warned = true;
+ }
+ dests[i] = nir_imm_float(b, 0.0);
+ }
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
static void
-vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
- nir_intrinsic_instr *intr)
+vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
{
b->cursor = nir_before_instr(&intr->instr);
- if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
- VC4_NIR_TLB_COLOR_READ_INPUT) {
+ if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
/* This doesn't need any lowering. */
return;
}
@@ -87,38 +256,31 @@ vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
dests[i] = &intr_comp->dest.ssa;
}
- switch (c->stage) {
- case QSTAGE_FRAG:
- if (input_var->data.location == VARYING_SLOT_FACE) {
- dests[0] = nir_fsub(b,
- nir_imm_float(b, 1.0),
- nir_fmul(b,
- nir_i2f(b, dests[0]),
- nir_imm_float(b, 2.0)));
- dests[1] = nir_imm_float(b, 0.0);
+ if (input_var->data.location == VARYING_SLOT_FACE) {
+ dests[0] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ nir_fmul(b,
+ nir_i2f(b, dests[0]),
+ nir_imm_float(b, 2.0)));
+ dests[1] = nir_imm_float(b, 0.0);
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
+ if (c->fs_key->point_sprite_mask &
+ (1 << (input_var->data.location -
+ VARYING_SLOT_VAR0))) {
+ if (!c->fs_key->is_points) {
+ dests[0] = nir_imm_float(b, 0.0);
+ dests[1] = nir_imm_float(b, 0.0);
+ }
+ if (c->fs_key->point_coord_upper_left) {
+ dests[1] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dests[1]);
+ }
dests[2] = nir_imm_float(b, 0.0);
dests[3] = nir_imm_float(b, 1.0);
- } else if (input_var->data.location >= VARYING_SLOT_VAR0) {
- if (c->fs_key->point_sprite_mask &
- (1 << (input_var->data.location -
- VARYING_SLOT_VAR0))) {
- if (!c->fs_key->is_points) {
- dests[0] = nir_imm_float(b, 0.0);
- dests[1] = nir_imm_float(b, 0.0);
- }
- if (c->fs_key->point_coord_upper_left) {
- dests[1] = nir_fsub(b,
- nir_imm_float(b, 1.0),
- dests[1]);
- }
- dests[2] = nir_imm_float(b, 0.0);
- dests[3] = nir_imm_float(b, 1.0);
- }
}
- break;
- case QSTAGE_COORD:
- case QSTAGE_VERT:
- break;
}
replace_intrinsic_with_vec4(b, intr, dests);
@@ -232,7 +394,10 @@ vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
- vc4_nir_lower_input(c, b, intr);
+ if (c->stage == QSTAGE_FRAG)
+ vc4_nir_lower_fs_input(c, b, intr);
+ else
+ vc4_nir_lower_vertex_attr(c, b, intr);
break;
case nir_intrinsic_store_output:
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index d3e856a8530..6e9ec6530c6 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -602,126 +602,18 @@ ntq_fsign(struct vc4_compile *c, struct qreg src)
qir_uniform_f(c, -1.0));
}
-static struct qreg
-get_channel_from_vpm(struct vc4_compile *c,
- struct qreg *vpm_reads,
- uint8_t swiz,
- const struct util_format_description *desc)
-{
- const struct util_format_channel_description *chan =
- &desc->channel[swiz];
- struct qreg temp;
-
- if (swiz > UTIL_FORMAT_SWIZZLE_W)
- return get_swizzled_channel(c, vpm_reads, swiz);
- else if (chan->size == 32 &&
- chan->type == UTIL_FORMAT_TYPE_FLOAT) {
- return get_swizzled_channel(c, vpm_reads, swiz);
- } else if (chan->size == 32 &&
- chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- if (chan->normalized) {
- return qir_FMUL(c,
- qir_ITOF(c, vpm_reads[swiz]),
- qir_uniform_f(c,
- 1.0 / 0x7fffffff));
- } else {
- return qir_ITOF(c, vpm_reads[swiz]);
- }
- } else if (chan->size == 8 &&
- (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
- chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
- struct qreg vpm = vpm_reads[0];
- if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
- if (chan->normalized) {
- return qir_FSUB(c, qir_FMUL(c,
- qir_UNPACK_8_F(c, temp, swiz),
- qir_uniform_f(c, 2.0)),
- qir_uniform_f(c, 1.0));
- } else {
- return qir_FADD(c,
- qir_ITOF(c,
- qir_UNPACK_8_I(c, temp,
- swiz)),
- qir_uniform_f(c, -128.0));
- }
- } else {
- if (chan->normalized) {
- return qir_UNPACK_8_F(c, vpm, swiz);
- } else {
- return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
- }
- }
- } else if (chan->size == 16 &&
- (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
- chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
- struct qreg vpm = vpm_reads[swiz / 2];
-
- /* Note that UNPACK_16F eats a half float, not ints, so we use
- * UNPACK_16_I for all of these.
- */
- if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
- temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
- if (chan->normalized) {
- return qir_FMUL(c, temp,
- qir_uniform_f(c, 1/32768.0f));
- } else {
- return temp;
- }
- } else {
- /* UNPACK_16I sign-extends, so we have to emit ANDs. */
- temp = vpm;
- if (swiz == 1 || swiz == 3)
- temp = qir_UNPACK_16_I(c, temp, 1);
- temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
- temp = qir_ITOF(c, temp);
-
- if (chan->normalized) {
- return qir_FMUL(c, temp,
- qir_uniform_f(c, 1 / 65535.0));
- } else {
- return temp;
- }
- }
- } else {
- return c->undef;
- }
-}
-
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
enum pipe_format format = c->vs_key->attr_formats[attr];
uint32_t attr_size = util_format_get_blocksize(format);
- struct qreg vpm_reads[4];
c->vattr_sizes[attr] = align(attr_size, 4);
for (int i = 0; i < align(attr_size, 4) / 4; i++) {
struct qreg vpm = { QFILE_VPM, attr * 4 + i };
- vpm_reads[i] = qir_MOV(c, vpm);
+ c->inputs[attr * 4 + i] = qir_MOV(c, vpm);
c->num_inputs++;
}
-
- bool format_warned = false;
- const struct util_format_description *desc =
- util_format_description(format);
-
- for (int i = 0; i < 4; i++) {
- uint8_t swiz = desc->swizzle[i];
- struct qreg result = get_channel_from_vpm(c, vpm_reads,
- swiz, desc);
-
- if (result.file == QFILE_NULL) {
- if (!format_warned) {
- fprintf(stderr,
- "vtx element %d unsupported type: %s\n",
- attr, util_format_name(format));
- format_warned = true;
- }
- result = qir_uniform_f(c, 0.0);
- }
- c->inputs[attr * 4 + i] = result;
- }
}
static void