diff options
-rw-r--r-- | src/gallium/drivers/vc4/vc4_program.c | 31 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qir.h | 20 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_qpu_emit.c | 25 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_register_allocate.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_screen.c | 16 |
6 files changed, 94 insertions, 6 deletions
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1efdf37097f..e37303fb69b 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1026,6 +1026,37 @@ get_channel_from_vpm(struct vc4_compile *c, return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz)); } } + } else if (chan->size == 16 && + (chan->type == UTIL_FORMAT_TYPE_UNSIGNED || + chan->type == UTIL_FORMAT_TYPE_SIGNED)) { + struct qreg vpm = vpm_reads[swiz / 2]; + + /* Note that UNPACK_16F eats a half float, not ints, so we use + * UNPACK_16_I for all of these. + */ + if (chan->type == UTIL_FORMAT_TYPE_SIGNED) { + temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2)); + if (chan->normalized) { + return qir_FMUL(c, temp, + qir_uniform_f(c, 1/32768.0f)); + } else { + return temp; + } + } else { + /* UNPACK_16I sign-extends, so we have to emit ANDs. */ + temp = vpm; + if (swiz == 1 || swiz == 3) + temp = qir_UNPACK_16_I(c, temp, 1); + temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff)); + temp = qir_ITOF(c, temp); + + if (chan->normalized) { + return qir_FMUL(c, temp, + qir_uniform_f(c, 1 / 65535.0)); + } else { + return temp; + } + } } else { return c->undef; } diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 8cd571d5b77..49b79014c09 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -103,10 +103,14 @@ static const struct qir_op_info qir_op_info[] = { [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 }, [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 }, [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 }, + [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 }, + [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 }, [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 }, [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 }, [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 }, [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 }, + [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 }, + [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 }, }; static const char * diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 9da120ab912..46f4c12b22c 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -113,11 +113,15 @@ enum qop { QOP_UNPACK_8B_F, QOP_UNPACK_8C_F, QOP_UNPACK_8D_F, + QOP_UNPACK_16A_F, + QOP_UNPACK_16B_F, QOP_UNPACK_8A_I, QOP_UNPACK_8B_I, QOP_UNPACK_8C_I, QOP_UNPACK_8D_I, + QOP_UNPACK_16A_I, + QOP_UNPACK_16B_I, /** Texture x coordinate parameter write */ QOP_TEX_S, @@ -510,6 +514,22 @@ qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i) } static inline struct qreg +qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i) +{ + struct qreg t = qir_get_temp(c); + qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); + return t; +} + +static inline struct qreg +qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i) +{ + struct qreg t = qir_get_temp(c); + qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); + return t; +} + +static inline struct qreg qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y) { return qir_EXP2(c, qir_FMUL(c, diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index f8807276660..530ec8bf501 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -141,6 +141,15 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) uint32_t vpm_read_offset = 0; bool written_r3 = false; bool needs_restore; + /* Map from the QIR ops enum order to QPU unpack bits. */ + static const uint32_t unpack_map[] = { + QPU_UNPACK_8A, + QPU_UNPACK_8B, + QPU_UNPACK_8C, + QPU_UNPACK_8D, + QPU_UNPACK_16A_TO_F32, + QPU_UNPACK_16B_TO_F32, + }; make_empty_list(&c->qpu_inst_list); @@ -472,6 +481,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) case QOP_UNPACK_8B_F: case QOP_UNPACK_8C_F: case QOP_UNPACK_8D_F: + case QOP_UNPACK_16A_F: + case QOP_UNPACK_16B_F: { assert(src[0].mux == QPU_MUX_A); /* Since we're setting the pack bits, if the @@ -480,20 +491,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ? qpu_rb(31) : dst), src[0], src[0])); - *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A + - (qinst->op - - QOP_UNPACK_8A_F), + *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op - + QOP_UNPACK_8A_F], QPU_UNPACK); if (dst.mux == QPU_MUX_A) { queue(c, qpu_a_MOV(dst, qpu_rb(31))); } + } break; case QOP_UNPACK_8A_I: case QOP_UNPACK_8B_I: case QOP_UNPACK_8C_I: case QOP_UNPACK_8D_I: + case QOP_UNPACK_16A_I: + case QOP_UNPACK_16B_I: { assert(src[0].mux == QPU_MUX_A); /* Since we're setting the pack bits, if the @@ -501,14 +514,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) */ queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ? qpu_rb(31) : dst), src[0])); - *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A + - (qinst->op - - QOP_UNPACK_8A_I), + *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op - + QOP_UNPACK_8A_I], QPU_UNPACK); if (dst.mux == QPU_MUX_A) { queue(c, qpu_a_MOV(dst, qpu_rb(31))); } + } break; default: diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 8f8c1899071..9eae7fca758 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -258,10 +258,14 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_UNPACK_8B_F: case QOP_UNPACK_8C_F: case QOP_UNPACK_8D_F: + case QOP_UNPACK_16A_F: + case QOP_UNPACK_16B_F: case QOP_UNPACK_8A_I: case QOP_UNPACK_8B_I: case QOP_UNPACK_8C_I: case QOP_UNPACK_8D_I: + case QOP_UNPACK_16A_I: + case QOP_UNPACK_16B_I: /* The unpack flags require an A-file src register. */ ra_set_node_class(g, temp_to_node[inst->src[0].index], vc4->reg_class_a); diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 6bb158b5990..62912d84eb6 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -346,6 +346,22 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen, case PIPE_FORMAT_R32G32B32_FLOAT: case PIPE_FORMAT_R32G32_FLOAT: case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16_SSCALED: case PIPE_FORMAT_R8G8B8A8_UNORM: case PIPE_FORMAT_R8G8B8_UNORM: case PIPE_FORMAT_R8G8_UNORM: |