summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author: Eric Anholt <[email protected]> 2014-07-15 12:29:32 -0700
committer: Eric Anholt <[email protected]> 2014-08-11 14:40:45 -0700
commit: 66c6c401279aa4152a24681f64d0e101aa004593 (patch)
tree: 8707e885b36cc7a3b522637823eb5dbf83bdd3a2 /src/gallium/drivers
parent: d5a6e3dd9b094a427c7842817227db25fbece134 (diff)
vc4: Add support for texturing (under simulation)
Only rgba8888 works, and only a single texture unit, and it's only under simulation because I haven't built the kernel interface yet. v2: Rebase on helpers. v3: Fold in the don't-break-the-arm-build fix.
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/vc4/vc4_context.h 1
-rw-r--r-- src/gallium/drivers/vc4/vc4_draw.c 3
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c 151
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.c 10
-rw-r--r-- src/gallium/drivers/vc4/vc4_qir.h 62
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_defines.h 14
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_disasm.c 17
-rw-r--r-- src/gallium/drivers/vc4/vc4_qpu_emit.c 32
8 files changed, 277 insertions, 13 deletions
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index f36b96b0c25..18e6264b0b6 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -197,6 +197,7 @@ void *vc4_simulator_alloc(struct vc4_screen *screen, uint32_t size);
void vc4_get_uniform_bo(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
+ struct vc4_texture_stateobj *texstate,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset);
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index c0fb082c548..9e4454b141d 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -171,12 +171,15 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
uint32_t fs_ubo_offset, vs_ubo_offset, cs_ubo_offset;
vc4_get_uniform_bo(vc4, vc4->prog.fs,
&vc4->constbuf[PIPE_SHADER_FRAGMENT],
+ &vc4->fragtex,
0, &fs_ubo, &fs_ubo_offset);
vc4_get_uniform_bo(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex,
0, &vs_ubo, &vs_ubo_offset);
vc4_get_uniform_bo(vc4, vc4->prog.vs,
&vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex,
1, &cs_ubo, &cs_ubo_offset);
cl_start_shader_reloc(&vc4->shader_rec, 6 + vtx->num_elements);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0b26f5adf5b..b45507d154d 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -35,6 +35,9 @@
#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
+#ifdef USE_VC4_SIMULATOR
+#include "simpenrose/simpenrose.h"
+#endif
struct tgsi_to_qir {
struct tgsi_parse_context parser;
@@ -157,6 +160,10 @@ get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
case TGSI_FILE_INPUT:
r = trans->inputs[src->Index * 4 + s];
break;
+ case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_SAMPLER_VIEW:
+ r = c->undef;
+ break;
default:
fprintf(stderr, "unknown src file %d\n", src->File);
abort();
@@ -278,6 +285,51 @@ tgsi_to_qir_lrp(struct tgsi_to_qir *trans,
}
+/**
+ * Emits the QIR for a TGSI TEX, TXP or TXB instruction.
+ *
+ * Only the first source operand is consulted: its x and y channels are the S
+ * and T coordinates, and its w channel is the projective divisor for TXP or
+ * the LOD bias for TXB.  The parameter writes go out T first and S last,
+ * with the config P0 uniform attached to the first write and the config P1
+ * uniform to the second.  QOP_TEX_RESULT follows, and then one
+ * QOP_R4_UNPACK_* into a fresh temporary per channel enabled in the
+ * destination write mask.
+ *
+ * The texture/sampler index is still hard-coded to 0 (XXX), and op is
+ * currently unused.
+ */
+static void
+tgsi_to_qir_tex(struct tgsi_to_qir *trans,
+                struct tgsi_full_instruction *tgsi_inst,
+                enum qop op, struct qreg *src)
+{
+        struct qcompile *c = trans->c;
+        const unsigned opcode = tgsi_inst->Instruction.Opcode;
+        const unsigned writemask = tgsi_inst->Dst[0].Register.WriteMask;
+        const uint32_t tex_and_sampler = 0; /* XXX */
+        struct qreg coord_s = src[0 * 4 + 0];
+        struct qreg coord_t = src[0 * 4 + 1];
+
+        assert(!tgsi_inst->Instruction.Saturate);
+
+        /* Projective lookup: scale both coordinates by 1/w up front. */
+        if (opcode == TGSI_OPCODE_TXP) {
+                struct qreg inv_w = qir_RCP(c, src[0 * 4 + 3]);
+
+                coord_s = qir_FMUL(c, coord_s, inv_w);
+                coord_t = qir_FMUL(c, coord_t, inv_w);
+        }
+
+        /* First parameter write carries config P0. */
+        struct qreg config_p0 = add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P0,
+                                            tex_and_sampler);
+        qir_TEX_T(c, coord_t, config_p0);
+
+        /* Second parameter write carries config P1: that is the bias write
+         * for TXB (S then gets a constant 0 uniform), otherwise the S write.
+         */
+        struct qreg config_p1 = add_uniform(trans, QUNIFORM_TEXTURE_CONFIG_P1,
+                                            tex_and_sampler);
+        if (opcode != TGSI_OPCODE_TXB) {
+                qir_TEX_S(c, coord_s, config_p1);
+        } else {
+                qir_TEX_B(c, src[0 * 4 + 3], config_p1);
+                qir_TEX_S(c, coord_s, add_uniform(trans, QUNIFORM_CONSTANT, 0));
+        }
+
+        qir_emit(c, qir_inst(QOP_TEX_RESULT, c->undef, c->undef, c->undef));
+
+        /* Unpack one channel of the result per enabled destination channel. */
+        for (int chan = 0; chan < 4; chan++) {
+                if (writemask & (1 << chan)) {
+                        struct qreg unpacked = qir_get_temp(c);
+
+                        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + chan,
+                                             unpacked,
+                                             c->undef, c->undef));
+                        update_dst(trans, tgsi_inst, chan, unpacked);
+                }
+        }
+}
+
static struct qreg
tgsi_to_qir_pow(struct tgsi_to_qir *trans,
struct tgsi_full_instruction *tgsi_inst,
@@ -577,13 +629,6 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
if (tgsi_op == TGSI_OPCODE_END)
return;
- if (tgsi_op > ARRAY_SIZE(op_trans) || !op_trans[tgsi_op].func) {
- fprintf(stderr, "unknown tgsi inst: ");
- tgsi_dump_instruction(tgsi_inst, asdf++);
- fprintf(stderr, "\n");
- abort();
- }
-
struct qreg src_regs[12];
for (int s = 0; s < 3; s++) {
for (int i = 0; i < 4; i++) {
@@ -592,6 +637,24 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
}
}
+ switch (tgsi_op) {
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_TXB:
+ tgsi_to_qir_tex(trans, tgsi_inst,
+ op_trans[tgsi_op].op, src_regs);
+ return;
+ default:
+ break;
+ }
+
+        /* tgsi_op indexes op_trans[], so an opcode equal to the table size is
+         * already out of range and must be rejected before the lookup.
+         */
+        if (tgsi_op >= ARRAY_SIZE(op_trans) || !(op_trans[tgsi_op].func)) {
+                fprintf(stderr, "unknown tgsi inst: ");
+                tgsi_dump_instruction(tgsi_inst, asdf++);
+                fprintf(stderr, "\n");
+                abort();
+        }
+
for (int i = 0; i < 4; i++) {
if (!(tgsi_inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1043,9 +1106,74 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
free(so);
}
+/**
+ * Translates a gallium PIPE_TEX_WRAP_* mode into the 2-bit wrap code that
+ * get_texture_p1() packs into the texture config parameter 1 word.
+ *
+ * PIPE_TEX_WRAP_CLAMP shares the code used for PIPE_TEX_WRAP_CLAMP_TO_EDGE.
+ * Any other mode is reported on stderr and asserts; with assertions compiled
+ * out it falls back to 0, the same code as PIPE_TEX_WRAP_REPEAT.
+ */
+static uint32_t translate_wrap(uint32_t p_wrap)
+{
+        switch (p_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP:
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        default:
+                /* p_wrap is unsigned, so print it with an unsigned
+                 * conversion rather than %d.
+                 */
+                fprintf(stderr, "Unknown wrap mode %u\n", (unsigned)p_wrap);
+                assert(!"not reached");
+                return 0;
+        }
+}
+
+/**
+ * Computes the value of a QUNIFORM_TEXTURE_CONFIG_P0 uniform.
+ *
+ * The low 8 bits of tex_and_sampler select the sampler view in texstate.
+ * The result is that view's last mip level, ORed in simulator builds with
+ * the simulator's hardware address for the mapping of the texture's BO.
+ * Non-simulator builds leave the address at 0 for now, and the data type is
+ * not filled in yet (both marked XXX).
+ */
+static uint32_t
+get_texture_p0(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        uint32_t p0 = texture->u.tex.last_level;
+
+        /* Tested with #ifdef to match the guard on the simpenrose include;
+         * the resource is looked up only here so non-simulator builds do
+         * not carry an unused variable.
+         */
+#ifdef USE_VC4_SIMULATOR
+        struct vc4_resource *rsc = vc4_resource(texture->texture);
+
+        p0 |= simpenrose_hw_addr(rsc->bo->map); /* XXX */
+#endif
+        /* XXX: data type */
+
+        return p0;
+}
+
+/**
+ * Computes the value of a QUNIFORM_TEXTURE_CONFIG_P1 uniform.
+ *
+ * tex_and_sampler carries the texture index in bits 0-7 and the sampler
+ * index in bits 8-15; both index into texstate.  The result packs, from the
+ * top down: a hard-coded bit 31 (data type placeholder), the texture height
+ * at bit 20, the width at bit 8, the magnification filter at bit 7, the
+ * combined minification/mip filter at bit 4, and the T and S wrap codes at
+ * bits 2 and 0.
+ *
+ * NOTE(review): width and height are shifted in unmasked -- confirm the
+ * hardware fields can represent the largest texture size exposed.
+ */
+static uint32_t
+get_texture_p1(struct vc4_texture_stateobj *texstate,
+               uint32_t tex_and_sampler)
+{
+        uint32_t texi = (tex_and_sampler >> 0) & 0xff;
+        uint32_t sampi = (tex_and_sampler >> 8) & 0xff;
+        struct pipe_sampler_view *texture = texstate->textures[texi];
+        struct pipe_sampler_state *sampler = texstate->samplers[sampi];
+        static const uint32_t mipfilter_map[] = {
+                [PIPE_TEX_MIPFILTER_NEAREST] = 2,
+                [PIPE_TEX_MIPFILTER_LINEAR] = 4,
+                [PIPE_TEX_MIPFILTER_NONE] = 0
+        };
+        static const uint32_t imgfilter_map[] = {
+                [PIPE_TEX_FILTER_NEAREST] = 1,
+                [PIPE_TEX_FILTER_LINEAR] = 0,
+        };
+        /* The minification code is the image filter code plus the mip
+         * filter code.
+         */
+        uint32_t min_filter = (imgfilter_map[sampler->min_img_filter] +
+                               mipfilter_map[sampler->min_mip_filter]);
+
+        /* Build the word from unsigned operands only: "1 << 31" on a signed
+         * int shifts into the sign bit, which is undefined behavior.
+         */
+        return ((1u << 31) /* XXX: data type */ |
+                ((uint32_t)texture->texture->height0 << 20) |
+                ((uint32_t)texture->texture->width0 << 8) |
+                (imgfilter_map[sampler->mag_img_filter] << 7) |
+                (min_filter << 4) |
+                (translate_wrap(sampler->wrap_t) << 2) |
+                (translate_wrap(sampler->wrap_s) << 0));
+}
+
void
vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
+ struct vc4_texture_stateobj *texstate,
int shader_index, struct vc4_bo **out_bo,
uint32_t *out_offset)
{
@@ -1055,6 +1183,7 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
uint32_t *map = vc4_bo_map(ubo);
for (int i = 0; i < uinfo->count; i++) {
+
switch (uinfo->contents[i]) {
case QUNIFORM_CONSTANT:
map[i] = uinfo->data[i];
@@ -1068,6 +1197,14 @@ vc4_get_uniform_bo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
case QUNIFORM_VIEWPORT_Y_SCALE:
map[i] = fui(vc4->framebuffer.height * -16.0f / 2.0f);
break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ map[i] = get_texture_p0(texstate, uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ map[i] = get_texture_p1(texstate, uinfo->data[i]);
+ break;
}
#if 0
fprintf(stderr, "%p/%d: %d: 0x%08x (%f)\n",
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 495d16e2a25..0499eb9406f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -63,6 +63,16 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_VPM_READ] = { "vpm_read", 0, 1, true },
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
+
+ [QOP_TEX_S] = { "tex_s", 0, 2 },
+ [QOP_TEX_T] = { "tex_t", 0, 2 },
+ [QOP_TEX_R] = { "tex_r", 0, 2 },
+ [QOP_TEX_B] = { "tex_b", 0, 2 },
+ [QOP_TEX_RESULT] = { "tex_result", 0, 0 },
+ [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 0 },
+ [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 0 },
+ [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 0 },
+ [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 0 },
};
static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 753f82e5021..a76d091b327 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -72,6 +72,24 @@ enum qop {
QOP_VPM_READ,
QOP_TLB_COLOR_WRITE,
QOP_VARY_ADD_C,
+
+ /** Texture x coordinate parameter write */
+ QOP_TEX_S,
+ /** Texture y coordinate parameter write */
+ QOP_TEX_T,
+ /** Texture border color parameter or cube map z coordinate write */
+ QOP_TEX_R,
+ /** Texture LOD bias parameter write */
+ QOP_TEX_B,
+ /**
+ * Signal of texture read being necessary and then reading r4 into
+ * the destination
+ */
+ QOP_TEX_RESULT,
+ QOP_R4_UNPACK_A,
+ QOP_R4_UNPACK_B,
+ QOP_R4_UNPACK_C,
+ QOP_R4_UNPACK_D
};
struct simple_node {
@@ -120,6 +138,25 @@ enum quniform_contents {
QUNIFORM_VIEWPORT_X_SCALE,
QUNIFORM_VIEWPORT_Y_SCALE,
/** @} */
+
+ /**
+ * A reference to a texture config parameter 0 uniform.
+ *
+ * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+ * defines texture type, miplevels, and such. It will be found as a
+ * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
+ */
+ QUNIFORM_TEXTURE_CONFIG_P0,
+
+ /**
+ * A reference to a texture config parameter 1 uniform.
+ *
+ * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
+ * defines texture width, height, filters, and wrap modes. It will be
+ * found as a parameter to the second QOP_TEX_[STRB] instruction in a
+ * sequence.
+ */
+ QUNIFORM_TEXTURE_CONFIG_P1,
};
struct qcompile {
@@ -178,6 +215,20 @@ qir_##name(struct qcompile *c, struct qreg a, struct qreg b) \
return t; \
}
+#define QIR_NODST_1(name) /* Defines qir_<name>(c, a): emits QOP_<name> with source a; dst and second source are c->undef. */ \
+static inline void \
+qir_##name(struct qcompile *c, struct qreg a) \
+{ \
+ qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef)); \
+}
+
+#define QIR_NODST_2(name) /* Defines qir_<name>(c, a, b): emits QOP_<name> with sources a and b; dst is c->undef. */ \
+static inline void \
+qir_##name(struct qcompile *c, struct qreg a, struct qreg b) \
+{ \
+ qir_emit(c, qir_inst(QOP_##name, c->undef, a, b)); \
+}
+
QIR_ALU1(MOV)
QIR_ALU2(FADD)
QIR_ALU2(FSUB)
@@ -194,12 +245,11 @@ QIR_ALU1(EXP2)
QIR_ALU1(LOG2)
QIR_ALU2(PACK_SCALED)
QIR_ALU1(VARY_ADD_C)
-
-static inline void
-qir_VPM_WRITE(struct qcompile *c, struct qreg a)
-{
- qir_emit(c, qir_inst(QOP_VPM_WRITE, c->undef, a, c->undef));
-}
+QIR_NODST_1(VPM_WRITE)
+QIR_NODST_2(TEX_S)
+QIR_NODST_2(TEX_T)
+QIR_NODST_2(TEX_R)
+QIR_NODST_2(TEX_B)
static inline struct qreg
qir_CMP(struct qcompile *c, struct qreg cmp, struct qreg a, struct qreg b)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_defines.h b/src/gallium/drivers/vc4/vc4_qpu_defines.h
index bdd5d94708f..224d9aaa44d 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_defines.h
+++ b/src/gallium/drivers/vc4/vc4_qpu_defines.h
@@ -195,6 +195,17 @@ enum qpu_pack_a {
QPU_PACK_A_8D_SAT,
};
+enum qpu_unpack_r4 { /* Codes for the QPU_UNPACK instruction field when an r4 source is read with QPU_PM set. */
+ QPU_UNPACK_R4_NOP, /* the disassembler prints no unpack suffix for this value */
+ QPU_UNPACK_R4_F16A_TO_F32,
+ QPU_UNPACK_R4_F16B_TO_F32,
+ QPU_UNPACK_R4_8D_REP,
+ QPU_UNPACK_R4_8A, /* 8A..8D must stay consecutive: the code generator computes QPU_UNPACK_R4_8A + (op - QOP_R4_UNPACK_A) */
+ QPU_UNPACK_R4_8B,
+ QPU_UNPACK_R4_8C,
+ QPU_UNPACK_R4_8D,
+};
+
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field) \
@@ -209,6 +220,9 @@ enum qpu_pack_a {
#define QPU_SIG_SHIFT 60
#define QPU_SIG_MASK QPU_MASK(63, 60)
+#define QPU_UNPACK_SHIFT 57 /* lowest bit of the unpack field, accessed as QPU_SET_FIELD/QPU_GET_FIELD(..., QPU_UNPACK) */
+#define QPU_UNPACK_MASK QPU_MASK(59, 57) /* the unpack field spans instruction bits 57..59 */
+
/**
* If set, the pack field means PACK_MUL or R4 packing, instead of normal
* regfile a packing.
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 4ec6d9657b7..525710585ae 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -93,6 +93,17 @@ static const char *qpu_pack_mul[] = {
[QPU_PACK_MUL_8D] = "8d",
};
+static const char *qpu_unpack_r4[] = { /* Disassembly names indexed by enum qpu_unpack_r4; print_alu_src() appends "." plus the name to an r4 source when QPU_PM is set and the code is not NOP. */
+ [QPU_UNPACK_R4_NOP] = "",
+ [QPU_UNPACK_R4_F16A_TO_F32] = "f16a",
+ [QPU_UNPACK_R4_F16B_TO_F32] = "f16b",
+ [QPU_UNPACK_R4_8D_REP] = "8d_rep",
+ [QPU_UNPACK_R4_8A] = "8a",
+ [QPU_UNPACK_R4_8B] = "8b",
+ [QPU_UNPACK_R4_8C] = "8c",
+ [QPU_UNPACK_R4_8D] = "8d",
+};
+
static const char *special_read_a[] = {
"uni",
NULL,
@@ -263,6 +274,7 @@ print_alu_src(uint64_t inst, uint32_t mux)
uint32_t raddr = (is_a ?
QPU_GET_FIELD(inst, QPU_RADDR_A) :
QPU_GET_FIELD(inst, QPU_RADDR_B));
+ uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
if (mux <= QPU_MUX_R5)
fprintf(stderr, "r%d", mux);
@@ -287,6 +299,11 @@ print_alu_src(uint64_t inst, uint32_t mux)
else
fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
}
+
+ if (mux == QPU_MUX_R4 && (inst & QPU_PM) &&
+ unpack != QPU_UNPACK_R4_NOP) {
+ fprintf(stderr, ".%s", DESC(qpu_unpack_r4, unpack));
+ }
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 2eebabee419..9d55390c67f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -490,6 +490,38 @@ vc4_generate_code(struct qcompile *c)
break;
}
+ case QOP_TEX_S:
+ case QOP_TEX_T:
+ case QOP_TEX_R:
+ case QOP_TEX_B:
+ queue(c, qpu_inst(qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
+ (qinst->op -
+ QOP_TEX_S)),
+ src[0]),
+ qpu_m_NOP()));
+ break;
+
+ case QOP_TEX_RESULT:
+ queue(c, qpu_inst(qpu_a_NOP(), qpu_m_NOP()));
+ *last_inst(c) = qpu_set_sig(*last_inst(c),
+ QPU_SIG_LOAD_TMU0);
+
+ break;
+
+ case QOP_R4_UNPACK_A:
+ case QOP_R4_UNPACK_B:
+ case QOP_R4_UNPACK_C:
+ case QOP_R4_UNPACK_D:
+ queue(c, qpu_inst(qpu_a_MOV(dst, qpu_r4()),
+ qpu_m_NOP()));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_R4_8A +
+ (qinst->op -
+ QOP_R4_UNPACK_A),
+ QPU_UNPACK);
+
+ break;
+
default:
assert(qinst->op < ARRAY_SIZE(translate));
assert(translate[qinst->op].op != 0); /* NOPs */