diff options
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c | 34 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 76 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_compiler.h | 55 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 37 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/instr-a3xx.h | 31 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3.c (renamed from src/gallium/drivers/freedreno/a3xx/ir-a3xx.c) | 59 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/ir3.h (renamed from src/gallium/drivers/freedreno/a3xx/ir-a3xx.h) | 148 |
8 files changed, 307 insertions, 145 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c index 2d5ae62a64a..0e45ec54b38 100644 --- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c @@ -285,21 +285,7 @@ static void print_instr_cat2(instr_t *instr) static void print_instr_cat3(instr_t *instr) { instr_cat3_t *cat3 = &instr->cat3; - bool full = true; - - // XXX is this based on opc or some other bit? - switch (cat3->opc) { - case OPC_MAD_F16: - case OPC_MAD_U16: - case OPC_MAD_S16: - case OPC_SEL_B16: - case OPC_SEL_S16: - case OPC_SEL_F16: - case OPC_SAD_S16: - case OPC_SAD_S32: // really?? - full = false; - break; - } + bool full = instr_cat3_full(cat3); printf(" "); print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false); @@ -747,26 +733,12 @@ struct opc_info { #undef OPC }; -#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)])) - -static uint32_t getopc(instr_t *instr) -{ - switch (instr->opc_cat) { - case 0: return instr->cat0.opc; - case 1: return 0; - case 2: return instr->cat2.opc; - case 3: return instr->cat3.opc; - case 4: return instr->cat4.opc; - case 5: return instr->cat5.opc; - case 6: return instr->cat6.opc; - default: return 0; - } -} +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)])) static void print_instr(uint32_t *dwords, int level, int n) { instr_t *instr = (instr_t *)dwords; - uint32_t opc = getopc(instr); + uint32_t opc = instr_opc(instr); const char *name; printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 2c32c0fa2a7..5ab34e557b9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -44,12 +44,13 @@ #include "fd3_util.h" #include "instr-a3xx.h" -#include "ir-a3xx.h" +#include "ir3.h" struct fd3_compile_context { const struct tgsi_token *tokens; struct ir3_shader *ir; + struct ir3_block *block; struct fd3_shader_stateobj *so; struct tgsi_parse_context parser; @@ -124,6 +125,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, ctx->tokens = tokens; ctx->ir = so->ir; + ctx->block = ir3_block_create(ctx->ir, 0, 0, 0); ctx->so = so; ctx->last_input = NULL; ctx->last_rel = NULL; @@ -176,7 +178,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...) _debug_vprintf(format, ap); va_end(ap); tgsi_dump(ctx->tokens, 0); - assert(0); + debug_assert(0); } #define compile_assert(ctx, cond) do { \ @@ -208,11 +210,17 @@ handle_last_rel(struct fd3_compile_context *ctx) } } +static struct ir3_instruction * +instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) +{ + return ir3_instr_create(ctx->block, category, opc); +} + static void add_nop(struct fd3_compile_context *ctx, unsigned count) { while (count-- > 0) - ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr_create(ctx, 0, OPC_NOP); } static unsigned @@ -241,6 +249,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) { unsigned flags = 0, num = 0; + struct ir3_register *reg; switch (dst->File) { case TGSI_FILE_OUTPUT: @@ -256,10 +265,17 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, break; } + if (dst->Indirect) + flags |= IR3_REG_RELATIV; if (ctx->so->half_precision) flags |= IR3_REG_HALF; - return ir3_reg_create(instr, regid(num, chan), flags); + reg = ir3_reg_create(instr, regid(num, chan), flags); + + if (dst->Indirect) + ctx->last_rel = instr; + + return reg; } static struct ir3_register * @@ -517,9 +533,9 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, /* can't have abs or neg on a mov instr, so use * absneg.f instead to handle these cases: */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F); + instr = instr_create(ctx, 2, OPC_ABSNEG_F); } else { - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; } @@ -539,10 +555,10 @@ create_clamp(struct fd3_compile_context *ctx, { struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F); + instr = instr_create(ctx, 2, OPC_MAX_F); vectorize(ctx, instr, dst, 2, val, 0, minval, 0); - instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F); + instr = instr_create(ctx, 2, OPC_MIN_F); vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); } @@ -707,7 +723,7 @@ trans_arl(const struct instr_translater *t, tmp_src = get_internal_temp_hr(ctx, &tmp_dst); /* cov.{f32,f16}s16 Rtmp, Rsrc */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = get_ftype(ctx); instr->cat1.dst_type = TYPE_S16; add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; @@ -716,7 +732,7 @@ trans_arl(const struct instr_translater *t, add_nop(ctx, 3); /* shl.b Rtmp, Rtmp, 2 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B); + instr = instr_create(ctx, 2, OPC_SHL_B); add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF; add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF; ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; @@ -724,7 +740,7 @@ trans_arl(const struct instr_translater *t, add_nop(ctx, 3); /* mova a0, Rtmp */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = TYPE_S16; instr->cat1.dst_type = TYPE_S16; add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF; @@ -804,7 +820,7 @@ trans_samp(const struct instr_translater *t, tmp_src = get_internal_temp(ctx, &tmp_dst); for (j = 0; (j < 4) && (order[j] >= 0); j++) { - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = type_mov; instr->cat1.dst_type = type_mov; add_dst_reg(ctx, instr, &tmp_dst, j); @@ -817,7 +833,7 @@ trans_samp(const struct instr_translater *t, add_nop(ctx, 4 - j); } - instr = ir3_instr_create(ctx->ir, 5, t->opc); + instr = instr_create(ctx, 5, t->opc); instr->cat5.type = get_ftype(ctx); instr->cat5.samp = samp->Index; instr->cat5.tex = samp->Index; @@ -915,7 +931,7 @@ trans_cmp(const struct instr_translater *t, a0 = get_unconst(ctx, a0); /* cmps.f.ge tmp, a0, a1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr = instr_create(ctx, 2, OPC_CMPS_F); instr->cat2.condition = condition; vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); @@ -924,7 +940,7 @@ trans_cmp(const struct instr_translater *t, case TGSI_OPCODE_SGE: case TGSI_OPCODE_SLE: /* cov.u16f16 dst, tmp0 */ - instr = ir3_instr_create(ctx->ir, 1, 0); + instr = instr_create(ctx, 1, 0); instr->cat1.src_type = get_utype(ctx); instr->cat1.dst_type = get_ftype(ctx); vectorize(ctx, instr, dst, 1, tmp_src, 0); @@ -934,12 +950,12 @@ trans_cmp(const struct instr_translater *t, case TGSI_OPCODE_SLT: case TGSI_OPCODE_CMP: /* add.s tmp, tmp, -1 */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr = instr_create(ctx, 2, OPC_ADD_S); vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED); if (t->tgsi_opc == TGSI_OPCODE_CMP) { /* sel.{f32,f16} dst, src2, tmp, src1 */ - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &inst->Src[2].Register, 0, @@ -949,7 +965,7 @@ trans_cmp(const struct instr_translater *t, get_immediate(ctx, &constval0, fui(0.0)); get_immediate(ctx, &constval1, fui(1.0)); /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */ - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32); vectorize(ctx, instr, dst, 3, &constval0, 0, tmp_src, 0, &constval1, 0); @@ -990,7 +1006,7 @@ pop_branch(struct fd3_compile_context *ctx) * and set (jp) flag on whatever the next instruction was, rather * than inserting an extra nop.. */ - instr = ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr = instr_create(ctx, 0, OPC_NOP); instr->flags |= IR3_INSTR_JP; /* pop the branch instruction from the stack and fix up branch target: */ @@ -1018,13 +1034,13 @@ trans_if(const struct instr_translater *t, if (is_const(src)) src = get_unconst(ctx, src); - instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr = instr_create(ctx, 2, OPC_CMPS_F); ir3_reg_create(instr, regid(REG_P0, 0), 0); add_src_reg(ctx, instr, src, src->SwizzleX); add_src_reg(ctx, instr, &constval, constval.SwizzleX); instr->cat2.condition = IR3_COND_EQ; - instr = ir3_instr_create(ctx->ir, 0, OPC_BR); + instr = instr_create(ctx, 0, OPC_BR); push_branch(ctx, instr); } @@ -1036,7 +1052,7 @@ trans_else(const struct instr_translater *t, struct ir3_instruction *instr; /* for first half of if/else/endif, generate a jump past the else: */ - instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP); + instr = instr_create(ctx, 0, OPC_JUMP); pop_branch(ctx); push_branch(ctx, instr); @@ -1060,7 +1076,7 @@ instr_cat0(const struct instr_translater *t, struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) { - ir3_instr_create(ctx->ir, 0, t->opc); + instr_create(ctx, 0, t->opc); } static void @@ -1083,7 +1099,7 @@ instr_cat1(const struct instr_translater *t, * in the future if we start supporting widening/narrowing or * conversion to/from integer.. */ - instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + instr = instr_create(ctx, 2, OPC_ADD_F); get_immediate(ctx, &constval, fui(0.0)); vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); } else { @@ -1129,14 +1145,14 @@ instr_cat2(const struct instr_translater *t, case OPC_SETRM: case OPC_CBITS_B: /* these only have one src reg */ - instr = ir3_instr_create(ctx->ir, 2, t->opc); + instr = instr_create(ctx, 2, t->opc); vectorize(ctx, instr, dst, 1, src0, src0_flags); break; default: if (is_const(src0) && is_const(src1)) src0 = get_unconst(ctx, src0); - instr = ir3_instr_create(ctx->ir, 2, t->opc); + instr = instr_create(ctx, 2, t->opc); vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, src1_flags); break; @@ -1186,7 +1202,7 @@ instr_cat3(const struct instr_translater *t, } } - instr = ir3_instr_create(ctx->ir, 3, + instr = instr_create(ctx, 3, ctx->so->half_precision ? t->hopc : t->opc); vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, &inst->Src[2].Register, 0); @@ -1214,8 +1230,8 @@ instr_cat4(const struct instr_translater *t, for (i = 0, n = 0; i < 4; i++) { if (dst->WriteMask & (1 << i)) { if (n++) - ir3_instr_create(ctx->ir, 0, OPC_NOP); - instr = ir3_instr_create(ctx->ir, 4, t->opc); + add_nop(ctx, 1); + instr = instr_create(ctx, 4, t->opc); add_dst_reg(ctx, instr, dst, i); add_src_reg(ctx, instr, src, src->SwizzleX); } @@ -1315,7 +1331,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) struct ir3_instruction *instr; struct ir3_register *dst; - instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F); + instr = instr_create(ctx, 2, OPC_BARY_F); /* dst register: */ dst = ir3_reg_create(instr, r + j, flags); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h index da25cdce88a..5cdb245640b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h @@ -33,61 +33,6 @@ #include "fd3_util.h" -/* ************************************************************************* */ -/* split this out or find some helper to use.. like main/bitset.h.. */ - -#define MAX_REG 256 - -typedef uint8_t regmask_t[2 * MAX_REG / 8]; - -static inline unsigned regmask_idx(struct ir3_register *reg) -{ - unsigned num = reg->num; - assert(num < MAX_REG); - if (reg->flags & IR3_REG_HALF) - num += MAX_REG; - return num; -} - -static inline void regmask_init(regmask_t *regmask) -{ - memset(regmask, 0, sizeof(*regmask)); -} - -static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - (*regmask)[idx / 8] |= 1 << (idx % 8); -} - -static inline unsigned regmask_get(regmask_t *regmask, - struct ir3_register *reg) -{ - unsigned idx = regmask_idx(reg); - unsigned i; - for (i = 0; i < 4; i++, idx++) - if (reg->wrmask & (1 << i)) - if ((*regmask)[idx / 8] & (1 << (idx % 8))) - return true; - return false; -} - -/* comp: - * 0 - x - * 1 - y - * 2 - z - * 3 - w - */ -static inline uint32_t regid(int num, int comp) -{ - return (num << 2) | (comp & 0x3); -} - -/* ************************************************************************* */ - int fd3_compile_shader(struct fd3_shader_stateobj *so, const struct tgsi_token *tokens); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 3df29ecc911..ddb33ca5844 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -79,9 +79,10 @@ static void fixup_vp_regfootprint(struct fd3_shader_stateobj *so) { unsigned i; - for (i = 0; i < so->inputs_count; i++) { + for (i = 0; i < so->inputs_count; i++) so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2); - } + for (i = 0; i < so->outputs_count; i++) + so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2); } static struct fd3_shader_stateobj * @@ -230,7 +231,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic) } static uint32_t -find_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) +find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -257,13 +258,13 @@ fd3_program_emit(struct fd_ringbuffer *ring, fsi = &fp->info; } - pos_regid = find_regid(vp, + pos_regid = find_output_regid(vp, fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - posz_regid = find_regid(fp, + posz_regid = find_output_regid(fp, fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); - psize_regid = find_regid(vp, + psize_regid = find_output_regid(vp, fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); - color_regid = find_regid(fp, + color_regid = find_output_regid(fp, fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); /* we could probably divide this up into things that need to be @@ -501,10 +502,11 @@ create_blit_fp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */ - instr = ir3_instr_create(ir, 2, OPC_BARY_F); + instr = ir3_instr_create(block, 2, OPC_BARY_F); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; instr->repeat = 1; @@ -514,11 +516,11 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,0), 0); /* r0.x */ /* (rpt5)nop */ - instr = ir3_instr_create(ir, 0, OPC_NOP); + instr = ir3_instr_create(block, 0, OPC_NOP); instr->repeat = 5; /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */ - instr = ir3_instr_create(ir, 5, OPC_SAM); + instr = ir3_instr_create(block, 5, OPC_SAM); instr->cat5.samp = 0; instr->cat5.tex = 0; instr->cat5.type = TYPE_F32; @@ -528,7 +530,7 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,2), 0); /* r0.z */ /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */ - instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */ instr->flags = IR3_INSTR_SY; instr->repeat = 3; instr->cat1.src_type = TYPE_F32; @@ -538,7 +540,7 @@ create_blit_fp(struct pipe_context *pctx) ir3_reg_create(instr, regid(0,0), IR3_REG_R); /* (r)r0.x */ /* end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); if (!so) @@ -573,10 +575,11 @@ create_blit_vp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; so = create_internal_shader(pctx, SHADER_VERTEX, ir); @@ -611,10 +614,11 @@ create_solid_fp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */ - instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */ instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; instr->repeat = 3; instr->cat1.src_type = TYPE_F16; @@ -625,7 +629,7 @@ create_solid_fp(struct pipe_context *pctx) IR3_REG_CONST | IR3_REG_R); /* end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); if (!so) @@ -650,10 +654,11 @@ create_solid_vp(struct pipe_context *pctx) { struct fd3_shader_stateobj *so; struct ir3_shader *ir = ir3_shader_create(); + struct ir3_block *block = ir3_block_create(ir, 0, 0, 0); struct ir3_instruction *instr; /* (sy)(ss)end */ - instr = ir3_instr_create(ir, 0, OPC_END); + instr = ir3_instr_create(block, 0, OPC_END); instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 4aeeb2e3006..c781dfe4be9 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -33,7 +33,7 @@ #include "freedreno_context.h" -#include "ir-a3xx.h" +#include "ir3.h" #include "disasm.h" typedef uint16_t fd3_semantic; /* semantic name + index */ @@ -43,6 +43,16 @@ fd3_semantic_name(uint8_t name, uint16_t index) return (name << 8) | (index & 0xff); } +static inline uint8_t sem2name(fd3_semantic sem) +{ + return sem >> 8; +} + +static inline uint16_t sem2idx(fd3_semantic sem) +{ + return sem & 0xff; +} + struct fd3_shader_stateobj { enum shader_t type; diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h index 1085ddf8c12..b0f78341131 100644 --- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h @@ -438,6 +438,23 @@ typedef struct PACKED { uint32_t opc_cat : 3; } instr_cat3_t; +static inline bool instr_cat3_full(instr_cat3_t *cat3) +{ + switch (cat3->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + return false; + default: + return true; + } +} + typedef struct PACKED { /* dword0: */ union PACKED { @@ -612,4 +629,18 @@ typedef union PACKED { }; } instr_t; +static inline uint32_t instr_opc(instr_t *instr) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: return instr->cat6.opc; + default: return 0; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir3.c index a39214ee663..2a06d42c7d6 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3.c @@ -21,7 +21,7 @@ * SOFTWARE. */ -#include "ir-a3xx.h" +#include "ir3.h" #include <stdlib.h> #include <stdio.h> @@ -72,7 +72,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info, if (reg->flags & IR3_REG_IMMED) { val.iim_val = reg->iim_val; } else { - int8_t max = (reg->num + repeat) >> 2; + int8_t components = util_last_bit(reg->wrmask); + int8_t max = (reg->num + repeat + components - 1) >> 2; val.comp = reg->num & 0x3; val.num = reg->num >> 2; @@ -514,6 +515,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in info->max_reg = -1; info->max_half_reg = -1; info->max_const = -1; + info->instrs_count = 0; /* need a integer number of instruction "groups" (sets of four * instructions), so pad out w/ NOPs if needed: @@ -528,6 +530,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in int ret = emit[instr->category](instr, dwords, info); if (ret) goto fail; + info->instrs_count += 1 + instr->repeat; dwords += 2; } @@ -552,30 +555,68 @@ static struct ir3_register * reg_create(struct ir3_shader *shader, static void insert_instr(struct ir3_shader *shader, struct ir3_instruction *instr) { +#ifdef DEBUG + static uint32_t serialno = 0; + instr->serialno = ++serialno; +#endif assert(shader->instrs_count < ARRAY_SIZE(shader->instrs)); shader->instrs[shader->instrs_count++] = instr; } -struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, +struct ir3_block * ir3_block_create(struct ir3_shader *shader, + unsigned ntmp, unsigned nin, unsigned nout) +{ + struct ir3_block *block; + unsigned size; + char *ptr; + + size = sizeof(*block); + size += sizeof(block->temporaries[0]) * ntmp; + size += sizeof(block->inputs[0]) * nin; + size += sizeof(block->outputs[0]) * nout; + + ptr = ir3_alloc(shader, size); + + block = (void *)ptr; + ptr += sizeof(*block); + + block->temporaries = (void *)ptr; + block->ntemporaries = ntmp; + ptr += sizeof(block->temporaries[0]) * ntmp; + + block->inputs = (void *)ptr; + block->ninputs = nin; + ptr += sizeof(block->inputs[0]) * nin; + + block->outputs = (void *)ptr; + block->noutputs = nout; + ptr += sizeof(block->outputs[0]) * nout; + + block->shader = shader; + + return block; +} + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, int category, opc_t opc) { struct ir3_instruction *instr = - ir3_alloc(shader, sizeof(struct ir3_instruction)); - instr->shader = shader; + ir3_alloc(block->shader, sizeof(struct ir3_instruction)); + instr->block = block; instr->category = category; instr->opc = opc; - insert_instr(shader, instr); + insert_instr(block->shader, instr); return instr; } struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) { struct ir3_instruction *new_instr = - ir3_alloc(instr->shader, sizeof(struct ir3_instruction)); + ir3_alloc(instr->block->shader, sizeof(struct ir3_instruction)); unsigned i; *new_instr = *instr; - insert_instr(instr->shader, new_instr); + insert_instr(instr->block->shader, new_instr); /* clone registers: */ new_instr->regs_count = 0; @@ -592,7 +633,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, int num, int flags) { - struct ir3_register *reg = reg_create(instr->shader, num, flags); + struct ir3_register *reg = reg_create(instr->block->shader, num, flags); assert(instr->regs_count < ARRAY_SIZE(instr->regs)); instr->regs[instr->regs_count++] = reg; return reg; diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir3.h index b0afe1868eb..896bec114fa 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h +++ b/src/gallium/drivers/freedreno/a3xx/ir3.h @@ -28,15 +28,19 @@ #include <stdbool.h> #include "instr-a3xx.h" +#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ /* low level intermediate representation of an adreno shader program */ struct ir3_shader; +struct ir3_instruction; +struct ir3_block; struct ir3_shader * fd_asm_parse(const char *src); struct ir3_shader_info { uint16_t sizedwords; + uint16_t instrs_count; /* expanded to account for rpt's */ /* NOTE: max_reg, etc, does not include registers not touched * by the shader (ie. vertex fetched via VFD_DECODE but not * touched by shader) @@ -84,7 +88,7 @@ struct ir3_register { }; struct ir3_instruction { - struct ir3_shader *shader; + struct ir3_block *block; int category; opc_t opc; enum { @@ -138,7 +142,7 @@ struct ir3_instruction { } flags; int repeat; unsigned regs_count; - struct ir3_register *regs[4]; + struct ir3_register *regs[5]; union { struct { char inv; @@ -168,6 +172,9 @@ struct ir3_instruction { int iim_val; } cat6; }; +#ifdef DEBUG + uint32_t serialno; +#endif }; #define MAX_INSTRS 1024 @@ -179,16 +186,151 @@ struct ir3_shader { unsigned heap_idx; }; +struct ir3_block { + struct ir3_shader *shader; + unsigned ntemporaries, ninputs, noutputs; + /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */ + struct ir3_instruction **temporaries; + struct ir3_instruction **inputs; + struct ir3_instruction **outputs; + struct ir3_block *parent; + struct ir3_instruction *head; +}; + struct ir3_shader * ir3_shader_create(void); void ir3_shader_destroy(struct ir3_shader *shader); void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info); -struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, +struct ir3_block * ir3_block_create(struct ir3_shader *shader, + unsigned ntmp, unsigned nin, unsigned nout); + +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, int category, opc_t opc); struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, int num, int flags); + +/* comp: + * 0 - x + * 1 - y + * 2 - z + * 3 - w + */ +static inline uint32_t regid(int num, int comp) +{ + return (num << 2) | (comp & 0x3); +} + +static inline uint32_t reg_num(struct ir3_register *reg) +{ + return reg->num >> 2; +} + +static inline uint32_t reg_comp(struct ir3_register *reg) +{ + return reg->num & 0x3; +} + +static inline bool is_alu(struct ir3_instruction *instr) +{ + return (1 <= instr->category) && (instr->category <= 3); +} + +static inline bool is_sfu(struct ir3_instruction *instr) +{ + return (instr->category == 4); +} + +static inline bool is_tex(struct ir3_instruction *instr) +{ + return (instr->category == 5); +} + +static inline bool is_input(struct ir3_instruction *instr) +{ + return (instr->category == 2) && (instr->opc == OPC_BARY_F); +} + +static inline bool is_gpr(struct ir3_register *reg) +{ + return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)); +} + +/* TODO combine is_gpr()/reg_gpr().. */ +static inline bool reg_gpr(struct ir3_register *r) +{ + if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV)) + return false; + if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) + return false; + return true; +} + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + +/* ************************************************************************* */ +/* split this out or find some helper to use.. like main/bitset.h.. */ + +#include <string.h> + +#define MAX_REG 256 + +typedef uint8_t regmask_t[2 * MAX_REG / 8]; + +static inline unsigned regmask_idx(struct ir3_register *reg) +{ + unsigned num = reg->num; + assert(num < MAX_REG); + if (reg->flags & IR3_REG_HALF) + num += MAX_REG; + return num; +} + +static inline void regmask_init(regmask_t *regmask) +{ + memset(regmask, 0, sizeof(*regmask)); +} + +static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + unsigned i; + for (i = 0; i < 4; i++, idx++) + if (reg->wrmask & (1 << i)) + (*regmask)[idx / 8] |= 1 << (idx % 8); +} + +/* set bits in a if not set in b, conceptually: + * a |= (reg & ~b) + */ +static inline void regmask_set_if_not(regmask_t *a, + struct ir3_register *reg, regmask_t *b) +{ + unsigned idx = regmask_idx(reg); + unsigned i; + for (i = 0; i < 4; i++, idx++) + if (reg->wrmask & (1 << i)) + if (!((*b)[idx / 8] & (1 << (idx % 8)))) + (*a)[idx / 8] |= 1 << (idx % 8); +} + +static inline unsigned regmask_get(regmask_t *regmask, + struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + unsigned i; + for (i = 0; i < 4; i++, idx++) + if (reg->wrmask & (1 << i)) + if ((*regmask)[idx / 8] & (1 << (idx % 8))) + return true; + return false; +} + +/* ************************************************************************* */ + #endif /* IR3_H_ */ |