summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/freedreno/a3xx
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno/a3xx')
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c 34
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c 76
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_compiler.h 55
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.c 37
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/fd3_program.h 12
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/instr-a3xx.h 31
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3.c (renamed from src/gallium/drivers/freedreno/a3xx/ir-a3xx.c) 59
-rw-r--r-- src/gallium/drivers/freedreno/a3xx/ir3.h (renamed from src/gallium/drivers/freedreno/a3xx/ir-a3xx.h) 148
8 files changed, 307 insertions, 145 deletions
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
index 2d5ae62a64a..0e45ec54b38 100644
--- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
@@ -285,21 +285,7 @@ static void print_instr_cat2(instr_t *instr)
static void print_instr_cat3(instr_t *instr)
{
instr_cat3_t *cat3 = &instr->cat3;
- bool full = true;
-
- // XXX is this based on opc or some other bit?
- switch (cat3->opc) {
- case OPC_MAD_F16:
- case OPC_MAD_U16:
- case OPC_MAD_S16:
- case OPC_SEL_B16:
- case OPC_SEL_S16:
- case OPC_SEL_F16:
- case OPC_SAD_S16:
- case OPC_SAD_S32: // really??
- full = false;
- break;
- }
+ bool full = instr_cat3_full(cat3);
printf(" ");
print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
@@ -747,26 +733,12 @@ struct opc_info {
#undef OPC
};
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
-
-static uint32_t getopc(instr_t *instr)
-{
- switch (instr->opc_cat) {
- case 0: return instr->cat0.opc;
- case 1: return 0;
- case 2: return instr->cat2.opc;
- case 3: return instr->cat3.opc;
- case 4: return instr->cat4.opc;
- case 5: return instr->cat5.opc;
- case 6: return instr->cat6.opc;
- default: return 0;
- }
-}
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
static void print_instr(uint32_t *dwords, int level, int n)
{
instr_t *instr = (instr_t *)dwords;
- uint32_t opc = getopc(instr);
+ uint32_t opc = instr_opc(instr);
const char *name;
printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
index 2c32c0fa2a7..5ab34e557b9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
@@ -44,12 +44,13 @@
#include "fd3_util.h"
#include "instr-a3xx.h"
-#include "ir-a3xx.h"
+#include "ir3.h"
struct fd3_compile_context {
const struct tgsi_token *tokens;
struct ir3_shader *ir;
+ struct ir3_block *block;
struct fd3_shader_stateobj *so;
struct tgsi_parse_context parser;
@@ -124,6 +125,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
ctx->tokens = tokens;
ctx->ir = so->ir;
+ ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
ctx->so = so;
ctx->last_input = NULL;
ctx->last_rel = NULL;
@@ -176,7 +178,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...)
_debug_vprintf(format, ap);
va_end(ap);
tgsi_dump(ctx->tokens, 0);
- assert(0);
+ debug_assert(0);
}
#define compile_assert(ctx, cond) do { \
@@ -208,11 +210,17 @@ handle_last_rel(struct fd3_compile_context *ctx)
}
}
+static struct ir3_instruction *
+instr_create(struct fd3_compile_context *ctx, int category, opc_t opc)
+{
+ return ir3_instr_create(ctx->block, category, opc);
+}
+
static void
add_nop(struct fd3_compile_context *ctx, unsigned count)
{
while (count-- > 0)
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ instr_create(ctx, 0, OPC_NOP);
}
static unsigned
@@ -241,6 +249,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
const struct tgsi_dst_register *dst, unsigned chan)
{
unsigned flags = 0, num = 0;
+ struct ir3_register *reg;
switch (dst->File) {
case TGSI_FILE_OUTPUT:
@@ -256,10 +265,17 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
break;
}
+ if (dst->Indirect)
+ flags |= IR3_REG_RELATIV;
if (ctx->so->half_precision)
flags |= IR3_REG_HALF;
- return ir3_reg_create(instr, regid(num, chan), flags);
+ reg = ir3_reg_create(instr, regid(num, chan), flags);
+
+ if (dst->Indirect)
+ ctx->last_rel = instr;
+
+ return reg;
}
static struct ir3_register *
@@ -517,9 +533,9 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
/* can't have abs or neg on a mov instr, so use
* absneg.f instead to handle these cases:
*/
- instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F);
+ instr = instr_create(ctx, 2, OPC_ABSNEG_F);
} else {
- instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
}
@@ -539,10 +555,10 @@ create_clamp(struct fd3_compile_context *ctx,
{
struct ir3_instruction *instr;
- instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+ instr = instr_create(ctx, 2, OPC_MAX_F);
vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
- instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+ instr = instr_create(ctx, 2, OPC_MIN_F);
vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
}
@@ -707,7 +723,7 @@ trans_arl(const struct instr_translater *t,
tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
/* cov.{f32,f16}s16 Rtmp, Rsrc */
- instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_ftype(ctx);
instr->cat1.dst_type = TYPE_S16;
add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
@@ -716,7 +732,7 @@ trans_arl(const struct instr_translater *t,
add_nop(ctx, 3);
/* shl.b Rtmp, Rtmp, 2 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
+ instr = instr_create(ctx, 2, OPC_SHL_B);
add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
@@ -724,7 +740,7 @@ trans_arl(const struct instr_translater *t,
add_nop(ctx, 3);
/* mova a0, Rtmp */
- instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = TYPE_S16;
instr->cat1.dst_type = TYPE_S16;
add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
@@ -804,7 +820,7 @@ trans_samp(const struct instr_translater *t,
tmp_src = get_internal_temp(ctx, &tmp_dst);
for (j = 0; (j < 4) && (order[j] >= 0); j++) {
- instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = type_mov;
instr->cat1.dst_type = type_mov;
add_dst_reg(ctx, instr, &tmp_dst, j);
@@ -817,7 +833,7 @@ trans_samp(const struct instr_translater *t,
add_nop(ctx, 4 - j);
}
- instr = ir3_instr_create(ctx->ir, 5, t->opc);
+ instr = instr_create(ctx, 5, t->opc);
instr->cat5.type = get_ftype(ctx);
instr->cat5.samp = samp->Index;
instr->cat5.tex = samp->Index;
@@ -915,7 +931,7 @@ trans_cmp(const struct instr_translater *t,
a0 = get_unconst(ctx, a0);
/* cmps.f.ge tmp, a0, a1 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ instr = instr_create(ctx, 2, OPC_CMPS_F);
instr->cat2.condition = condition;
vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
@@ -924,7 +940,7 @@ trans_cmp(const struct instr_translater *t,
case TGSI_OPCODE_SGE:
case TGSI_OPCODE_SLE:
/* cov.u16f16 dst, tmp0 */
- instr = ir3_instr_create(ctx->ir, 1, 0);
+ instr = instr_create(ctx, 1, 0);
instr->cat1.src_type = get_utype(ctx);
instr->cat1.dst_type = get_ftype(ctx);
vectorize(ctx, instr, dst, 1, tmp_src, 0);
@@ -934,12 +950,12 @@ trans_cmp(const struct instr_translater *t,
case TGSI_OPCODE_SLT:
case TGSI_OPCODE_CMP:
/* add.s tmp, tmp, -1 */
- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+ instr = instr_create(ctx, 2, OPC_ADD_S);
vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);
if (t->tgsi_opc == TGSI_OPCODE_CMP) {
/* sel.{f32,f16} dst, src2, tmp, src1 */
- instr = ir3_instr_create(ctx->ir, 3,
+ instr = instr_create(ctx, 3,
ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&inst->Src[2].Register, 0,
@@ -949,7 +965,7 @@ trans_cmp(const struct instr_translater *t,
get_immediate(ctx, &constval0, fui(0.0));
get_immediate(ctx, &constval1, fui(1.0));
/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
- instr = ir3_instr_create(ctx->ir, 3,
+ instr = instr_create(ctx, 3,
ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
vectorize(ctx, instr, dst, 3,
&constval0, 0, tmp_src, 0, &constval1, 0);
@@ -990,7 +1006,7 @@ pop_branch(struct fd3_compile_context *ctx)
* and set (jp) flag on whatever the next instruction was, rather
* than inserting an extra nop..
*/
- instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+ instr = instr_create(ctx, 0, OPC_NOP);
instr->flags |= IR3_INSTR_JP;
/* pop the branch instruction from the stack and fix up branch target: */
@@ -1018,13 +1034,13 @@ trans_if(const struct instr_translater *t,
if (is_const(src))
src = get_unconst(ctx, src);
- instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+ instr = instr_create(ctx, 2, OPC_CMPS_F);
ir3_reg_create(instr, regid(REG_P0, 0), 0);
add_src_reg(ctx, instr, src, src->SwizzleX);
add_src_reg(ctx, instr, &constval, constval.SwizzleX);
instr->cat2.condition = IR3_COND_EQ;
- instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+ instr = instr_create(ctx, 0, OPC_BR);
push_branch(ctx, instr);
}
@@ -1036,7 +1052,7 @@ trans_else(const struct instr_translater *t,
struct ir3_instruction *instr;
/* for first half of if/else/endif, generate a jump past the else: */
- instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+ instr = instr_create(ctx, 0, OPC_JUMP);
pop_branch(ctx);
push_branch(ctx, instr);
@@ -1060,7 +1076,7 @@ instr_cat0(const struct instr_translater *t,
struct fd3_compile_context *ctx,
struct tgsi_full_instruction *inst)
{
- ir3_instr_create(ctx->ir, 0, t->opc);
+ instr_create(ctx, 0, t->opc);
}
static void
@@ -1083,7 +1099,7 @@ instr_cat1(const struct instr_translater *t,
* in the future if we start supporting widening/narrowing or
* conversion to/from integer..
*/
- instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+ instr = instr_create(ctx, 2, OPC_ADD_F);
get_immediate(ctx, &constval, fui(0.0));
vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
} else {
@@ -1129,14 +1145,14 @@ instr_cat2(const struct instr_translater *t,
case OPC_SETRM:
case OPC_CBITS_B:
/* these only have one src reg */
- instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ instr = instr_create(ctx, 2, t->opc);
vectorize(ctx, instr, dst, 1, src0, src0_flags);
break;
default:
if (is_const(src0) && is_const(src1))
src0 = get_unconst(ctx, src0);
- instr = ir3_instr_create(ctx->ir, 2, t->opc);
+ instr = instr_create(ctx, 2, t->opc);
vectorize(ctx, instr, dst, 2, src0, src0_flags,
src1, src1_flags);
break;
@@ -1186,7 +1202,7 @@ instr_cat3(const struct instr_translater *t,
}
}
- instr = ir3_instr_create(ctx->ir, 3,
+ instr = instr_create(ctx, 3,
ctx->so->half_precision ? t->hopc : t->opc);
vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
&inst->Src[2].Register, 0);
@@ -1214,8 +1230,8 @@ instr_cat4(const struct instr_translater *t,
for (i = 0, n = 0; i < 4; i++) {
if (dst->WriteMask & (1 << i)) {
if (n++)
- ir3_instr_create(ctx->ir, 0, OPC_NOP);
- instr = ir3_instr_create(ctx->ir, 4, t->opc);
+ add_nop(ctx, 1);
+ instr = instr_create(ctx, 4, t->opc);
add_dst_reg(ctx, instr, dst, i);
add_src_reg(ctx, instr, src, src->SwizzleX);
}
@@ -1315,7 +1331,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
struct ir3_instruction *instr;
struct ir3_register *dst;
- instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+ instr = instr_create(ctx, 2, OPC_BARY_F);
/* dst register: */
dst = ir3_reg_create(instr, r + j, flags);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
index da25cdce88a..5cdb245640b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
@@ -33,61 +33,6 @@
#include "fd3_util.h"
-/* ************************************************************************* */
-/* split this out or find some helper to use.. like main/bitset.h.. */
-
-#define MAX_REG 256
-
-typedef uint8_t regmask_t[2 * MAX_REG / 8];
-
-static inline unsigned regmask_idx(struct ir3_register *reg)
-{
- unsigned num = reg->num;
- assert(num < MAX_REG);
- if (reg->flags & IR3_REG_HALF)
- num += MAX_REG;
- return num;
-}
-
-static inline void regmask_init(regmask_t *regmask)
-{
- memset(regmask, 0, sizeof(*regmask));
-}
-
-static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
-{
- unsigned idx = regmask_idx(reg);
- unsigned i;
- for (i = 0; i < 4; i++, idx++)
- if (reg->wrmask & (1 << i))
- (*regmask)[idx / 8] |= 1 << (idx % 8);
-}
-
-static inline unsigned regmask_get(regmask_t *regmask,
- struct ir3_register *reg)
-{
- unsigned idx = regmask_idx(reg);
- unsigned i;
- for (i = 0; i < 4; i++, idx++)
- if (reg->wrmask & (1 << i))
- if ((*regmask)[idx / 8] & (1 << (idx % 8)))
- return true;
- return false;
-}
-
-/* comp:
- * 0 - x
- * 1 - y
- * 2 - z
- * 3 - w
- */
-static inline uint32_t regid(int num, int comp)
-{
- return (num << 2) | (comp & 0x3);
-}
-
-/* ************************************************************************* */
-
int fd3_compile_shader(struct fd3_shader_stateobj *so,
const struct tgsi_token *tokens);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 3df29ecc911..ddb33ca5844 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -79,9 +79,10 @@ static void
fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
{
unsigned i;
- for (i = 0; i < so->inputs_count; i++) {
+ for (i = 0; i < so->inputs_count; i++)
so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
- }
+ for (i = 0; i < so->outputs_count; i++)
+ so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2);
}
static struct fd3_shader_stateobj *
@@ -230,7 +231,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
}
static uint32_t
-find_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
{
int j;
for (j = 0; j < so->outputs_count; j++)
@@ -257,13 +258,13 @@ fd3_program_emit(struct fd_ringbuffer *ring,
fsi = &fp->info;
}
- pos_regid = find_regid(vp,
+ pos_regid = find_output_regid(vp,
fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- posz_regid = find_regid(fp,
+ posz_regid = find_output_regid(fp,
fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
- psize_regid = find_regid(vp,
+ psize_regid = find_output_regid(vp,
fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
- color_regid = find_regid(fp,
+ color_regid = find_output_regid(fp,
fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
/* we could probably divide this up into things that need to be
@@ -501,10 +502,11 @@ create_blit_fp(struct pipe_context *pctx)
{
struct fd3_shader_stateobj *so;
struct ir3_shader *ir = ir3_shader_create();
+ struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
struct ir3_instruction *instr;
/* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
- instr = ir3_instr_create(ir, 2, OPC_BARY_F);
+ instr = ir3_instr_create(block, 2, OPC_BARY_F);
instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
instr->repeat = 1;
@@ -514,11 +516,11 @@ create_blit_fp(struct pipe_context *pctx)
ir3_reg_create(instr, regid(0,0), 0); /* r0.x */
/* (rpt5)nop */
- instr = ir3_instr_create(ir, 0, OPC_NOP);
+ instr = ir3_instr_create(block, 0, OPC_NOP);
instr->repeat = 5;
/* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
- instr = ir3_instr_create(ir, 5, OPC_SAM);
+ instr = ir3_instr_create(block, 5, OPC_SAM);
instr->cat5.samp = 0;
instr->cat5.tex = 0;
instr->cat5.type = TYPE_F32;
@@ -528,7 +530,7 @@ create_blit_fp(struct pipe_context *pctx)
ir3_reg_create(instr, regid(0,2), 0); /* r0.z */
/* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */
- instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */
+ instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */
instr->flags = IR3_INSTR_SY;
instr->repeat = 3;
instr->cat1.src_type = TYPE_F32;
@@ -538,7 +540,7 @@ create_blit_fp(struct pipe_context *pctx)
ir3_reg_create(instr, regid(0,0), IR3_REG_R); /* (r)r0.x */
/* end */
- instr = ir3_instr_create(ir, 0, OPC_END);
+ instr = ir3_instr_create(block, 0, OPC_END);
so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
if (!so)
@@ -573,10 +575,11 @@ create_blit_vp(struct pipe_context *pctx)
{
struct fd3_shader_stateobj *so;
struct ir3_shader *ir = ir3_shader_create();
+ struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
struct ir3_instruction *instr;
/* (sy)(ss)end */
- instr = ir3_instr_create(ir, 0, OPC_END);
+ instr = ir3_instr_create(block, 0, OPC_END);
instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
so = create_internal_shader(pctx, SHADER_VERTEX, ir);
@@ -611,10 +614,11 @@ create_solid_fp(struct pipe_context *pctx)
{
struct fd3_shader_stateobj *so;
struct ir3_shader *ir = ir3_shader_create();
+ struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
struct ir3_instruction *instr;
/* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */
- instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */
+ instr = ir3_instr_create(block, 1, 0); /* mov/cov instructions have no opc */
instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
instr->repeat = 3;
instr->cat1.src_type = TYPE_F16;
@@ -625,7 +629,7 @@ create_solid_fp(struct pipe_context *pctx)
IR3_REG_CONST | IR3_REG_R);
/* end */
- instr = ir3_instr_create(ir, 0, OPC_END);
+ instr = ir3_instr_create(block, 0, OPC_END);
so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
if (!so)
@@ -650,10 +654,11 @@ create_solid_vp(struct pipe_context *pctx)
{
struct fd3_shader_stateobj *so;
struct ir3_shader *ir = ir3_shader_create();
+ struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
struct ir3_instruction *instr;
/* (sy)(ss)end */
- instr = ir3_instr_create(ir, 0, OPC_END);
+ instr = ir3_instr_create(block, 0, OPC_END);
instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index 4aeeb2e3006..c781dfe4be9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -33,7 +33,7 @@
#include "freedreno_context.h"
-#include "ir-a3xx.h"
+#include "ir3.h"
#include "disasm.h"
typedef uint16_t fd3_semantic; /* semantic name + index */
@@ -43,6 +43,16 @@ fd3_semantic_name(uint8_t name, uint16_t index)
return (name << 8) | (index & 0xff);
}
+static inline uint8_t sem2name(fd3_semantic sem)
+{
+ return sem >> 8;
+}
+
+static inline uint16_t sem2idx(fd3_semantic sem)
+{
+ return sem & 0xff;
+}
+
struct fd3_shader_stateobj {
enum shader_t type;
diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
index 1085ddf8c12..b0f78341131 100644
--- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
@@ -438,6 +438,23 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat3_t;
+static inline bool instr_cat3_full(instr_cat3_t *cat3)
+{
+ switch (cat3->opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_U16:
+ case OPC_MAD_S16:
+ case OPC_SEL_B16:
+ case OPC_SEL_S16:
+ case OPC_SEL_F16:
+ case OPC_SAD_S16:
+ case OPC_SAD_S32: // really??
+ return false;
+ default:
+ return true;
+ }
+}
+
typedef struct PACKED {
/* dword0: */
union PACKED {
@@ -612,4 +629,18 @@ typedef union PACKED {
};
} instr_t;
+static inline uint32_t instr_opc(instr_t *instr)
+{
+ switch (instr->opc_cat) {
+ case 0: return instr->cat0.opc;
+ case 1: return 0;
+ case 2: return instr->cat2.opc;
+ case 3: return instr->cat3.opc;
+ case 4: return instr->cat4.opc;
+ case 5: return instr->cat5.opc;
+ case 6: return instr->cat6.opc;
+ default: return 0;
+ }
+}
+
#endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir3.c
index a39214ee663..2a06d42c7d6 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.c
@@ -21,7 +21,7 @@
* SOFTWARE.
*/
-#include "ir-a3xx.h"
+#include "ir3.h"
#include <stdlib.h>
#include <stdio.h>
@@ -72,7 +72,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
if (reg->flags & IR3_REG_IMMED) {
val.iim_val = reg->iim_val;
} else {
- int8_t max = (reg->num + repeat) >> 2;
+ int8_t components = util_last_bit(reg->wrmask);
+ int8_t max = (reg->num + repeat + components - 1) >> 2;
val.comp = reg->num & 0x3;
val.num = reg->num >> 2;
@@ -514,6 +515,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in
info->max_reg = -1;
info->max_half_reg = -1;
info->max_const = -1;
+ info->instrs_count = 0;
/* need a integer number of instruction "groups" (sets of four
* instructions), so pad out w/ NOPs if needed:
@@ -528,6 +530,7 @@ void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *in
int ret = emit[instr->category](instr, dwords, info);
if (ret)
goto fail;
+ info->instrs_count += 1 + instr->repeat;
dwords += 2;
}
@@ -552,30 +555,68 @@ static struct ir3_register * reg_create(struct ir3_shader *shader,
static void insert_instr(struct ir3_shader *shader,
struct ir3_instruction *instr)
{
+#ifdef DEBUG
+ static uint32_t serialno = 0;
+ instr->serialno = ++serialno;
+#endif
assert(shader->instrs_count < ARRAY_SIZE(shader->instrs));
shader->instrs[shader->instrs_count++] = instr;
}
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+ unsigned ntmp, unsigned nin, unsigned nout)
+{
+ struct ir3_block *block;
+ unsigned size;
+ char *ptr;
+
+ size = sizeof(*block);
+ size += sizeof(block->temporaries[0]) * ntmp;
+ size += sizeof(block->inputs[0]) * nin;
+ size += sizeof(block->outputs[0]) * nout;
+
+ ptr = ir3_alloc(shader, size);
+
+ block = (void *)ptr;
+ ptr += sizeof(*block);
+
+ block->temporaries = (void *)ptr;
+ block->ntemporaries = ntmp;
+ ptr += sizeof(block->temporaries[0]) * ntmp;
+
+ block->inputs = (void *)ptr;
+ block->ninputs = nin;
+ ptr += sizeof(block->inputs[0]) * nin;
+
+ block->outputs = (void *)ptr;
+ block->noutputs = nout;
+ ptr += sizeof(block->outputs[0]) * nout;
+
+ block->shader = shader;
+
+ return block;
+}
+
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc)
{
struct ir3_instruction *instr =
- ir3_alloc(shader, sizeof(struct ir3_instruction));
- instr->shader = shader;
+ ir3_alloc(block->shader, sizeof(struct ir3_instruction));
+ instr->block = block;
instr->category = category;
instr->opc = opc;
- insert_instr(shader, instr);
+ insert_instr(block->shader, instr);
return instr;
}
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr =
- ir3_alloc(instr->shader, sizeof(struct ir3_instruction));
+ ir3_alloc(instr->block->shader, sizeof(struct ir3_instruction));
unsigned i;
*new_instr = *instr;
- insert_instr(instr->shader, new_instr);
+ insert_instr(instr->block->shader, new_instr);
/* clone registers: */
new_instr->regs_count = 0;
@@ -592,7 +633,7 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags)
{
- struct ir3_register *reg = reg_create(instr->shader, num, flags);
+ struct ir3_register *reg = reg_create(instr->block->shader, num, flags);
assert(instr->regs_count < ARRAY_SIZE(instr->regs));
instr->regs[instr->regs_count++] = reg;
return reg;
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
index b0afe1868eb..896bec114fa 100644
--- a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
+++ b/src/gallium/drivers/freedreno/a3xx/ir3.h
@@ -28,15 +28,19 @@
#include <stdbool.h>
#include "instr-a3xx.h"
+#include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */
/* low level intermediate representation of an adreno shader program */
struct ir3_shader;
+struct ir3_instruction;
+struct ir3_block;
struct ir3_shader * fd_asm_parse(const char *src);
struct ir3_shader_info {
uint16_t sizedwords;
+ uint16_t instrs_count; /* expanded to account for rpt's */
/* NOTE: max_reg, etc, does not include registers not touched
* by the shader (ie. vertex fetched via VFD_DECODE but not
* touched by shader)
@@ -84,7 +88,7 @@ struct ir3_register {
};
struct ir3_instruction {
- struct ir3_shader *shader;
+ struct ir3_block *block;
int category;
opc_t opc;
enum {
@@ -138,7 +142,7 @@ struct ir3_instruction {
} flags;
int repeat;
unsigned regs_count;
- struct ir3_register *regs[4];
+ struct ir3_register *regs[5];
union {
struct {
char inv;
@@ -168,6 +172,9 @@ struct ir3_instruction {
int iim_val;
} cat6;
};
+#ifdef DEBUG
+ uint32_t serialno;
+#endif
};
#define MAX_INSTRS 1024
@@ -179,16 +186,151 @@ struct ir3_shader {
unsigned heap_idx;
};
+struct ir3_block {
+ struct ir3_shader *shader;
+ unsigned ntemporaries, ninputs, noutputs;
+ /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
+ struct ir3_instruction **temporaries;
+ struct ir3_instruction **inputs;
+ struct ir3_instruction **outputs;
+ struct ir3_block *parent;
+ struct ir3_instruction *head;
+};
+
struct ir3_shader * ir3_shader_create(void);
void ir3_shader_destroy(struct ir3_shader *shader);
void * ir3_shader_assemble(struct ir3_shader *shader,
struct ir3_shader_info *info);
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+ unsigned ntmp, unsigned nin, unsigned nout);
+
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
int category, opc_t opc);
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+
+/* comp:
+ * 0 - x
+ * 1 - y
+ * 2 - z
+ * 3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+ return (num << 2) | (comp & 0x3);
+}
+
+static inline uint32_t reg_num(struct ir3_register *reg)
+{
+ return reg->num >> 2;
+}
+
+static inline uint32_t reg_comp(struct ir3_register *reg)
+{
+ return reg->num & 0x3;
+}
+
+static inline bool is_alu(struct ir3_instruction *instr)
+{
+ return (1 <= instr->category) && (instr->category <= 3);
+}
+
+static inline bool is_sfu(struct ir3_instruction *instr)
+{
+ return (instr->category == 4);
+}
+
+static inline bool is_tex(struct ir3_instruction *instr)
+{
+ return (instr->category == 5);
+}
+
+static inline bool is_input(struct ir3_instruction *instr)
+{
+ return (instr->category == 2) && (instr->opc == OPC_BARY_F);
+}
+
+static inline bool is_gpr(struct ir3_register *reg)
+{
+ return !(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED));
+}
+
+/* TODO combine is_gpr()/reg_gpr().. */
+static inline bool reg_gpr(struct ir3_register *r)
+{
+ if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV))
+ return false;
+ if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
+ return false;
+ return true;
+}
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
+
+#include <string.h>
+
+#define MAX_REG 256
+
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
+
+static inline unsigned regmask_idx(struct ir3_register *reg)
+{
+ unsigned num = reg->num;
+ assert(num < MAX_REG);
+ if (reg->flags & IR3_REG_HALF)
+ num += MAX_REG;
+ return num;
+}
+
+static inline void regmask_init(regmask_t *regmask)
+{
+ memset(regmask, 0, sizeof(*regmask));
+}
+
+static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
+{
+ unsigned idx = regmask_idx(reg);
+ unsigned i;
+ for (i = 0; i < 4; i++, idx++)
+ if (reg->wrmask & (1 << i))
+ (*regmask)[idx / 8] |= 1 << (idx % 8);
+}
+
+/* set bits in a if not set in b, conceptually:
+ * a |= (reg & ~b)
+ */
+static inline void regmask_set_if_not(regmask_t *a,
+ struct ir3_register *reg, regmask_t *b)
+{
+ unsigned idx = regmask_idx(reg);
+ unsigned i;
+ for (i = 0; i < 4; i++, idx++)
+ if (reg->wrmask & (1 << i))
+ if (!((*b)[idx / 8] & (1 << (idx % 8))))
+ (*a)[idx / 8] |= 1 << (idx % 8);
+}
+
+static inline unsigned regmask_get(regmask_t *regmask,
+ struct ir3_register *reg)
+{
+ unsigned idx = regmask_idx(reg);
+ unsigned i;
+ for (i = 0; i < 4; i++, idx++)
+ if (reg->wrmask & (1 << i))
+ if ((*regmask)[idx / 8] & (1 << (idx % 8)))
+ return true;
+ return false;
+}
+
+/* ************************************************************************* */
+
#endif /* IR3_H_ */