diff options
author | Rob Clark <[email protected]> | 2014-07-25 11:15:59 -0400 |
---|---|---|
committer | Rob Clark <[email protected]> | 2014-07-25 13:29:28 -0400 |
commit | db193e5ad06e7a2fbcffb3bb5df85d212eb12291 (patch) | |
tree | 58d1ec24c0af7b1acb1477eeaababe3d7eda6019 /src/gallium/drivers/freedreno | |
parent | 7d7e6ae9c3544ce1889aa9b8a34545c6f42017e7 (diff) |
freedreno/ir3: split out shader compiler from a3xx
Move the bits we want to share between generations from fd3_program to
ir3_shader. So overall structure is:
fdN_shader_stateobj -> ir3_shader -> ir3_shader_variant -> ir3
                                  |- ...
                                  \- ir3_shader_variant -> ir3
So the ir3_shader becomes the topmost generation neutral object, which
manages the set of variants each of which generates, compiles, and
assembles its own ir.
There is a bit of additional renaming to s/fd3_compiler/ir3_compiler/,
etc.
Keep the split between the gallium level stateobj and the shader helper
object because it might be a good idea to pre-compute some generation
specific register values (ie. anything that is independent of linking).
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r-- | src/gallium/drivers/freedreno/Makefile.am | 4 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/Makefile.sources | 25 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_draw.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_emit.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_gmem.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.c | 217 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_program.h | 123 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a3xx/fd3_util.h | 18 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/disasm-a3xx.c (renamed from src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/instr-a3xx.h (renamed from src/gallium/drivers/freedreno/a3xx/instr-a3xx.h) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3.h (renamed from src/gallium/drivers/freedreno/a3xx/ir3.h) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler.c (renamed from src/gallium/drivers/freedreno/a3xx/fd3_compiler.c) | 147 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler.h (renamed from src/gallium/drivers/freedreno/a3xx/fd3_compiler.h) | 11 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c (renamed from src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c) | 110 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_cp.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_cp.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_depth.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_depth.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_dump.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_dump.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_flatten.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_flatten.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_ra.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_sched.c (renamed from src/gallium/drivers/freedreno/a3xx/ir3_sched.c) | 0 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.c | 211 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_shader.h | 163 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_visitor.h (renamed from src/gallium/drivers/freedreno/a3xx/ir3_visitor.h) | 0 |
25 files changed, 580 insertions, 477 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 7947dd1a56e..7d9c6e4933a 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -7,6 +7,7 @@ AM_CFLAGS = \ -Wno-packed-bitfield-compat \ -I$(top_srcdir)/src/gallium/drivers/freedreno/a3xx \ -I$(top_srcdir)/src/gallium/drivers/freedreno/a2xx \ + -I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \ $(GALLIUM_DRIVER_CFLAGS) \ $(FREEDRENO_CFLAGS) @@ -15,4 +16,5 @@ noinst_LTLIBRARIES = libfreedreno.la libfreedreno_la_SOURCES = \ $(C_SOURCES) \ $(a2xx_SOURCES) \ - $(a3xx_SOURCES) + $(a3xx_SOURCES) \ + $(ir3_SOURCES) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 0dc7fc08512..85e0b7eda6f 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -33,8 +33,6 @@ a2xx_SOURCES := \ a3xx_SOURCES := \ a3xx/fd3_blend.c \ - a3xx/fd3_compiler.c \ - a3xx/fd3_compiler_old.c \ a3xx/fd3_context.c \ a3xx/fd3_draw.c \ a3xx/fd3_emit.c \ @@ -45,12 +43,17 @@ a3xx_SOURCES := \ a3xx/fd3_screen.c \ a3xx/fd3_texture.c \ a3xx/fd3_util.c \ - a3xx/fd3_zsa.c \ - a3xx/disasm-a3xx.c \ - a3xx/ir3_cp.c \ - a3xx/ir3_depth.c \ - a3xx/ir3_dump.c \ - a3xx/ir3_flatten.c \ - a3xx/ir3_ra.c \ - a3xx/ir3_sched.c \ - a3xx/ir3.c + a3xx/fd3_zsa.c + +ir3_SOURCES := \ + ir3/disasm-a3xx.c \ + ir3/ir3_compiler.c \ + ir3/ir3_compiler_old.c \ + ir3/ir3_shader.c \ + ir3/ir3_cp.c \ + ir3/ir3_depth.c \ + ir3/ir3_dump.c \ + ir3/ir3_flatten.c \ + ir3/ir3_ra.c \ + ir3/ir3_sched.c \ + ir3/ir3.c diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index 4b2d94103f5..89af740c07c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -44,7 +44,7 @@ static void emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring, - struct 
fd3_shader_key key) + struct ir3_shader_key key) { struct fd_vertex_stateobj *vtx = ctx->vtx; struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; @@ -70,7 +70,7 @@ emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring, static void draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, - struct fd_ringbuffer *ring, unsigned dirty, struct fd3_shader_key key) + struct fd_ringbuffer *ring, unsigned dirty, struct ir3_shader_key key) { fd3_emit_state(ctx, ring, &ctx->prog, dirty, key); @@ -99,7 +99,7 @@ static void fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) { unsigned dirty = ctx->dirty; - struct fd3_shader_key key = { + struct ir3_shader_key key = { /* do binning pass first: */ .binning_pass = true, .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, @@ -127,7 +127,7 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty) { struct fd3_context *fd3_ctx = fd3_context(ctx); struct fd_ringbuffer *ring = ctx->binning_ring; - struct fd3_shader_key key = { + struct ir3_shader_key key = { .binning_pass = true, .half_precision = true, }; @@ -168,7 +168,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers, struct fd_ringbuffer *ring = ctx->ring; unsigned dirty = ctx->dirty; unsigned ce, i; - struct fd3_shader_key key = { + struct ir3_shader_key key = { .half_precision = true, }; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 1e4de26406a..44932dc241d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -87,7 +87,7 @@ static void emit_constants(struct fd_ringbuffer *ring, enum adreno_state_block sb, struct fd_constbuf_stateobj *constbuf, - struct fd3_shader_variant *shader) + struct ir3_shader_variant *shader) { uint32_t enabled_mask = constbuf->enabled_mask; uint32_t first_immediate; @@ -291,7 +291,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct 
pipe_surface *psurf void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd3_shader_variant *vp, + struct ir3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n) { uint32_t i, j, last = 0; @@ -350,10 +350,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog, uint32_t dirty, - struct fd3_shader_key key) + struct ir3_shader_key key) { - struct fd3_shader_variant *vp; - struct fd3_shader_variant *fp; + struct ir3_shader_variant *vp; + struct ir3_shader_variant *fp; fp = fd3_shader_variant(prog->fp, key); vp = fd3_shader_variant(prog->vp, key); diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h index f2ae4dc295e..5735c9f873d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -33,7 +33,7 @@ #include "freedreno_context.h" #include "fd3_util.h" - +#include "ir3_shader.h" struct fd_ringbuffer; enum adreno_state_block; @@ -56,11 +56,11 @@ struct fd3_vertex_buf { }; void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, - struct fd3_shader_variant *vp, + struct ir3_shader_variant *vp, struct fd3_vertex_buf *vbufs, uint32_t n); void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd_program_stateobj *prog, uint32_t dirty, - struct fd3_shader_key key); + struct ir3_shader_key key); void fd3_emit_restore(struct fd_context *ctx); #endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c index 8519a90ccfa..6828d0e1fb4 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -43,7 +43,7 @@ #include "fd3_util.h" #include "fd3_zsa.h" -static const struct fd3_shader_key key = { +static const struct ir3_shader_key key = { // XXX should set this based on render target format! 
We don't // want half_precision if float32 render target!!! .half_precision = true, diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 164b1521a89..78c71d42e39 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -38,176 +38,23 @@ #include "freedreno_program.h" #include "fd3_program.h" -#include "fd3_compiler.h" #include "fd3_emit.h" #include "fd3_texture.h" #include "fd3_util.h" static void -delete_variant(struct fd3_shader_variant *v) +delete_shader_stateobj(struct fd3_shader_stateobj *so) { - ir3_destroy(v->ir); - fd_bo_del(v->bo); - free(v); -} - -static void -assemble_variant(struct fd3_shader_variant *so) -{ - struct fd_context *ctx = fd_context(so->so->pctx); - uint32_t sz, *bin; - - bin = ir3_assemble(so->ir, &so->info); - sz = so->info.sizedwords * 4; - - so->bo = fd_bo_new(ctx->dev, sz, - DRM_FREEDRENO_GEM_CACHE_WCOMBINE | - DRM_FREEDRENO_GEM_TYPE_KMEM); - - memcpy(fd_bo_map(so->bo), bin, sz); - - free(bin); - - so->instrlen = so->info.sizedwords / 8; - so->constlen = so->info.max_const + 1; -} - -/* for vertex shader, the inputs are loaded into registers before the shader - * is executed, so max_regs from the shader instructions might not properly - * reflect the # of registers actually used: - */ -static void -fixup_vp_regfootprint(struct fd3_shader_variant *so) -{ - unsigned i; - for (i = 0; i < so->inputs_count; i++) { - if (so->inputs[i].compmask) { - uint32_t regid = (so->inputs[i].regid + 3) >> 2; - so->info.max_reg = MAX2(so->info.max_reg, regid); - } - } - for (i = 0; i < so->outputs_count; i++) { - uint32_t regid = (so->outputs[i].regid + 3) >> 2; - so->info.max_reg = MAX2(so->info.max_reg, regid); - } -} - -static struct fd3_shader_variant * -create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) -{ - struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant); - const struct tgsi_token 
*tokens = so->tokens; - int ret; - - if (!v) - return NULL; - - v->so = so; - v->key = key; - v->type = so->type; - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", so->type, - key.binning_pass, key.color_two_side, key.half_precision); - tgsi_dump(tokens, 0); - } - - if (!(fd_mesa_debug & FD_DBG_NOOPT)) { - ret = fd3_compile_shader(v, tokens, key); - if (ret) { - debug_error("new compiler failed, trying fallback!"); - - v->inputs_count = 0; - v->outputs_count = 0; - v->total_in = 0; - v->has_samp = false; - v->immediates_count = 0; - } - } else { - ret = -1; /* force fallback to old compiler */ - } - - if (ret) - ret = fd3_compile_shader_old(v, tokens, key); - - if (ret) { - debug_error("compile failed!"); - goto fail; - } - - assemble_variant(v); - if (!v->bo) { - debug_error("assemble failed!"); - goto fail; - } - - if (so->type == SHADER_VERTEX) - fixup_vp_regfootprint(v); - - if (fd_mesa_debug & FD_DBG_DISASM) { - DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, - key.binning_pass, key.color_two_side, key.half_precision); - disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type); - } - - return v; - -fail: - delete_variant(v); - return NULL; -} - -struct fd3_shader_variant * -fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) -{ - struct fd3_shader_variant *v; - - /* some shader key values only apply to vertex or frag shader, - * so normalize the key to avoid constructing multiple identical - * variants: - */ - if (so->type == SHADER_FRAGMENT) { - key.binning_pass = false; - } - if (so->type == SHADER_VERTEX) { - key.color_two_side = false; - key.half_precision = false; - } - - for (v = so->variants; v; v = v->next) - if (!memcmp(&key, &v->key, sizeof(key))) - return v; - - /* compile new variant if it doesn't exist already: */ - v = create_variant(so, key); - v->next = so->variants; - so->variants = v; - - return v; -} - - -static void -delete_shader(struct 
fd3_shader_stateobj *so) -{ - struct fd3_shader_variant *v, *t; - for (v = so->variants; v; ) { - t = v; - v = v->next; - delete_variant(t); - } - free((void *)so->tokens); + ir3_shader_destroy(so->shader); free(so); } static struct fd3_shader_stateobj * -create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso, +create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, enum shader_t type) { struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); - so->pctx = pctx; - so->type = type; - so->tokens = tgsi_dup_tokens(cso->tokens); + so->shader = ir3_shader_create(pctx, cso->tokens, type); return so; } @@ -215,32 +62,32 @@ static void * fd3_fp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader(pctx, cso, SHADER_FRAGMENT); + return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); } static void fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd3_shader_stateobj *so = hwcso; - delete_shader(so); + delete_shader_stateobj(so); } static void * fd3_vp_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { - return create_shader(pctx, cso, SHADER_VERTEX); + return create_shader_stateobj(pctx, cso, SHADER_VERTEX); } static void fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) { struct fd3_shader_stateobj *so = hwcso; - delete_shader(so); + delete_shader_stateobj(so); } static void -emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so) +emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum adreno_state_block sb; @@ -281,7 +128,7 @@ emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so) } static int -find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) +find_output(const struct ir3_shader_variant *so, ir3_semantic semantic) { int j; @@ -297,7 +144,7 @@ find_output(const struct 
fd3_shader_variant *so, fd3_semantic semantic) */ if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) { unsigned idx = sem2idx(semantic); - return find_output(so, fd3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); + return find_output(so, ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx)); } debug_assert(0); @@ -306,7 +153,7 @@ find_output(const struct fd3_shader_variant *so, fd3_semantic semantic) } static int -next_varying(const struct fd3_shader_variant *so, int i) +next_varying(const struct ir3_shader_variant *so, int i) { while (++i < so->inputs_count) if (so->inputs[i].compmask && so->inputs[i].bary) @@ -315,7 +162,7 @@ next_varying(const struct fd3_shader_variant *so, int i) } static uint32_t -find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic) +find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic) { int j; for (j = 0; j < so->outputs_count; j++) @@ -326,9 +173,9 @@ find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic) void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct fd3_shader_key key) + struct fd_program_stateobj *prog, struct ir3_shader_key key) { - const struct fd3_shader_variant *vp, *fp; + const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; uint32_t pos_regid, posz_regid, psize_regid, color_regid; int i, j, k; @@ -337,7 +184,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, if (key.binning_pass) { /* use dummy stateobj to simplify binning vs non-binning: */ - static const struct fd3_shader_variant binning_fp = {}; + static const struct ir3_shader_variant binning_fp = {}; fp = &binning_fp; } else { fp = fd3_shader_variant(prog->fp, key); @@ -347,13 +194,13 @@ fd3_program_emit(struct fd_ringbuffer *ring, fsi = &fp->info; pos_regid = find_output_regid(vp, - fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); posz_regid = find_output_regid(fp, - fd3_semantic_name(TGSI_SEMANTIC_POSITION, 
0)); + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = find_output_regid(vp, - fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); + ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); color_regid = find_output_regid(fp, - fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -522,16 +369,16 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); - OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ - OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ - OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ - OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + OUT_RING(ring, fp->shader->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, fp->shader->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, fp->shader->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, fp->shader->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); - OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ - OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ - OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ - OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ + OUT_RING(ring, fp->shader->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ } OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); @@ -558,10 +405,10 @@ fix_blit_fp(struct pipe_context *pctx) struct fd_context *ctx = fd_context(pctx); 
struct fd3_shader_stateobj *so = ctx->blit_prog.fp; - so->vpsrepl[0] = 0x99999999; - so->vpsrepl[1] = 0x99999999; - so->vpsrepl[2] = 0x99999999; - so->vpsrepl[3] = 0x99999999; + so->shader->vpsrepl[0] = 0x99999999; + so->shader->vpsrepl[1] = 0x99999999; + so->shader->vpsrepl[2] = 0x99999999; + so->shader->vpsrepl[3] = 0x99999999; } void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index e2ed1cc3dda..cebaeecc5bc 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -30,127 +30,22 @@ #define FD3_PROGRAM_H_ #include "pipe/p_context.h" - #include "freedreno_context.h" -#include "fd3_util.h" -#include "ir3.h" -#include "disasm.h" - -typedef uint16_t fd3_semantic; /* semantic name + index */ -static inline fd3_semantic -fd3_semantic_name(uint8_t name, uint16_t index) -{ - return (name << 8) | (index & 0xff); -} - -static inline uint8_t sem2name(fd3_semantic sem) -{ - return sem >> 8; -} - -static inline uint16_t sem2idx(fd3_semantic sem) -{ - return sem & 0xff; -} - -struct fd3_shader_variant { - struct fd_bo *bo; - - struct fd3_shader_key key; - - struct ir3_info info; - struct ir3 *ir; - - /* the instructions length is in units of instruction groups - * (4 instructions, 8 dwords): - */ - unsigned instrlen; - - /* the constants length is in units of vec4's, and is the sum of - * the uniforms and the built-in compiler constants - */ - unsigned constlen; - - /* About Linkage: - * + Let the frag shader determine the position/compmask for the - * varyings, since it is the place where we know if the varying - * is actually used, and if so, which components are used. So - * what the hw calls "outloc" is taken from the "inloc" of the - * frag shader. - * + From the vert shader, we only need the output regid - */ - - /* for frag shader, pos_regid holds the frag_pos, ie. 
what is passed - * to bary.f instructions - */ - uint8_t pos_regid; - bool frag_coord, frag_face; - - /* varyings/outputs: */ - unsigned outputs_count; - struct { - fd3_semantic semantic; - uint8_t regid; - } outputs[16 + 2]; /* +POSITION +PSIZE */ - bool writes_pos, writes_psize; - - /* vertices/inputs: */ - unsigned inputs_count; - struct { - fd3_semantic semantic; - uint8_t regid; - uint8_t compmask; - uint8_t ncomp; - /* in theory inloc of fs should match outloc of vs: */ - uint8_t inloc; - uint8_t bary; - } inputs[16 + 2]; /* +POSITION +FACE */ - - unsigned total_in; /* sum of inputs (scalar) */ - - /* do we have one or more texture sample instructions: */ - bool has_samp; - - /* const reg # of first immediate, ie. 1 == c1 - * (not regid, because TGSI thinks in terms of vec4 registers, - * not scalar registers) - */ - unsigned first_immediate; - unsigned immediates_count; - struct { - uint32_t val[4]; - } immediates[64]; - - /* shader varients form a linked list: */ - struct fd3_shader_variant *next; - - /* replicated here to avoid passing extra ptrs everywhere: */ - enum shader_t type; - struct fd3_shader_stateobj *so; -}; +#include "ir3_shader.h" struct fd3_shader_stateobj { - enum shader_t type; - - struct pipe_context *pctx; - const struct tgsi_token *tokens; - - struct fd3_shader_variant *variants; - - /* so far, only used for blit_prog shader.. values for - * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE - * - * Possibly should be in fd3_program_variant? 
- */ - uint32_t vinterp[4], vpsrepl[4]; + struct ir3_shader *shader; }; -struct fd3_shader_variant * fd3_shader_variant(struct fd3_shader_stateobj *so, - struct fd3_shader_key key); - void fd3_program_emit(struct fd_ringbuffer *ring, - struct fd_program_stateobj *prog, struct fd3_shader_key key); + struct fd_program_stateobj *prog, struct ir3_shader_key key); void fd3_prog_init(struct pipe_context *pctx); +static inline struct ir3_shader_variant * +fd3_shader_variant(struct fd3_shader_stateobj *so, struct ir3_shader_key key) +{ + return ir3_shader_variant(so->shader, key); +} + #endif /* FD3_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h index 6462d18f913..4681840b173 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_util.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h @@ -43,22 +43,4 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); -/* Configuration key used to identify a shader variant.. different - * shader variants can be used to implement features not supported - * in hw (two sided color), binning-pass vertex shader, etc. 
- * - * NOTE: this is declared here (rather than fd3_program.h) as it is - * passed around through a lot of the emit code in various parts - * which would otherwise not necessarily need to incl fd3_program.h - */ -struct fd3_shader_key { - /* vertex shader variant parameters: */ - unsigned binning_pass : 1; - - /* fragment shader variant parameters: */ - unsigned color_two_side : 1; - unsigned half_precision : 1; -}; -struct fd3_shader_variant; - #endif /* FD3_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 8c3704bf658..8c3704bf658 100644 --- a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index c67f1037ced..c67f1037ced 100644 --- a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index ea2a9251b28..ea2a9251b28 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 9ed914ba2e4..9ed914ba2e4 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 0c22e55711b..1fa2fd4e389 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -40,18 +40,19 @@ #include "tgsi/tgsi_scan.h" #include "freedreno_lowering.h" +#include "freedreno_util.h" -#include "fd3_compiler.h" -#include "fd3_program.h" +#include "ir3_compiler.h" +#include "ir3_shader.h" #include "instr-a3xx.h" #include "ir3.h" -struct fd3_compile_context { +struct 
ir3_compile_context { const struct tgsi_token *tokens; bool free_tokens; struct ir3 *ir; - struct fd3_shader_variant *so; + struct ir3_shader_variant *so; struct ir3_block *block; struct ir3_instruction *current_instr; @@ -117,15 +118,15 @@ struct fd3_compile_context { }; -static void vectorize(struct fd3_compile_context *ctx, +static void vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, struct tgsi_dst_register *dst, int nsrcs, ...); -static void create_mov(struct fd3_compile_context *ctx, +static void create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src); -static type_t get_ftype(struct fd3_compile_context *ctx); +static type_t get_ftype(struct ir3_compile_context *ctx); static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, +compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, const struct tgsi_token *tokens) { unsigned ret; @@ -188,7 +189,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, } static void -compile_error(struct fd3_compile_context *ctx, const char *format, ...) +compile_error(struct ir3_compile_context *ctx, const char *format, ...) { va_list ap; va_start(ap, format); @@ -203,7 +204,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...) 
} while (0) static void -compile_free(struct fd3_compile_context *ctx) +compile_free(struct ir3_compile_context *ctx) { if (ctx->free_tokens) free((void *)ctx->tokens); @@ -212,7 +213,7 @@ compile_free(struct fd3_compile_context *ctx) struct instr_translater { void (*fxn)(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst); unsigned tgsi_opc; opc_t opc; @@ -221,7 +222,7 @@ struct instr_translater { }; static void -instr_finish(struct fd3_compile_context *ctx) +instr_finish(struct ir3_compile_context *ctx) { unsigned i; @@ -243,34 +244,34 @@ instr_finish(struct fd3_compile_context *ctx) * stuff. */ static void -instr_atomic_start(struct fd3_compile_context *ctx) +instr_atomic_start(struct ir3_compile_context *ctx) { ctx->atomic = true; } static void -instr_atomic_end(struct fd3_compile_context *ctx) +instr_atomic_end(struct ir3_compile_context *ctx) { ctx->atomic = false; instr_finish(ctx); } static struct ir3_instruction * -instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) +instr_create(struct ir3_compile_context *ctx, int category, opc_t opc) { instr_finish(ctx); return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc)); } static struct ir3_instruction * -instr_clone(struct fd3_compile_context *ctx, struct ir3_instruction *instr) +instr_clone(struct ir3_compile_context *ctx, struct ir3_instruction *instr) { instr_finish(ctx); return (ctx->current_instr = ir3_instr_clone(instr)); } static struct ir3_block * -push_block(struct fd3_compile_context *ctx) +push_block(struct ir3_compile_context *ctx) { struct ir3_block *block; unsigned ntmp, nin, nout; @@ -320,7 +321,7 @@ push_block(struct fd3_compile_context *ctx) } static void -pop_block(struct fd3_compile_context *ctx) +pop_block(struct ir3_compile_context *ctx) { ctx->block = ctx->block->parent; compile_assert(ctx, ctx->block); @@ -390,7 +391,7 @@ block_temporary(struct ir3_block *block, 
unsigned n) } static struct ir3_instruction * -create_immed(struct fd3_compile_context *ctx, float val) +create_immed(struct ir3_compile_context *ctx, float val) { /* NOTE: *don't* use instr_create() here! */ @@ -404,7 +405,7 @@ create_immed(struct fd3_compile_context *ctx, float val) } static void -ssa_dst(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) { unsigned n = regid(dst->Index, chan); @@ -445,7 +446,7 @@ ssa_dst(struct fd3_compile_context *ctx, struct ir3_instruction *instr, } static void -ssa_src(struct fd3_compile_context *ctx, struct ir3_register *reg, +ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg, const struct tgsi_src_register *src, unsigned chan) { struct ir3_block *block = ctx->block; @@ -490,7 +491,7 @@ ssa_src(struct fd3_compile_context *ctx, struct ir3_register *reg, } static struct ir3_register * -add_dst_reg_wrmask(struct fd3_compile_context *ctx, +add_dst_reg_wrmask(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan, unsigned wrmask) { @@ -557,14 +558,14 @@ add_dst_reg_wrmask(struct fd3_compile_context *ctx, } static struct ir3_register * -add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) { return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1); } static struct ir3_register * -add_src_reg_wrmask(struct fd3_compile_context *ctx, +add_src_reg_wrmask(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_src_register *src, unsigned chan, unsigned wrmask) { @@ -668,7 +669,7 @@ add_src_reg_wrmask(struct fd3_compile_context *ctx, } static struct ir3_register * -add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, 
+add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_src_register *src, unsigned chan) { return add_src_reg_wrmask(ctx, instr, src, chan, 0x1); @@ -693,7 +694,7 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) * generated by a single TGSI op. */ static struct tgsi_src_register * -get_internal_temp(struct fd3_compile_context *ctx, +get_internal_temp(struct ir3_compile_context *ctx, struct tgsi_dst_register *tmp_dst) { struct tgsi_src_register *tmp_src; @@ -736,13 +737,13 @@ is_rel_or_const(struct tgsi_src_register *src) } static type_t -get_ftype(struct fd3_compile_context *ctx) +get_ftype(struct ir3_compile_context *ctx) { return TYPE_F32; } static type_t -get_utype(struct fd3_compile_context *ctx) +get_utype(struct ir3_compile_context *ctx) { return TYPE_U32; } @@ -764,7 +765,7 @@ src_swiz(struct tgsi_src_register *src, int chan) * generate a move to temporary gpr: */ static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) +get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src) { struct tgsi_dst_register tmp_dst; struct tgsi_src_register *tmp_src; @@ -779,7 +780,7 @@ get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) } static void -get_immediate(struct fd3_compile_context *ctx, +get_immediate(struct ir3_compile_context *ctx, struct tgsi_src_register *reg, uint32_t val) { unsigned neg, swiz, idx, i; @@ -826,7 +827,7 @@ get_immediate(struct fd3_compile_context *ctx, } static void -create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, +create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src) { type_t type_mov = get_ftype(ctx); @@ -855,7 +856,7 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, } static void -create_clamp(struct fd3_compile_context *ctx, +create_clamp(struct ir3_compile_context *ctx, 
struct tgsi_dst_register *dst, struct tgsi_src_register *val, struct tgsi_src_register *minval, struct tgsi_src_register *maxval) { @@ -869,7 +870,7 @@ create_clamp(struct fd3_compile_context *ctx, } static void -create_clamp_imm(struct fd3_compile_context *ctx, +create_clamp_imm(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, uint32_t minval, uint32_t maxval) { @@ -885,7 +886,7 @@ create_clamp_imm(struct fd3_compile_context *ctx, } static struct tgsi_dst_register * -get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) +get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = &inst->Dst[0].Register; unsigned i; @@ -908,7 +909,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) } static void -put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, +put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, struct tgsi_dst_register *dst) { /* if necessary, add mov back into original dst: */ @@ -921,7 +922,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, * to turn a scalar instruction into a vector operation: */ static void -vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, struct tgsi_dst_register *dst, int nsrcs, ...) { va_list ap; @@ -992,7 +993,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, static void trans_clamp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1008,7 +1009,7 @@ trans_clamp(const struct instr_translater *t, /* ARL(x) = x, but mova from hrN.x to a0.. 
*/ static void trans_arl(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1059,7 +1060,7 @@ struct tex_info { }; static const struct tex_info * -get_tex_info(struct fd3_compile_context *ctx, +get_tex_info(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { static const struct tex_info tex1d = { @@ -1171,7 +1172,7 @@ get_tex_info(struct fd3_compile_context *ctx, } static struct tgsi_src_register * -get_tex_coord(struct fd3_compile_context *ctx, +get_tex_coord(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, const struct tex_info *tinf) { @@ -1238,7 +1239,7 @@ get_tex_coord(struct fd3_compile_context *ctx, static void trans_samp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1291,7 +1292,7 @@ trans_samp(const struct instr_translater *t, */ static void trans_cmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1405,7 +1406,7 @@ trans_cmp(const struct instr_translater *t, */ static void trans_icmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1475,7 +1476,7 @@ trans_icmp(const struct instr_translater *t, */ static void -push_branch(struct fd3_compile_context *ctx, bool inv, +push_branch(struct ir3_compile_context *ctx, bool inv, struct ir3_instruction *instr, struct ir3_instruction *cond) { unsigned int idx = ctx->branch_count++; @@ -1488,7 +1489,7 @@ push_branch(struct fd3_compile_context *ctx, bool inv, } static struct ir3_instruction * -pop_branch(struct fd3_compile_context *ctx) +pop_branch(struct ir3_compile_context 
*ctx) { unsigned int idx = --ctx->branch_count; return ctx->branch[idx].instr; @@ -1496,7 +1497,7 @@ pop_branch(struct fd3_compile_context *ctx) static void trans_if(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr, *cond; @@ -1532,7 +1533,7 @@ trans_if(const struct instr_translater *t, static void trans_else(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1565,7 +1566,7 @@ find_output(struct ir3_block *block, unsigned n) } static struct ir3_instruction * -create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond, +create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond, struct ir3_instruction *a, struct ir3_instruction *b) { struct ir3_instruction *phi; @@ -1598,7 +1599,7 @@ create_phi(struct fd3_compile_context *ctx, struct ir3_instruction *cond, static void trans_endif(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1715,7 +1716,7 @@ trans_endif(const struct instr_translater *t, static void trans_kill(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr, *immed, *cond = NULL; @@ -1762,7 +1763,7 @@ trans_kill(const struct instr_translater *t, static void trans_killif(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_src_register *src = &inst->Src[0].Register; @@ -1795,7 +1796,7 @@ trans_killif(const struct instr_translater *t, static void trans_cov(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct 
ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1833,7 +1834,7 @@ trans_cov(const struct instr_translater *t, static void instr_cat0(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { instr_create(ctx, 0, t->opc); @@ -1841,7 +1842,7 @@ instr_cat0(const struct instr_translater *t, static void instr_cat1(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1852,7 +1853,7 @@ instr_cat1(const struct instr_translater *t, static void instr_cat2(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1906,7 +1907,7 @@ instr_cat2(const struct instr_translater *t, static void instr_cat3(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1936,7 +1937,7 @@ instr_cat3(const struct instr_translater *t, static void instr_cat4(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -2035,14 +2036,14 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(F2U, trans_cov), }; -static fd3_semantic +static ir3_semantic decl_semantic(const struct tgsi_declaration_semantic *sem) { - return fd3_semantic_name(sem->Name, sem->Index); + return ir3_semantic_name(sem->Name, sem->Index); } static struct ir3_instruction * -decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid, +decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid, unsigned j, 
unsigned inloc) { struct ir3_instruction *instr; @@ -2073,7 +2074,7 @@ decl_in_frag_bary(struct fd3_compile_context *ctx, unsigned regid, * of the interpolated vertex position W component. */ static struct ir3_instruction * -decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid, +decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid, unsigned j) { struct ir3_instruction *instr, *src; @@ -2137,7 +2138,7 @@ decl_in_frag_coord(struct fd3_compile_context *ctx, unsigned regid, * back-facing polygon. */ static struct ir3_instruction * -decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid, +decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid, unsigned j) { struct ir3_instruction *instr, *src; @@ -2194,9 +2195,9 @@ decl_in_frag_face(struct fd3_compile_context *ctx, unsigned regid, } static void -decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; + struct ir3_shader_variant *so = ctx->so; unsigned name = decl->Semantic.Name; unsigned i; @@ -2259,9 +2260,9 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) } static void -decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; + struct ir3_shader_variant *so = ctx->so; unsigned comp = 0; unsigned name = decl->Semantic.Name; unsigned i; @@ -2328,9 +2329,9 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) * frag_face. 
*/ static void -fixup_frag_inputs(struct fd3_compile_context *ctx) +fixup_frag_inputs(struct ir3_compile_context *ctx) { - struct fd3_shader_variant *so = ctx->so; + struct ir3_shader_variant *so = ctx->so; struct ir3_block *block = ctx->block; struct ir3_instruction **inputs; struct ir3_instruction *instr; @@ -2395,7 +2396,7 @@ fixup_frag_inputs(struct fd3_compile_context *ctx) } static void -compile_instructions(struct fd3_compile_context *ctx) +compile_instructions(struct ir3_compile_context *ctx) { push_block(ctx); @@ -2473,7 +2474,7 @@ compile_instructions(struct fd3_compile_context *ctx) } static void -compile_dump(struct fd3_compile_context *ctx) +compile_dump(struct ir3_compile_context *ctx) { const char *name = (ctx->so->type == SHADER_VERTEX) ? "vert" : "frag"; static unsigned n = 0; @@ -2489,10 +2490,10 @@ compile_dump(struct fd3_compile_context *ctx) } int -fd3_compile_shader(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, struct fd3_shader_key key) +ir3_compile_shader(struct ir3_shader_variant *so, + const struct tgsi_token *tokens, struct ir3_shader_key key) { - struct fd3_compile_context ctx; + struct ir3_compile_context ctx; struct ir3_block *block; struct ir3_instruction **inputs; unsigned i, j, actual_in; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h index a53bb3ee9a5..9b11b3d8abf 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h @@ -29,15 +29,14 @@ #ifndef FD3_COMPILER_H_ #define FD3_COMPILER_H_ -#include "fd3_program.h" -#include "fd3_util.h" +#include "ir3_shader.h" -int fd3_compile_shader(struct fd3_shader_variant *so, +int ir3_compile_shader(struct ir3_shader_variant *so, const struct tgsi_token *tokens, - struct fd3_shader_key key); -int fd3_compile_shader_old(struct fd3_shader_variant *so, + struct ir3_shader_key key); +int ir3_compile_shader_old(struct ir3_shader_variant *so, const struct 
tgsi_token *tokens, - struct fd3_shader_key key); + struct ir3_shader_key key); #endif /* FD3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c index 66f724b35c0..1e1ca7ad813 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_old.c @@ -40,21 +40,21 @@ #include "tgsi/tgsi_scan.h" #include "freedreno_lowering.h" +#include "freedreno_util.h" -#include "fd3_compiler.h" -#include "fd3_program.h" -#include "fd3_util.h" +#include "ir3_compiler.h" +#include "ir3_shader.h" #include "instr-a3xx.h" #include "ir3.h" -struct fd3_compile_context { +struct ir3_compile_context { const struct tgsi_token *tokens; bool free_tokens; struct ir3 *ir; struct ir3_block *block; - struct fd3_shader_variant *so; + struct ir3_shader_variant *so; struct tgsi_parse_context parser; unsigned type; @@ -113,14 +113,14 @@ struct fd3_compile_context { }; -static void vectorize(struct fd3_compile_context *ctx, +static void vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, struct tgsi_dst_register *dst, int nsrcs, ...); -static void create_mov(struct fd3_compile_context *ctx, +static void create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src); static unsigned -compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, +compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so, const struct tgsi_token *tokens) { unsigned ret, base = 0; @@ -194,7 +194,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_variant *so, } static void -compile_error(struct fd3_compile_context *ctx, const char *format, ...) +compile_error(struct ir3_compile_context *ctx, const char *format, ...) { va_list ap; va_start(ap, format); @@ -209,7 +209,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...) 
} while (0) static void -compile_free(struct fd3_compile_context *ctx) +compile_free(struct ir3_compile_context *ctx) { if (ctx->free_tokens) free((void *)ctx->tokens); @@ -218,7 +218,7 @@ compile_free(struct fd3_compile_context *ctx) struct instr_translater { void (*fxn)(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst); unsigned tgsi_opc; opc_t opc; @@ -227,7 +227,7 @@ struct instr_translater { }; static void -handle_last_rel(struct fd3_compile_context *ctx) +handle_last_rel(struct ir3_compile_context *ctx) { if (ctx->last_rel) { ctx->last_rel->flags |= IR3_INSTR_UL; @@ -236,20 +236,20 @@ handle_last_rel(struct fd3_compile_context *ctx) } static struct ir3_instruction * -instr_create(struct fd3_compile_context *ctx, int category, opc_t opc) +instr_create(struct ir3_compile_context *ctx, int category, opc_t opc) { return ir3_instr_create(ctx->block, category, opc); } static void -add_nop(struct fd3_compile_context *ctx, unsigned count) +add_nop(struct ir3_compile_context *ctx, unsigned count) { while (count-- > 0) instr_create(ctx, 0, OPC_NOP); } static unsigned -src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) +src_flags(struct ir3_compile_context *ctx, struct ir3_register *reg) { unsigned flags = 0; @@ -270,7 +270,7 @@ src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg) } static struct ir3_register * -add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct tgsi_dst_register *dst, unsigned chan) { unsigned flags = 0, num = 0; @@ -304,7 +304,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, } static struct ir3_register * -add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, const struct 
tgsi_src_register *src, unsigned chan) { unsigned flags = 0, num = 0; @@ -377,7 +377,7 @@ src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) * generated by a single TGSI op. */ static struct tgsi_src_register * -get_internal_temp(struct fd3_compile_context *ctx, +get_internal_temp(struct ir3_compile_context *ctx, struct tgsi_dst_register *tmp_dst) { struct tgsi_src_register *tmp_src; @@ -404,7 +404,7 @@ get_internal_temp(struct fd3_compile_context *ctx, * instructions generated by a single TGSI op. */ static struct tgsi_src_register * -get_internal_temp_hr(struct fd3_compile_context *ctx, +get_internal_temp_hr(struct ir3_compile_context *ctx, struct tgsi_dst_register *tmp_dst) { struct tgsi_src_register *tmp_src; @@ -453,13 +453,13 @@ is_rel_or_const(struct tgsi_src_register *src) } static type_t -get_ftype(struct fd3_compile_context *ctx) +get_ftype(struct ir3_compile_context *ctx) { return ctx->so->key.half_precision ? TYPE_F16 : TYPE_F32; } static type_t -get_utype(struct fd3_compile_context *ctx) +get_utype(struct ir3_compile_context *ctx) { return ctx->so->key.half_precision ? 
TYPE_U16 : TYPE_U32; } @@ -481,7 +481,7 @@ src_swiz(struct tgsi_src_register *src, int chan) * generate a move to temporary gpr: */ static struct tgsi_src_register * -get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) +get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src) { struct tgsi_dst_register tmp_dst; struct tgsi_src_register *tmp_src; @@ -496,7 +496,7 @@ get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src) } static void -get_immediate(struct fd3_compile_context *ctx, +get_immediate(struct ir3_compile_context *ctx, struct tgsi_src_register *reg, uint32_t val) { unsigned neg, swiz, idx, i; @@ -543,7 +543,7 @@ get_immediate(struct fd3_compile_context *ctx, } static void -create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, +create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *src) { type_t type_mov = get_ftype(ctx); @@ -574,7 +574,7 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, } static void -create_clamp(struct fd3_compile_context *ctx, +create_clamp(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, struct tgsi_src_register *val, struct tgsi_src_register *minval, struct tgsi_src_register *maxval) { @@ -588,7 +588,7 @@ create_clamp(struct fd3_compile_context *ctx, } static void -create_clamp_imm(struct fd3_compile_context *ctx, +create_clamp_imm(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, uint32_t minval, uint32_t maxval) { @@ -604,7 +604,7 @@ create_clamp_imm(struct fd3_compile_context *ctx, } static struct tgsi_dst_register * -get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) +get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = &inst->Dst[0].Register; unsigned i; @@ -627,7 +627,7 @@ get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) } static void 
-put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, +put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, struct tgsi_dst_register *dst) { /* if necessary, add mov back into original dst: */ @@ -640,7 +640,7 @@ put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, * to turn a scalar instruction into a vector operation: */ static void -vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, +vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr, struct tgsi_dst_register *dst, int nsrcs, ...) { va_list ap; @@ -716,7 +716,7 @@ vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, static void trans_clamp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -732,7 +732,7 @@ trans_clamp(const struct instr_translater *t, /* ARL(x) = x, but mova from hrN.x to a0.. 
*/ static void trans_arl(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -778,7 +778,7 @@ trans_arl(const struct instr_translater *t, /* texture fetch/sample instructions: */ static void trans_samp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_register *r; @@ -908,7 +908,7 @@ trans_samp(const struct instr_translater *t, */ static void trans_cmp(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1007,7 +1007,7 @@ trans_cmp(const struct instr_translater *t, */ static unsigned -find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) +find_instruction(struct ir3_compile_context *ctx, struct ir3_instruction *instr) { unsigned i; for (i = 0; i < ctx->ir->instrs_count; i++) @@ -1017,13 +1017,13 @@ find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) } static void -push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr) +push_branch(struct ir3_compile_context *ctx, struct ir3_instruction *instr) { ctx->branch[ctx->branch_count++] = instr; } static void -pop_branch(struct fd3_compile_context *ctx) +pop_branch(struct ir3_compile_context *ctx) { struct ir3_instruction *instr; @@ -1047,7 +1047,7 @@ pop_branch(struct fd3_compile_context *ctx) */ static void trans_if(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct ir3_instruction *instr; @@ -1071,7 +1071,7 @@ trans_if(const struct instr_translater *t, static void trans_else(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction 
*inst) { struct ir3_instruction *instr; @@ -1085,7 +1085,7 @@ trans_else(const struct instr_translater *t, static void trans_endif(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { pop_branch(ctx); @@ -1098,7 +1098,7 @@ trans_endif(const struct instr_translater *t, static void instr_cat0(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { instr_create(ctx, 0, t->opc); @@ -1106,7 +1106,7 @@ instr_cat0(const struct instr_translater *t, static void instr_cat1(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1136,7 +1136,7 @@ instr_cat1(const struct instr_translater *t, static void instr_cat2(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1188,7 +1188,7 @@ instr_cat2(const struct instr_translater *t, static void instr_cat3(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1219,7 +1219,7 @@ instr_cat3(const struct instr_translater *t, static void instr_cat4(const struct instr_translater *t, - struct fd3_compile_context *ctx, + struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) { struct tgsi_dst_register *dst = get_dst(ctx, inst); @@ -1290,16 +1290,16 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { INSTR(KILL, instr_cat0, .opc = OPC_KILL), }; -static fd3_semantic +static ir3_semantic decl_semantic(const struct tgsi_declaration_semantic *sem) { - return fd3_semantic_name(sem->Name, sem->Index); + return 
ir3_semantic_name(sem->Name, sem->Index); } static int -decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; + struct ir3_shader_variant *so = ctx->so; unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; unsigned i, flags = 0; int nop = 0; @@ -1366,9 +1366,9 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) } static void -decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { - struct fd3_shader_variant *so = ctx->so; + struct ir3_shader_variant *so = ctx->so; unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; unsigned comp = 0; unsigned name = decl->Semantic.Name; @@ -1419,13 +1419,13 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) } static void -decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +decl_samp(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) { ctx->so->has_samp = true; } static void -compile_instructions(struct fd3_compile_context *ctx) +compile_instructions(struct ir3_compile_context *ctx) { struct ir3 *ir = ctx->ir; int nop = 0; @@ -1502,10 +1502,10 @@ compile_instructions(struct fd3_compile_context *ctx) } int -fd3_compile_shader_old(struct fd3_shader_variant *so, - const struct tgsi_token *tokens, struct fd3_shader_key key) +ir3_compile_shader_old(struct ir3_shader_variant *so, + const struct tgsi_token *tokens, struct ir3_shader_key key) { - struct fd3_compile_context ctx; + struct ir3_compile_context ctx; assert(!so->ir); diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 73c2a27c6eb..73c2a27c6eb 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c diff --git 
a/src/gallium/drivers/freedreno/a3xx/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index dcc0362f0c8..dcc0362f0c8 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/ir3/ir3_dump.c index 1a6f49d51cd..1a6f49d51cd 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_dump.c diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_flatten.c b/src/gallium/drivers/freedreno/ir3/ir3_flatten.c index 9389227034c..9389227034c 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_flatten.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_flatten.c diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index b916dd51393..b916dd51393 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 3ef67731926..3ef67731926 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c new file mode 100644 index 00000000000..ddf99dbc46e --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -0,0 +1,211 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to 
whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "freedreno_context.h" +#include "freedreno_lowering.h" +#include "freedreno_util.h" + +#include "ir3_shader.h" +#include "ir3_compiler.h" + + +static void +delete_variant(struct ir3_shader_variant *v) +{ + ir3_destroy(v->ir); + fd_bo_del(v->bo); + free(v); +} + +static void +assemble_variant(struct ir3_shader_variant *v) +{ + struct fd_context *ctx = fd_context(v->shader->pctx); + uint32_t sz, *bin; + + bin = ir3_assemble(v->ir, &v->info); + sz = v->info.sizedwords * 4; + + v->bo = fd_bo_new(ctx->dev, sz, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM); + + memcpy(fd_bo_map(v->bo), bin, sz); + + free(bin); + + v->instrlen = v->info.sizedwords / 8; + v->constlen = v->info.max_const + 1; +} + +/* for vertex shader, the inputs are loaded into registers before the shader + * is executed, so max_regs from the shader instructions might not properly + * reflect the # of registers actually used: + */ +static void +fixup_vp_regfootprint(struct 
ir3_shader_variant *v) +{ + unsigned i; + for (i = 0; i < v->inputs_count; i++) { + if (v->inputs[i].compmask) { + uint32_t regid = (v->inputs[i].regid + 3) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } + } + for (i = 0; i < v->outputs_count; i++) { + uint32_t regid = (v->outputs[i].regid + 3) >> 2; + v->info.max_reg = MAX2(v->info.max_reg, regid); + } +} + +static struct ir3_shader_variant * +create_variant(struct ir3_shader *shader, struct ir3_shader_key key) +{ + struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant); + const struct tgsi_token *tokens = shader->tokens; + int ret; + + if (!v) + return NULL; + + v->shader = shader; + v->key = key; + v->type = shader->type; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type, + key.binning_pass, key.color_two_side, key.half_precision); + tgsi_dump(tokens, 0); + } + + if (!(fd_mesa_debug & FD_DBG_NOOPT)) { + ret = ir3_compile_shader(v, tokens, key); + if (ret) { + debug_error("new compiler failed, trying fallback!"); + + v->inputs_count = 0; + v->outputs_count = 0; + v->total_in = 0; + v->has_samp = false; + v->immediates_count = 0; + } + } else { + ret = -1; /* force fallback to old compiler */ + } + + if (ret) + ret = ir3_compile_shader_old(v, tokens, key); + + if (ret) { + debug_error("compile failed!"); + goto fail; + } + + assemble_variant(v); + if (!v->bo) { + debug_error("assemble failed!"); + goto fail; + } + + if (shader->type == SHADER_VERTEX) + fixup_vp_regfootprint(v); + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, + key.binning_pass, key.color_two_side, key.half_precision); + disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type); + } + + return v; + +fail: + delete_variant(v); + return NULL; +} + +struct ir3_shader_variant * +ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key) +{ + struct ir3_shader_variant *v; + + /* some shader key values 
only apply to vertex or frag shader, + * so normalize the key to avoid constructing multiple identical + * variants: + */ + if (shader->type == SHADER_FRAGMENT) { + key.binning_pass = false; + } + if (shader->type == SHADER_VERTEX) { + key.color_two_side = false; + key.half_precision = false; + } + + for (v = shader->variants; v; v = v->next) + if (!memcmp(&key, &v->key, sizeof(key))) + return v; + + /* compile new variant if it doesn't exist already: */ + v = create_variant(shader, key); + v->next = shader->variants; + shader->variants = v; + + return v; +} + + +void +ir3_shader_destroy(struct ir3_shader *shader) +{ + struct ir3_shader_variant *v, *t; + for (v = shader->variants; v; ) { + t = v; + v = v->next; + delete_variant(t); + } + free((void *)shader->tokens); + free(shader); +} + +struct ir3_shader * +ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens, + enum shader_t type) +{ + struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader); + shader->pctx = pctx; + shader->type = type; + shader->tokens = tgsi_dup_tokens(tokens); + return shader; +} diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h new file mode 100644 index 00000000000..1a91fcbcb13 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -0,0 +1,163 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
(including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef IR3_SHADER_H_ +#define IR3_SHADER_H_ + +#include "ir3.h" +#include "disasm.h" + +typedef uint16_t ir3_semantic; /* packed semantic: name in the high byte, index in the low byte */ +static inline ir3_semantic +ir3_semantic_name(uint8_t name, uint16_t index) +{ /* pack name and index into one value; only the low 8 bits of index are kept */ + return (name << 8) | (index & 0xff); +} +/* unpack the semantic name (high byte) from a packed semantic */ +static inline uint8_t sem2name(ir3_semantic sem) +{ + return sem >> 8; +} +/* unpack the semantic index (low byte) from a packed semantic */ +static inline uint16_t sem2idx(ir3_semantic sem) +{ + return sem & 0xff; +} + +/* Configuration key used to identify a shader variant.. different + * shader variants can be used to implement features not supported + * in hw (two sided color), binning-pass vertex shader, etc. 
+ */ +struct ir3_shader_key { + /* vertex shader variant parameters: */ + unsigned binning_pass : 1; + + /* fragment shader variant parameters: */ + unsigned color_two_side : 1; + unsigned half_precision : 1; +}; +/* One variant of an ir3_shader, compiled and assembled for a particular + * ir3_shader_key. + */ +struct ir3_shader_variant { + struct fd_bo *bo; /* set by assembly; a variant without a bo is treated as a failed build */ + + struct ir3_shader_key key; /* key this variant was built for (normalized per shader type) */ + + struct ir3_info info; + struct ir3 *ir; + + /* the instructions length is in units of instruction groups + * (4 instructions, 8 dwords): + */ + unsigned instrlen; + + /* the constants length is in units of vec4's, and is the sum of + * the uniforms and the built-in compiler constants + */ + unsigned constlen; + + /* About Linkage: + * + Let the frag shader determine the position/compmask for the + * varyings, since it is the place where we know if the varying + * is actually used, and if so, which components are used. So + * what the hw calls "outloc" is taken from the "inloc" of the + * frag shader. + * + From the vert shader, we only need the output regid + */ + + /* for frag shader, pos_regid holds the frag_pos, ie. what is passed + * to bary.f instructions + */ + uint8_t pos_regid; + bool frag_coord, frag_face; + + /* varyings/outputs: */ + unsigned outputs_count; + struct { + ir3_semantic semantic; + uint8_t regid; + } outputs[16 + 2]; /* +POSITION +PSIZE */ + bool writes_pos, writes_psize; + + /* vertices/inputs: */ + unsigned inputs_count; + struct { + ir3_semantic semantic; + uint8_t regid; + uint8_t compmask; + uint8_t ncomp; + /* in theory inloc of fs should match outloc of vs: */ + uint8_t inloc; + uint8_t bary; + } inputs[16 + 2]; /* +POSITION +FACE */ + + unsigned total_in; /* sum of inputs (scalar) */ + + /* do we have one or more texture sample instructions: */ + bool has_samp; + + /* const reg # of first immediate, ie. 
1 == c1 + * (not regid, because TGSI thinks in terms of vec4 registers, + * not scalar registers) + */ + unsigned first_immediate; + unsigned immediates_count; + struct { + uint32_t val[4]; + } immediates[64]; + + /* shader variants form a linked list: */ + struct ir3_shader_variant *next; + + /* replicated here to avoid passing extra ptrs everywhere: */ + enum shader_t type; + struct ir3_shader *shader; +}; +/* Generation-neutral shader object: holds a copy of the TGSI tokens and + * the list of variants compiled from them. + */ +struct ir3_shader { + enum shader_t type; + + struct pipe_context *pctx; + const struct tgsi_token *tokens; /* duplicated in ir3_shader_create(), freed in ir3_shader_destroy() */ + + struct ir3_shader_variant *variants; /* list head; new variants are inserted at the front */ + + /* so far, only used for blit_prog shader.. values for + * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE + */ + uint32_t vinterp[4], vpsrepl[4]; +}; + +/* allocate a shader of the given type holding a duplicate of 'tokens' */ +struct ir3_shader * ir3_shader_create(struct pipe_context *pctx, + const struct tgsi_token *tokens, enum shader_t type); +void ir3_shader_destroy(struct ir3_shader *shader); /* deletes all variants, then frees the tokens and the shader */ +/* look up the variant matching 'key', compiling it if it does not exist yet */ +struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader, + struct ir3_shader_key key); + +#endif /* IR3_SHADER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_visitor.h b/src/gallium/drivers/freedreno/ir3/ir3_visitor.h index 1c60d1620ca..1c60d1620ca 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_visitor.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_visitor.h |