Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 138
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.h | 39
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 484
-rw-r--r-- | src/gallium/drivers/r600/r600_context.c | 182
-rw-r--r-- | src/gallium/drivers/r600/r600_context.h | 79
-rw-r--r-- | src/gallium/drivers/r600/r600_draw.c | 105
-rw-r--r-- | src/gallium/drivers/r600/r600_query.c | 200
-rw-r--r-- | src/gallium/drivers/r600/r600_resource.h | 12
-rw-r--r-- | src/gallium/drivers/r600/r600_screen.c | 25
-rw-r--r-- | src/gallium/drivers/r600/r600_screen.h | 18
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 1602
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 1
-rw-r--r-- | src/gallium/drivers/r600/r600_sq.h | 27
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 476
-rw-r--r-- | src/gallium/drivers/r600/r600_state_inlines.h | 7
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 461
-rw-r--r-- | src/gallium/drivers/r600/r600d.h | 8
-rw-r--r-- | src/gallium/drivers/r600/radeon.h | 198 |
18 files changed, 3248 insertions, 814 deletions
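The assembler changes in r600_asm.c/r600_asm.h below add the control-flow plumbing used by the new shader translation code: r600_bc_add_alu_type() emits an ALU group into a clause of a given type (for example ALU_PUSH_BEFORE, which pushes the active mask before a branch), and r600_bc_add_cfinst() appends a bare CF instruction such as JUMP, ELSE, POP or one of the LOOP_* opcodes. The sketch that follows shows how a conditional could be opened with these helpers; emit_if() and the predicate setup are hypothetical, shown only for illustration — the real lowering (fc_stack bookkeeping, branch-address fixup at ELSE/ENDIF time) lives in the r600_shader.c hunks of this commit, which are not reproduced in this excerpt.

	/* Hypothetical sketch: open an IF using the new CF helpers.
	 * pred_alu is assumed to be a PRED_SET* instruction with
	 * alu->predicate = 1 so UPDATE_EXECUTE_MASK/UPDATE_PRED get set
	 * when the ALU word is built. */
	static int emit_if(struct r600_bc *bc, const struct r600_bc_alu *pred_alu)
	{
		int r;

		/* the predicate-setting ALU goes into an ALU_PUSH_BEFORE clause */
		r = r600_bc_add_alu_type(bc, pred_alu,
					 V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
		if (r)
			return r;

		/* conditional jump over the taken block; cf_addr is the branch
		 * target and would be patched once the matching ELSE or POP
		 * is emitted */
		r = r600_bc_add_cfinst(bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
		if (r)
			return r;
		bc->cf_last->cf_addr = 0;	/* fixed up later */
		return 0;
	}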
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 9ea9d4354d6..6483dac7039 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -76,6 +76,27 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) { LIST_INITHEAD(&bc->cf); bc->family = family; + switch (bc->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + bc->chiprev = 0; + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + bc->chiprev = 1; + break; + default: + R600_ERR("unknown family %d\n", bc->family); + return -EINVAL; + } return 0; } @@ -107,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); struct r600_bc_alu *lalu; @@ -119,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) nalu->nliteral = 0; /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || bc->force_add_cf) { /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); @@ -127,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) free(nalu); return r; } - bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; + bc->cf_last->inst = (type << 3); } if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { bc->force_add_cf = 1; @@ -162,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return 0; } +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +{ + return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU); +} + int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) { struct r600_bc_alu *alu; @@ -172,7 +198,17 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { return 0; } - if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { + return 0; + } + if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && + (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || LIST_IS_EMPTY(&bc->cf_last->alu)) { R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); return -EINVAL; @@ -241,6 +277,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return 0; } +int r600_bc_add_cfinst(struct r600_bc *bc, int inst) +{ + int r; + r = r600_bc_add_cf(bc); + if (r) + return r; + + bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; + bc->cf_last->inst = inst; + return 0; +} + static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | @@ -292,38 
+340,44 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign unsigned i; /* don't replace gpr by pv or ps for destination register */ + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + if (alu->is_op3) { - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | S_SQ_ALU_WORD1_BANK_SWIZZLE(0); } else { - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + S_SQ_ALU_WORD1_BANK_SWIZZLE(0) | + S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | + S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } if (alu->last) { + if (alu->nliteral && !alu->literal_added) { + R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n"); + } for (i = 0; i < alu->nliteral; i++) { bc->bytecode[id++] = alu->value[i]; } @@ -337,6 +391,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | @@ -364,6 +419,20 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); + bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COND(cf->cond) | + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + + break; default: 
R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -380,6 +449,8 @@ int r600_bc_build(struct r600_bc *bc) unsigned addr; int r; + if (bc->callstack[0].max > 0) + bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; /* first path compute addr of each CF block */ /* addr start after all the CF instructions */ @@ -387,6 +458,7 @@ int r600_bc_build(struct r600_bc *bc) LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -398,6 +470,14 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -417,22 +497,13 @@ int r600_bc_build(struct r600_bc *bc) return r; switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { - switch (bc->family) { - case CHIP_R600: - case CHIP_RV610: - case CHIP_RV630: - case CHIP_RV670: - case CHIP_RV620: - case CHIP_RV635: - case CHIP_RS780: - case CHIP_RS880: + switch(bc->chiprev) { + case 0: r = r600_bc_alu_build(bc, alu, addr); break; - case CHIP_RV770: - case CHIP_RV730: - case CHIP_RV710: - case CHIP_RV740: + case 1: r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -466,6 +537,13 @@ int r600_bc_build(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 10d98afaf00..9e65fcdd4fa 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -31,6 +31,7 @@ struct r600_bc_alu_src { unsigned chan; unsigned neg; unsigned abs; + unsigned rel; }; struct r600_bc_alu_dst { @@ -38,6 +39,7 @@ struct r600_bc_alu_dst { unsigned chan; unsigned clamp; unsigned write; + unsigned rel; }; struct r600_bc_alu { @@ -47,6 +49,7 @@ struct r600_bc_alu { unsigned inst; unsigned last; unsigned is_op3; + unsigned predicate; unsigned nliteral; unsigned literal_added; u32 value[4]; @@ -114,22 +117,55 @@ struct r600_bc_cf { unsigned addr; unsigned ndw; unsigned id; + unsigned cond; + unsigned pop_count; + unsigned cf_addr; /* control flow addr */ struct list_head alu; struct list_head tex; struct list_head vtx; struct r600_bc_output output; }; +#define FC_NONE 0 +#define FC_IF 1 +#define FC_LOOP 2 +#define FC_REP 3 +#define FC_PUSH_VPM 4 +#define FC_PUSH_WQM 5 + +struct r600_cf_stack_entry { + int type; + struct r600_bc_cf *start; + struct r600_bc_cf **mid; /* used to store the else point */ + int num_mid; +}; + 
+#define SQ_MAX_CALL_DEPTH 0x00000020 +struct r600_cf_callstack { + unsigned fc_sp_before_entry; + int sub_desc_index; + int current; + int max; +}; + struct r600_bc { enum radeon_family family; + int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ struct list_head cf; struct r600_bc_cf *cf_last; unsigned ndw; unsigned ncf; unsigned ngpr; + unsigned nstack; unsigned nresource; unsigned force_add_cf; u32 *bytecode; + + u32 fc_sp; + struct r600_cf_stack_entry fc_stack[32]; + + unsigned call_sp; + struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH]; }; int r600_bc_init(struct r600_bc *bc, enum radeon_family family); @@ -139,5 +175,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); - +int r600_bc_add_cfinst(struct r600_bc *bc, int inst); +int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); #endif diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index f4eedfe4cb1..e6ded342e59 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -24,6 +24,7 @@ * Jerome Glisse * Marek Olšák */ +#include <errno.h> #include <pipe/p_screen.h> #include <util/u_blitter.h> #include <util/u_inlines.h> @@ -31,9 +32,12 @@ #include "util/u_surface.h" #include "r600_screen.h" #include "r600_context.h" +#include "r600d.h" -static void r600_blitter_save_states(struct r600_context *rctx) +static void r600_blitter_save_states(struct pipe_context *ctx) { + struct r600_context *rctx = r600_context(ctx); + util_blitter_save_blend(rctx->blitter, rctx->blend); util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->dsa); if (rctx->stencil_ref) { @@ -47,48 +51,58 @@ static void r600_blitter_save_states(struct r600_context *rctx) if (rctx->viewport) { util_blitter_save_viewport(rctx->blitter, &rctx->viewport->state.viewport); } - /* XXX util_blitter_save_clip(rctx->blitter, &rctx->clip); */ + if (rctx->clip) { + util_blitter_save_clip(rctx->blitter, &rctx->clip->state.clip); + } util_blitter_save_vertex_buffers(rctx->blitter, rctx->nvertex_buffer, rctx->vertex_buffer); /* remove ptr so they don't get deleted */ rctx->blend = NULL; + rctx->clip = NULL; rctx->vs_shader = NULL; rctx->ps_shader = NULL; rctx->rasterizer = NULL; rctx->dsa = NULL; rctx->vertex_elements = NULL; + + /* suspend queries */ + r600_queries_suspend(ctx); } static void r600_clear(struct pipe_context *ctx, unsigned buffers, - const float *rgba, double depth, unsigned stencil) + const float *rgba, double depth, unsigned stencil) { struct r600_context *rctx = r600_context(ctx); struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - r600_blitter_save_states(rctx); + r600_blitter_save_states(ctx); util_blitter_clear(rctx->blitter, fb->width, fb->height, fb->nr_cbufs, buffers, rgba, depth, stencil); + /* resume queries */ + r600_queries_resume(ctx); } -static void r600_clear_render_target(struct pipe_context *pipe, +static void r600_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst, const float *rgba, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(pipe); + struct r600_context *rctx = r600_context(ctx); struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - r600_blitter_save_states(rctx); + 
r600_blitter_save_states(ctx); util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_render_target(rctx->blitter, dst, rgba, dstx, dsty, width, height); + /* resume queries */ + r600_queries_resume(ctx); } -static void r600_clear_depth_stencil(struct pipe_context *pipe, +static void r600_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst, unsigned clear_flags, double depth, @@ -96,17 +110,20 @@ static void r600_clear_depth_stencil(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height) { - struct r600_context *rctx = r600_context(pipe); + struct r600_context *rctx = r600_context(ctx); struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; - r600_blitter_save_states(rctx); + r600_blitter_save_states(ctx); util_blitter_save_framebuffer(rctx->blitter, fb); util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); + /* resume queries */ + r600_queries_resume(ctx); } -static void r600_resource_copy_region(struct pipe_context *pipe, + +static void r600_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_subresource subdst, unsigned dstx, unsigned dsty, unsigned dstz, @@ -115,7 +132,7 @@ static void r600_resource_copy_region(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned srcz, unsigned width, unsigned height) { - util_resource_copy_region(pipe, dst, subdst, dstx, dsty, dstz, + util_resource_copy_region(ctx, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height); } @@ -126,3 +143,446 @@ void r600_init_blit_functions(struct r600_context *rctx) rctx->context.clear_depth_stencil = r600_clear_depth_stencil; rctx->context.resource_copy_region = r600_resource_copy_region; } + + +struct r600_blit_states { + struct radeon_state rasterizer; + struct radeon_state dsa; + struct radeon_state blend; + struct radeon_state cb_cntl; + struct radeon_state vgt; + struct radeon_state draw; + struct radeon_state vs_constant0; + struct radeon_state vs_constant1; + struct radeon_state vs_constant2; + struct radeon_state vs_constant3; + struct radeon_state ps_shader; + struct radeon_state vs_shader; + struct radeon_state vs_resource0; + struct radeon_state vs_resource1; +}; + +static int r600_blit_state_vs_resources(struct r600_screen *rscreen, struct r600_blit_states *bstates) +{ + struct radeon_state *rstate; + struct radeon_bo *bo; + u32 vbo[] = { + 0xBF800000, 0xBF800000, 0x3F800000, 0x3F800000, + 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, + 0x3F800000, 0xBF800000, 0x3F800000, 0x3F800000, + 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, + 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, + 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000, + 0xBF800000, 0x3F800000, 0x3F800000, 0x3F800000, + 0x3F000000, 0x3F000000, 0x3F000000, 0x00000000 + }; + + /* simple shader */ + bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL); + if (bo == NULL) { + return -ENOMEM; + } + if (radeon_bo_map(rscreen->rw, bo)) { + radeon_bo_decref(rscreen->rw, bo); + return -ENOMEM; + } + memcpy(bo->data, vbo, 128); + radeon_bo_unmap(rscreen->rw, bo); + + rstate = &bstates->vs_resource0; + radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 0, R600_SHADER_VS); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000080; + 
rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; + rstate->bo[0] = bo; + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + if (radeon_state_pm4(rstate)) { + radeon_state_fini(rstate); + return -ENOMEM; + } + + rstate = &bstates->vs_resource1; + radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, 1, R600_SHADER_VS); + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD0] = 0x00000010; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD1] = 0x00000070; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD2] = 0x02302000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD3] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD4] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD5] = 0x00000000; + rstate->states[R600_VS_RESOURCE__RESOURCE160_WORD6] = 0xC0000000; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, bo); + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + if (radeon_state_pm4(rstate)) { + radeon_state_fini(rstate); + return -ENOMEM; + } + + return 0; +} + +static void r600_blit_state_vs_shader(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + struct radeon_bo *bo; + u32 shader_bc_r600[] = { + 0x00000004, 0x81000400, + 0x00000008, 0xA01C0000, + 0xC001A03C, 0x94000688, + 0xC0024000, 0x94200688, + 0x7C000000, 0x002D1001, + 0x00080000, 0x00000000, + 0x7C000100, 0x002D1002, + 0x00080000, 0x00000000, + 0x00000001, 0x00601910, + 0x00000401, 0x20601910, + 0x00000801, 0x40601910, + 0x80000C01, 0x60601910, + 0x00000002, 0x00801910, + 0x00000402, 0x20801910, + 0x00000802, 0x40801910, + 0x80000C02, 0x60801910 + }; + u32 shader_bc_r700[] = { + 0x00000004, 0x81000400, + 0x00000008, 0xA01C0000, + 0xC001A03C, 0x94000688, + 0xC0024000, 0x94200688, + 0x7C000000, 0x002D1001, + 0x00080000, 0x00000000, + 0x7C000100, 0x002D1002, + 0x00080000, 0x00000000, + 0x00000001, 0x00600C90, + 0x00000401, 0x20600C90, + 0x00000801, 0x40600C90, + 0x80000C01, 0x60600C90, + 0x00000002, 0x00800C90, + 0x00000402, 0x20800C90, + 0x00000802, 0x40800C90, + 0x80000C02, 0x60800C90 + }; + + /* simple shader */ + bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL); + if (bo == NULL) { + return; + } + if (radeon_bo_map(rscreen->rw, bo)) { + radeon_bo_decref(rscreen->rw, bo); + return; + } + switch (rscreen->chip_class) { + case R600: + memcpy(bo->data, shader_bc_r600, 128); + break; + case R700: + memcpy(bo->data, shader_bc_r700, 128); + break; + default: + R600_ERR("unsupported chip family\n"); + radeon_bo_unmap(rscreen->rw, bo); + radeon_bo_decref(rscreen->rw, bo); + return; + } + radeon_bo_unmap(rscreen->rw, bo); + + radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_0] = 0x03020100; + rstate->states[R600_VS_SHADER__SPI_VS_OUT_ID_1] = 0x07060504; + rstate->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = 0x00000005; + + rstate->bo[0] = bo; + rstate->bo[1] = radeon_bo_incref(rscreen->rw, bo); + rstate->nbo = 2; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_ps_shader(struct 
r600_screen *rscreen, struct radeon_state *rstate) +{ + struct radeon_bo *bo; + u32 shader_bc_r600[] = { + 0x00000002, 0xA00C0000, + 0xC0008000, 0x94200688, + 0x00000000, 0x00201910, + 0x00000400, 0x20201910, + 0x00000800, 0x40201910, + 0x80000C00, 0x60201910 + }; + u32 shader_bc_r700[] = { + 0x00000002, 0xA00C0000, + 0xC0008000, 0x94200688, + 0x00000000, 0x00200C90, + 0x00000400, 0x20200C90, + 0x00000800, 0x40200C90, + 0x80000C00, 0x60200C90 + }; + + /* simple shader */ + bo = radeon_bo(rscreen->rw, 0, 128, 4096, NULL); + if (bo == NULL) { + radeon_bo_decref(rscreen->rw, bo); + return; + } + if (radeon_bo_map(rscreen->rw, bo)) { + return; + } + switch (rscreen->chip_class) { + case R600: + memcpy(bo->data, shader_bc_r600, 48); + break; + case R700: + memcpy(bo->data, shader_bc_r700, 48); + break; + default: + R600_ERR("unsupported chip family\n"); + radeon_bo_unmap(rscreen->rw, bo); + radeon_bo_decref(rscreen->rw, bo); + return; + } + radeon_bo_unmap(rscreen->rw, bo); + + radeon_state_init(rstate, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0] = 0x00000C00; + rstate->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = 0x10000001; + rstate->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002; + rstate->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = 0x00000002; + + rstate->bo[0] = bo; + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_vgt(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_VGT, 0, 0); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + rstate->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + rstate->states[R600_VGT__VGT_PRIMITIVE_TYPE] = 0x00000005; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_draw(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_DRAW, 0, 0); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_DRAW__VGT_DRAW_INITIATOR] = 0x00000002; + rstate->states[R600_DRAW__VGT_NUM_INDICES] = 0x00000004; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_vs_constant(struct r600_screen *rscreen, struct radeon_state *rstate, + unsigned id, float c0, float c1, float c2, float c3) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_CONSTANT, id, R600_SHADER_VS); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT0_256] = fui(c0); + rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT1_256] = fui(c1); + rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT2_256] = fui(c2); + rstate->states[R600_VS_CONSTANT__SQ_ALU_CONSTANT3_256] = fui(c3); + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_rasterizer(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_CLIP_ADJ] = 0x3F800000; + 
rstate->states[R600_RASTERIZER__PA_CL_GB_HORZ_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_CLIP_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_CL_GB_VERT_DISC_ADJ] = 0x3F800000; + rstate->states[R600_RASTERIZER__PA_SC_LINE_CNTL] = 0x00000400; + rstate->states[R600_RASTERIZER__PA_SC_LINE_STIPPLE] = 0x00000005; + rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; + rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; + rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = 0x00080004; + rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_dsa(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; + rstate->states[R600_DSA__DB_DEPTH_CLEAR] = 0x3F800000; + rstate->states[R600_DSA__DB_RENDER_CONTROL] = 0x00000060; + rstate->states[R600_DSA__DB_RENDER_OVERRIDE] = 0x0000002A; + rstate->states[R600_DSA__DB_SHADER_CONTROL] = 0x00000210; + + radeon_state_pm4(rstate); +} + +static void r600_blit_state_blend(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); + radeon_state_pm4(rstate); +} + +static void r600_blit_state_cb_cntl(struct r600_screen *rscreen, struct radeon_state *rstate) +{ + radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); + rstate->states[R600_CB_CNTL__CB_CLRCMP_CONTROL] = 0x01000000; + rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; + rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; + rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = 0x00CC0080; + rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = 0x0000000F; + rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = 0x0000000F; + rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; + radeon_state_pm4(rstate); +} + +static int r600_blit_states_init(struct pipe_context *ctx, struct r600_blit_states *bstates) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + + r600_blit_state_ps_shader(rscreen, &bstates->ps_shader); + r600_blit_state_vs_shader(rscreen, &bstates->vs_shader); + r600_blit_state_vgt(rscreen, &bstates->vgt); + r600_blit_state_draw(rscreen, &bstates->draw); + r600_blit_state_vs_constant(rscreen, &bstates->vs_constant0, 0, 1.0, 0.0, 0.0, 0.0); + r600_blit_state_vs_constant(rscreen, &bstates->vs_constant1, 1, 0.0, 1.0, 0.0, 0.0); + r600_blit_state_vs_constant(rscreen, &bstates->vs_constant2, 2, 0.0, 0.0, -0.00199900055, 0.0); + r600_blit_state_vs_constant(rscreen, &bstates->vs_constant3, 3, 0.0, 0.0, -0.99900049, 1.0); + r600_blit_state_rasterizer(rscreen, &bstates->rasterizer); + r600_blit_state_dsa(rscreen, &bstates->dsa); + r600_blit_state_blend(rscreen, &bstates->blend); + r600_blit_state_cb_cntl(rscreen, &bstates->cb_cntl); + r600_blit_state_vs_resources(rscreen, bstates); + return 0; +} + +static void r600_blit_states_destroy(struct pipe_context *ctx, struct r600_blit_states *bstates) +{ + radeon_state_fini(&bstates->ps_shader); + radeon_state_fini(&bstates->vs_shader); + radeon_state_fini(&bstates->vs_resource0); + radeon_state_fini(&bstates->vs_resource1); +} + +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) +{ + struct r600_screen *rscreen = 
r600_screen(ctx->screen); + struct r600_context *rctx = r600_context(ctx); + struct radeon_draw draw; + struct r600_blit_states bstates; + int r; + + r = r600_texture_scissor(ctx, rtexture, level); + if (r) { + return r; + } + r = r600_texture_cb(ctx, rtexture, 0, level); + if (r) { + return r; + } + r = r600_texture_db(ctx, rtexture, level); + if (r) { + return r; + } + r = r600_texture_viewport(ctx, rtexture, level); + if (r) { + return r; + } + + r = r600_blit_states_init(ctx, &bstates); + if (r) { + return r; + } + bstates.dsa.states[R600_DSA__DB_RENDER_CONTROL] = 0x0000008C; + bstates.cb_cntl.states[R600_CB_CNTL__CB_TARGET_MASK] = 0x00000001; + /* force rebuild */ + bstates.dsa.cpm4 = bstates.cb_cntl.cpm4 = 0; + if (radeon_state_pm4(&bstates.dsa)) { + goto out; + } + if (radeon_state_pm4(&bstates.cb_cntl)) { + goto out; + } + + r = radeon_draw_init(&draw, rscreen->rw); + if (r) { + R600_ERR("failed creating draw for uncompressing textures\n"); + goto out; + } + + radeon_draw_bind(&draw, &bstates.vs_shader); + radeon_draw_bind(&draw, &bstates.ps_shader); + radeon_draw_bind(&draw, &bstates.rasterizer); + radeon_draw_bind(&draw, &bstates.dsa); + radeon_draw_bind(&draw, &bstates.blend); + radeon_draw_bind(&draw, &bstates.cb_cntl); + radeon_draw_bind(&draw, &rctx->config); + radeon_draw_bind(&draw, &bstates.vgt); + radeon_draw_bind(&draw, &bstates.draw); + radeon_draw_bind(&draw, &bstates.vs_resource0); + radeon_draw_bind(&draw, &bstates.vs_resource1); + radeon_draw_bind(&draw, &bstates.vs_constant0); + radeon_draw_bind(&draw, &bstates.vs_constant1); + radeon_draw_bind(&draw, &bstates.vs_constant2); + radeon_draw_bind(&draw, &bstates.vs_constant3); + radeon_draw_bind(&draw, &rtexture->viewport[level]); + radeon_draw_bind(&draw, &rtexture->scissor[level]); + radeon_draw_bind(&draw, &rtexture->cb[0][level]); + radeon_draw_bind(&draw, &rtexture->db[level]); + + /* suspend queries */ + r600_queries_suspend(ctx); + + /* schedule draw*/ + r = radeon_ctx_set_draw(&rctx->ctx, &draw); + if (r == -EBUSY) { + r600_flush(ctx, 0, NULL); + r = radeon_ctx_set_draw(&rctx->ctx, &draw); + } + if (r) { + goto out; + } + + /* resume queries */ + r600_queries_resume(ctx); + +out: + r600_blit_states_destroy(ctx, &bstates); + return r; +} diff --git a/src/gallium/drivers/r600/r600_context.c b/src/gallium/drivers/r600/r600_context.c index edde80c660a..7a0e5b4049f 100644 --- a/src/gallium/drivers/r600/r600_context.c +++ b/src/gallium/drivers/r600/r600_context.c @@ -34,10 +34,26 @@ #include "r600_resource.h" #include "r600d.h" + static void r600_destroy_context(struct pipe_context *context) { struct r600_context *rctx = r600_context(context); + rctx->rasterizer = r600_context_state_decref(rctx->rasterizer); + rctx->poly_stipple = r600_context_state_decref(rctx->poly_stipple); + rctx->scissor = r600_context_state_decref(rctx->scissor); + rctx->clip = r600_context_state_decref(rctx->clip); + rctx->ps_shader = r600_context_state_decref(rctx->ps_shader); + rctx->vs_shader = r600_context_state_decref(rctx->vs_shader); + rctx->depth = r600_context_state_decref(rctx->depth); + rctx->stencil = r600_context_state_decref(rctx->stencil); + rctx->alpha = r600_context_state_decref(rctx->alpha); + rctx->dsa = r600_context_state_decref(rctx->dsa); + rctx->blend = r600_context_state_decref(rctx->blend); + rctx->stencil_ref = r600_context_state_decref(rctx->stencil_ref); + rctx->viewport = r600_context_state_decref(rctx->viewport); + rctx->framebuffer = r600_context_state_decref(rctx->framebuffer); + radeon_ctx_fini(&rctx->ctx); 
FREE(rctx); } @@ -45,27 +61,35 @@ void r600_flush(struct pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_context *rctx = r600_context(ctx); - struct r600_screen *rscreen = rctx->screen; + struct r600_query *rquery; static int dc = 0; char dname[256]; - if (radeon_ctx_pm4(rctx->ctx)) - return; + /* suspend queries */ + r600_queries_suspend(ctx); /* FIXME dumping should be removed once shader support instructions * without throwing bad code */ - if (!rctx->ctx->cpm4) + if (!rctx->ctx.cdwords) goto out; +#if 0 sprintf(dname, "gallium-%08d.bof", dc); - if (dc < 1) - radeon_ctx_dump_bof(rctx->ctx, dname); + if (dc < 2) { + radeon_ctx_dump_bof(&rctx->ctx, dname); + R600_ERR("dumped %s\n", dname); + } +#endif #if 1 - radeon_ctx_submit(rctx->ctx); + radeon_ctx_submit(&rctx->ctx); #endif + LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { + rquery->flushed = true; + } dc++; out: - rctx->ctx = radeon_ctx_decref(rctx->ctx); - rctx->ctx = radeon_ctx(rscreen->rw); + radeon_ctx_clear(&rctx->ctx); + /* resume queries */ + r600_queries_resume(ctx); } static void r600_init_config(struct r600_context *rctx) @@ -207,9 +231,9 @@ static void r600_init_config(struct r600_context *rctx) num_es_stack_entries = 0; break; } - rctx->hw_states.config = radeon_state(rctx->rw, R600_CONFIG_TYPE, R600_CONFIG); + radeon_state_init(&rctx->config, rctx->rw, R600_STATE_CONFIG, 0, 0); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_CONFIG] = 0x00000000; switch (family) { case CHIP_RV610: case CHIP_RV620: @@ -218,75 +242,85 @@ static void r600_init_config(struct r600_context *rctx) case CHIP_RV710: break; default: - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VC_ENABLE(1); break; } - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); - rctx->hw_states.config->states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_DX9_CONSTS(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ALU_INST_PREFER_VECTOR(1); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_PS_PRIO(ps_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_VS_PRIO(vs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_GS_PRIO(gs_prio); + rctx->config.states[R600_CONFIG__SQ_CONFIG] |= S_008C00_ES_PRIO(es_prio); + + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - 
rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1] |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + rctx->config.states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - rctx->hw_states.config->states[R600_CONFIG__SQ_GPR_RESOURCE_MGMT_2] |= S_008C08_NUM_GS_GPRS(num_es_gprs); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); + rctx->config.states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_PS_THREADS(num_ps_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_VS_THREADS(num_vs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_GS_THREADS(num_gs_threads); - rctx->hw_states.config->states[R600_CONFIG__SQ_THREAD_RESOURCE_MGMT] |= S_008C0C_NUM_ES_THREADS(num_es_threads); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_PS_STACK_ENTRIES(num_ps_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_1] |= S_008C10_NUM_VS_STACK_ENTRIES(num_vs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); + rctx->config.states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] = 0; - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_GS_STACK_ENTRIES(num_gs_stack_entries); - rctx->hw_states.config->states[R600_CONFIG__SQ_STACK_RESOURCE_MGMT_2] |= S_008C14_NUM_ES_STACK_ENTRIES(num_es_stack_entries); + rctx->config.states[R600_CONFIG__VC_ENHANCE] = 0x00000000; + rctx->config.states[R600_CONFIG__SX_MISC] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; - rctx->hw_states.config->states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; - rctx->hw_states.config->states[R600_CONFIG__VC_ENHANCE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_DEBUG] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; - rctx->hw_states.config->states[R600_CONFIG__SX_MISC] = 0x00000000; - 
rctx->hw_states.config->states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; - rctx->hw_states.config->states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; - rctx->hw_states.config->states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; - radeon_state_pm4(rctx->hw_states.config); + if (family >= CHIP_RV770) { + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00004000; + rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000002; + rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x00000000; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x00420204; + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000000; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00514000; + } else { + rctx->config.states[R600_CONFIG__SQ_DYN_GPR_CNTL_PS_FLUSH_REQ] = 0x00000000; + rctx->config.states[R600_CONFIG__TA_CNTL_AUX] = 0x07000003; + rctx->config.states[R600_CONFIG__DB_DEBUG] = 0x82000000; + rctx->config.states[R600_CONFIG__DB_WATERMARKS] = 0x01020204; + rctx->config.states[R600_CONFIG__SPI_THREAD_GROUPING] = 0x00000001; + rctx->config.states[R600_CONFIG__PA_SC_MODE_CNTL] = 0x00004010; + } + rctx->config.states[R600_CONFIG__CB_SHADER_CONTROL] = 0x00000003; + rctx->config.states[R600_CONFIG__SQ_ESGS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GSVS_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_ESTMP_RING_ITEMSIZE] = 0x00000000; + 
rctx->config.states[R600_CONFIG__SQ_GSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_VSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_PSTMP_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_FBUF_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_REDUC_RING_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__SQ_GS_VERT_ITEMSIZE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_OUTPUT_PATH_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MAX_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_MIN_TESS_LEVEL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_HOS_REUSE_DEPTH] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_PRIM_TYPE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_FIRST_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_DECR] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_0_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GROUP_VECT_1_FMT_CNTL] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_GS_MODE] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_REUSE_OFF] = 0x00000001; + rctx->config.states[R600_CONFIG__VGT_VTX_CNT_EN] = 0x00000000; + rctx->config.states[R600_CONFIG__VGT_STRMOUT_BUFFER_EN] = 0x00000000; + radeon_state_pm4(&rctx->config); } struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) @@ -320,7 +354,7 @@ struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv) r600_init_config(rctx); - rctx->ctx = radeon_ctx(rscreen->rw); - rctx->draw = radeon_draw(rscreen->rw); + radeon_ctx_init(&rctx->ctx, rscreen->rw); + radeon_draw_init(&rctx->draw, rscreen->rw); return &rctx->context; } diff --git a/src/gallium/drivers/r600/r600_context.h b/src/gallium/drivers/r600/r600_context.h index 76d5de86532..cea08130545 100644 --- a/src/gallium/drivers/r600/r600_context.h +++ b/src/gallium/drivers/r600/r600_context.h @@ -30,9 +30,32 @@ #include <tgsi/tgsi_parse.h> #include <tgsi/tgsi_util.h> #include <util/u_blitter.h> +#include <util/u_double_list.h> #include "radeon.h" #include "r600_shader.h" +#define R600_QUERY_STATE_STARTED (1 << 0) +#define R600_QUERY_STATE_ENDED (1 << 1) +#define R600_QUERY_STATE_SUSPENDED (1 << 2) + +struct r600_query { + u64 result; + /* The kind of query. Currently only OQ is supported. */ + unsigned type; + /* How many results have been written, in dwords. It's incremented + * after end_query and flush. */ + unsigned num_results; + /* if we've flushed the query */ + boolean flushed; + unsigned state; + /* The buffer where query results are stored. 
*/ + struct radeon_bo *buffer; + unsigned buffer_size; + /* linked list of queries */ + struct list_head list; + struct radeon_state rstate; +}; + /* XXX move this to a more appropriate place */ union pipe_states { struct pipe_rasterizer_state rasterizer; @@ -72,13 +95,16 @@ enum pipe_state_type { pipe_type_count }; +#define R600_MAX_RSTATE 16 + struct r600_context_state { union pipe_states state; unsigned refcount; unsigned type; - struct radeon_state *rstate; + struct radeon_state rstate[R600_MAX_RSTATE]; struct r600_shader shader; struct radeon_bo *bo; + unsigned nrstate; }; struct r600_vertex_element @@ -89,28 +115,25 @@ struct r600_vertex_element }; struct r600_context_hw_states { - struct radeon_state *rasterizer; - struct radeon_state *scissor; - struct radeon_state *dsa; - struct radeon_state *blend; - struct radeon_state *viewport; - struct radeon_state *cb[8]; - struct radeon_state *config; - struct radeon_state *cb_cntl; - struct radeon_state *db; - unsigned ps_nresource; - unsigned ps_nsampler; - struct radeon_state *ps_resource[160]; - struct radeon_state *ps_sampler[16]; + struct radeon_state rasterizer; + struct radeon_state scissor; + struct radeon_state dsa; + struct radeon_state cb_cntl; }; struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon *rw; - struct radeon_ctx *ctx; + struct radeon_ctx ctx; struct blitter_context *blitter; - struct radeon_draw *draw; + struct radeon_draw draw; + struct radeon_state config; + /* FIXME get rid of those vs_resource,vs/ps_constant */ + struct radeon_state vs_resource[160]; + unsigned vs_nresource; + struct radeon_state vs_constant[256]; + struct radeon_state ps_constant[256]; /* hw states */ struct r600_context_hw_states hw_states; /* pipe states */ @@ -134,14 +157,15 @@ struct r600_context { struct r600_context_state *stencil_ref; struct r600_context_state *viewport; struct r600_context_state *framebuffer; - struct r600_context_state *ps_sampler[PIPE_MAX_ATTRIBS]; - struct r600_context_state *vs_sampler[PIPE_MAX_ATTRIBS]; - struct r600_context_state *ps_sampler_view[PIPE_MAX_ATTRIBS]; - struct r600_context_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; + struct radeon_state *ps_sampler[PIPE_MAX_ATTRIBS]; + struct radeon_state *vs_sampler[PIPE_MAX_ATTRIBS]; + struct radeon_state *ps_sampler_view[PIPE_MAX_ATTRIBS]; + struct radeon_state *vs_sampler_view[PIPE_MAX_ATTRIBS]; struct r600_vertex_element *vertex_elements; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_index_buffer index_buffer; - struct pipe_blend_color blend_color; + struct pipe_blend_color blend_color; + struct list_head query_list; }; /* Convenience cast wrapper. 
*/ @@ -150,13 +174,18 @@ static INLINE struct r600_context *r600_context(struct pipe_context *pipe) return (struct r600_context*)pipe; } +static INLINE struct r600_query* r600_query(struct pipe_query* q) +{ + return (struct r600_query*)q; +} + struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigned type, const void *state); struct r600_context_state *r600_context_state_incref(struct r600_context_state *rstate); struct r600_context_state *r600_context_state_decref(struct r600_context_state *rstate); void r600_flush(struct pipe_context *ctx, unsigned flags, struct pipe_fence_handle **fence); -int r600_context_hw_states(struct r600_context *rctx); +int r600_context_hw_states(struct pipe_context *ctx); void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); @@ -178,4 +207,10 @@ extern int r600_pipe_shader_update(struct pipe_context *ctx, uint32_t r600_translate_texformat(enum pipe_format format, const unsigned char *swizzle_view, uint32_t *word4_p, uint32_t *yuv_format_p); + +/* query */ +extern void r600_queries_resume(struct pipe_context *ctx); +extern void r600_queries_suspend(struct pipe_context *ctx); + + #endif diff --git a/src/gallium/drivers/r600/r600_draw.c b/src/gallium/drivers/r600/r600_draw.c index f0584551620..fabd337d239 100644 --- a/src/gallium/drivers/r600/r600_draw.c +++ b/src/gallium/drivers/r600/r600_draw.c @@ -31,6 +31,7 @@ #include <util/u_math.h> #include <util/u_inlines.h> #include <util/u_memory.h> +#include "radeon.h" #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" @@ -38,8 +39,8 @@ struct r600_draw { struct pipe_context *ctx; - struct radeon_state *draw; - struct radeon_state *vgt; + struct radeon_state draw; + struct radeon_state vgt; unsigned mode; unsigned start; unsigned count; @@ -51,6 +52,7 @@ static int r600_draw_common(struct r600_draw *draw) { struct r600_context *rctx = r600_context(draw->ctx); struct r600_screen *rscreen = rctx->screen; + /* FIXME vs_resource */ struct radeon_state *vs_resource; struct r600_resource *rbuffer; unsigned i, j, offset, format, prim; @@ -58,7 +60,7 @@ static int r600_draw_common(struct r600_draw *draw) struct pipe_vertex_buffer *vertex_buffer; int r; - r = r600_context_hw_states(rctx); + r = r600_context_hw_states(draw->ctx); if (r) return r; switch (draw->index_size) { @@ -81,6 +83,7 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_conv_pipe_prim(draw->mode, &prim); if (r) return r; + /* rebuild vertex shader if input format changed */ r = r600_pipe_shader_update(draw->ctx, rctx->vs_shader); if (r) @@ -88,26 +91,24 @@ static int r600_draw_common(struct r600_draw *draw) r = r600_pipe_shader_update(draw->ctx, rctx->ps_shader); if (r) return r; - r = radeon_draw_set(rctx->draw, rctx->vs_shader->rstate); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->ps_shader->rstate); - if (r) - return r; + radeon_draw_bind(&rctx->draw, &rctx->vs_shader->rstate[0]); + radeon_draw_bind(&rctx->draw, &rctx->ps_shader->rstate[0]); + for (i = 0 ; i < rctx->vs_nresource; i++) { + radeon_state_fini(&rctx->vs_resource[i]); + } for (i = 0 ; i < rctx->vertex_elements->count; i++) { + vs_resource = &rctx->vs_resource[i]; j = rctx->vertex_elements->elements[i].vertex_buffer_index; vertex_buffer = &rctx->vertex_buffer[j]; rbuffer = (struct r600_resource*)vertex_buffer->buffer; offset = rctx->vertex_elements->elements[i].src_offset + vertex_buffer->buffer_offset; format = r600_translate_colorformat(rctx->vertex_elements->elements[i].src_format); - 
vs_resource = radeon_state(rscreen->rw, R600_VS_RESOURCE_TYPE, R600_VS_RESOURCE + i); - if (vs_resource == NULL) - return -ENOMEM; + radeon_state_init(vs_resource, rscreen->rw, R600_STATE_RESOURCE, i, R600_SHADER_VS); vs_resource->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); vs_resource->nbo = 1; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = offset; - vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset; + vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = rbuffer->bo->size - offset - 1; vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = S_038008_STRIDE(vertex_buffer->stride) | S_038008_DATA_FORMAT(format); vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = 0x00000000; @@ -116,59 +117,61 @@ static int r600_draw_common(struct r600_draw *draw) vs_resource->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = 0xC0000000; vs_resource->placement[0] = RADEON_GEM_DOMAIN_GTT; vs_resource->placement[1] = RADEON_GEM_DOMAIN_GTT; - r = radeon_draw_set_new(rctx->draw, vs_resource); - if (r) + r = radeon_state_pm4(vs_resource); + if (r) { return r; + } + radeon_draw_bind(&rctx->draw, vs_resource); } + rctx->vs_nresource = rctx->vertex_elements->count; /* FIXME start need to change winsys */ - draw->draw = radeon_state(rscreen->rw, R600_DRAW_TYPE, R600_DRAW); - if (draw->draw == NULL) - return -ENOMEM; - draw->draw->states[R600_DRAW__VGT_NUM_INDICES] = draw->count; - draw->draw->states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; + radeon_state_init(&draw->draw, rscreen->rw, R600_STATE_DRAW, 0, 0); + draw->draw.states[R600_DRAW__VGT_NUM_INDICES] = draw->count; + draw->draw.states[R600_DRAW__VGT_DRAW_INITIATOR] = vgt_draw_initiator; if (draw->index_buffer) { rbuffer = (struct r600_resource*)draw->index_buffer; - draw->draw->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - draw->draw->placement[0] = RADEON_GEM_DOMAIN_GTT; - draw->draw->placement[1] = RADEON_GEM_DOMAIN_GTT; - draw->draw->nbo = 1; + draw->draw.bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + draw->draw.placement[0] = RADEON_GEM_DOMAIN_GTT; + draw->draw.placement[1] = RADEON_GEM_DOMAIN_GTT; + draw->draw.nbo = 1; } - r = radeon_draw_set_new(rctx->draw, draw->draw); - if (r) + r = radeon_state_pm4(&draw->draw); + if (r) { return r; - draw->vgt = radeon_state(rscreen->rw, R600_VGT_TYPE, R600_VGT); - if (draw->vgt == NULL) - return -ENOMEM; - draw->vgt->states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; - draw->vgt->states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; - draw->vgt->states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INDX_OFFSET] = draw->start; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; - draw->vgt->states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; - draw->vgt->states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; - draw->vgt->states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; - r = radeon_draw_set_new(rctx->draw, draw->vgt); - if (r) + } + radeon_draw_bind(&rctx->draw, &draw->draw); + + radeon_state_init(&draw->vgt, rscreen->rw, R600_STATE_VGT, 0, 0); + draw->vgt.states[R600_VGT__VGT_PRIMITIVE_TYPE] = prim; + draw->vgt.states[R600_VGT__VGT_MAX_VTX_INDX] = 0x00FFFFFF; + draw->vgt.states[R600_VGT__VGT_MIN_VTX_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INDX_OFFSET] = draw->start; + 
draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_INDX] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_INDEX_TYPE] = vgt_dma_index_type; + draw->vgt.states[R600_VGT__VGT_PRIMITIVEID_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_DMA_NUM_INSTANCES] = 0x00000001; + draw->vgt.states[R600_VGT__VGT_MULTI_PRIM_IB_RESET_EN] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_0] = 0x00000000; + draw->vgt.states[R600_VGT__VGT_INSTANCE_STEP_RATE_1] = 0x00000000; + r = radeon_state_pm4(&draw->vgt); + if (r) { return r; - /* FIXME */ - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + } + radeon_draw_bind(&rctx->draw, &draw->vgt); + + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); if (r == -EBUSY) { r600_flush(draw->ctx, 0, NULL); - r = radeon_ctx_set_draw_new(rctx->ctx, rctx->draw); + r = radeon_ctx_set_draw(&rctx->ctx, &rctx->draw); } - if (r) - return r; - rctx->draw = radeon_draw_duplicate(rctx->draw); - return 0; + return r; } void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct r600_context *rctx = r600_context(ctx); struct r600_draw draw; + int r; assert(info->index_bias == 0); @@ -189,5 +192,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) draw.index_size = 0; draw.index_buffer = NULL; } - r600_draw_common(&draw); + r = r600_draw_common(&draw); + if (r) + fprintf(stderr,"draw common failed %d\n", r); } diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 9b02ae680e7..530940ed843 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -24,39 +24,225 @@ * Jerome Glisse * Corbin Simpson */ +#include <errno.h> #include <util/u_inlines.h> #include <util/u_format.h> #include <util/u_memory.h> #include "r600_screen.h" #include "r600_context.h" -static struct pipe_query *r600_create_query(struct pipe_context *pipe, unsigned query_type) +static void r600_query_begin(struct r600_context *rctx, struct r600_query *rquery) { - return NULL; + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate = &rquery->rstate; + + radeon_state_fini(rstate); + radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_BEGIN, 0, 0); + rstate->states[R600_QUERY__OFFSET] = rquery->num_results; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer); + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + if (radeon_state_pm4(rstate)) { + radeon_state_fini(rstate); + } +} + +static void r600_query_end(struct r600_context *rctx, struct r600_query *rquery) +{ + struct r600_screen *rscreen = rctx->screen; + struct radeon_state *rstate = &rquery->rstate; + + radeon_state_fini(rstate); + radeon_state_init(rstate, rscreen->rw, R600_STATE_QUERY_END, 0, 0); + rstate->states[R600_QUERY__OFFSET] = rquery->num_results + 8; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rquery->buffer); + rstate->nbo = 1; + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + if (radeon_state_pm4(rstate)) { + radeon_state_fini(rstate); + } } -static void r600_destroy_query(struct pipe_context *pipe, struct pipe_query *query) +static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) { + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_context *rctx = r600_context(ctx); + struct r600_query *q; + + if (query_type != PIPE_QUERY_OCCLUSION_COUNTER) + return NULL; + + q = CALLOC_STRUCT(r600_query); + if (!q) + return NULL; + + q->type = query_type; + LIST_ADDTAIL(&q->list, 
&rctx->query_list); + q->buffer_size = 4096; + + q->buffer = radeon_bo(rscreen->rw, 0, q->buffer_size, 1, NULL); + if (!q->buffer) { + FREE(q); + return NULL; + } + return (struct pipe_query *)q; +} + +static void r600_destroy_query(struct pipe_context *ctx, + struct pipe_query *query) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_query *q = r600_query(query); + + radeon_bo_decref(rscreen->rw, q->buffer); + LIST_DEL(&q->list); FREE(query); } -static void r600_begin_query(struct pipe_context *pipe, struct pipe_query *query) +static void r600_query_result(struct pipe_context *ctx, struct r600_query *rquery) { + struct r600_screen *rscreen = r600_screen(ctx->screen); + u64 start, end; + u32 *results; + int i; + + radeon_bo_wait(rscreen->rw, rquery->buffer); + radeon_bo_map(rscreen->rw, rquery->buffer); + results = rquery->buffer->data; + for (i = 0; i < rquery->num_results; i += 4) { + start = (u64)results[i] | (u64)results[i + 1] << 32; + end = (u64)results[i + 2] | (u64)results[i + 3] << 32; + if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) { + rquery->result += end - start; + } + } + radeon_bo_unmap(rscreen->rw, rquery->buffer); + rquery->num_results = 0; } -static void r600_end_query(struct pipe_context *pipe, struct pipe_query *query) +static void r600_query_resume(struct pipe_context *ctx, struct r600_query *rquery) { + struct r600_context *rctx = r600_context(ctx); + + if (rquery->num_results >= ((rquery->buffer_size >> 2) - 2)) { + /* running out of space */ + if (!rquery->flushed) { + ctx->flush(ctx, 0, NULL); + } + r600_query_result(ctx, rquery); + } + r600_query_begin(rctx, rquery); + rquery->flushed = false; +} + +static void r600_query_suspend(struct pipe_context *ctx, struct r600_query *rquery) +{ + struct r600_context *rctx = r600_context(ctx); + + r600_query_end(rctx, rquery); + rquery->num_results += 16; } -static boolean r600_get_query_result(struct pipe_context *pipe, +static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_query *rquery = r600_query(query); + int r; + + rquery->state = R600_QUERY_STATE_STARTED; + rquery->num_results = 0; + rquery->flushed = false; + r600_query_resume(ctx, rquery); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + if (r == -EBUSY) { + /* this shouldn't happen */ + R600_ERR("had to flush while emitting end query\n"); + ctx->flush(ctx, 0, NULL); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + } +} + +static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_query *rquery = r600_query(query); + int r; + + rquery->state &= ~R600_QUERY_STATE_STARTED; + rquery->state |= R600_QUERY_STATE_ENDED; + r600_query_suspend(ctx, rquery); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + if (r == -EBUSY) { + /* this shouldn't happen */ + R600_ERR("had to flush while emitting end query\n"); + ctx->flush(ctx, 0, NULL); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + } +} + +void r600_queries_suspend(struct pipe_context *ctx) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_query *rquery; + int r; + + LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { + if (rquery->state & R600_QUERY_STATE_STARTED) { + r600_query_suspend(ctx, rquery); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + if (r == -EBUSY) { + /* this shouldn't happen */ + 
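A host-side sketch of how the begin/end counter pairs are folded into one occlusion result, mirroring the loop in r600_query_result above. The layout (begin value at +0, end value at +8, 16 bytes per sample, top bit acting as a "written" flag) is inferred from the offsets in r600_query_begin/r600_query_end and the num_results bookkeeping, and is an assumption of this example:

/* Sketch only: sum (end - start) for every sample pair whose valid bit is set. */
#include <stdint.h>
#include <stdio.h>

static uint64_t sum_zpass_samples(const uint32_t *results, unsigned ndwords)
{
	uint64_t total = 0;
	for (unsigned i = 0; i + 3 < ndwords; i += 4) {
		uint64_t start = (uint64_t)results[i]     | (uint64_t)results[i + 1] << 32;
		uint64_t end   = (uint64_t)results[i + 2] | (uint64_t)results[i + 3] << 32;
		/* both halves must carry the "written" flag in bit 63 */
		if ((start & (1ULL << 63)) && (end & (1ULL << 63)))
			total += end - start;
	}
	return total;
}

int main(void)
{
	uint32_t buf[4] = { 100, 0x80000000u, 142, 0x80000000u };
	printf("%llu\n", (unsigned long long)sum_zpass_samples(buf, 4)); /* prints 42 */
	return 0;
}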
R600_ERR("had to flush while emitting end query\n"); + ctx->flush(ctx, 0, NULL); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + } + } + rquery->state |= R600_QUERY_STATE_SUSPENDED; + } +} + +void r600_queries_resume(struct pipe_context *ctx) +{ + struct r600_context *rctx = r600_context(ctx); + struct r600_query *rquery; + int r; + + LIST_FOR_EACH_ENTRY(rquery, &rctx->query_list, list) { + if (rquery->state & R600_QUERY_STATE_STARTED) { + r600_query_resume(ctx, rquery); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + if (r == -EBUSY) { + /* this shouldn't happen */ + R600_ERR("had to flush while emitting end query\n"); + ctx->flush(ctx, 0, NULL); + r = radeon_ctx_set_query_state(&rctx->ctx, &rquery->rstate); + } + } + rquery->state &= ~R600_QUERY_STATE_SUSPENDED; + } +} + +static boolean r600_get_query_result(struct pipe_context *ctx, struct pipe_query *query, - boolean wait, void *result) + boolean wait, void *vresult) { + struct r600_query *rquery = r600_query(query); + uint64_t *result = (uint64_t*)vresult; + + if (!rquery->flushed) { + ctx->flush(ctx, 0, NULL); + rquery->flushed = true; + } + r600_query_result(ctx, rquery); + *result = rquery->result; + rquery->result = 0; return TRUE; } void r600_init_query_functions(struct r600_context* rctx) { + LIST_INITHEAD(&rctx->query_list); + rctx->context.create_query = r600_create_query; rctx->context.destroy_query = r600_destroy_query; rctx->context.begin_query = r600_begin_query; diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index bb90e76fb78..129667ad20f 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -44,10 +44,22 @@ struct r600_resource_texture { struct r600_resource resource; unsigned long offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long width[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long height[PIPE_MAX_TEXTURE_LEVELS]; unsigned long layer_size[PIPE_MAX_TEXTURE_LEVELS]; unsigned long pitch_override; unsigned long bpt; unsigned long size; + unsigned tilled; + unsigned array_mode; + unsigned tile_type; + unsigned depth; + unsigned dirty; + struct radeon_bo *uncompressed; + struct radeon_state scissor[PIPE_MAX_TEXTURE_LEVELS]; + struct radeon_state cb[8][PIPE_MAX_TEXTURE_LEVELS]; + struct radeon_state db[PIPE_MAX_TEXTURE_LEVELS]; + struct radeon_state viewport[PIPE_MAX_TEXTURE_LEVELS]; }; void r600_init_context_resource_functions(struct r600_context *r600); diff --git a/src/gallium/drivers/r600/r600_screen.c b/src/gallium/drivers/r600/r600_screen.c index cdaca9ed7db..a047a49a6c5 100644 --- a/src/gallium/drivers/r600/r600_screen.c +++ b/src/gallium/drivers/r600/r600_screen.c @@ -69,6 +69,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE: + case PIPE_CAP_DEPTH_CLAMP: return 1; /* Unsupported features (boolean caps). */ @@ -77,7 +78,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_STREAM_OUTPUT: case PIPE_CAP_INDEP_BLEND_FUNC: /* FIXME allow this */ case PIPE_CAP_GEOMETRY_SHADER4: - case PIPE_CAP_DEPTH_CLAMP: /* FIXME allow this */ return 0; /* Texturing. 
*/ @@ -234,11 +234,34 @@ static void r600_destroy_screen(struct pipe_screen* pscreen) struct pipe_screen *r600_screen_create(struct radeon *rw) { struct r600_screen* rscreen; + enum radeon_family family = radeon_get_family(rw); rscreen = CALLOC_STRUCT(r600_screen); if (rscreen == NULL) { return NULL; } + + switch (family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + rscreen->chip_class = R600; + break; + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + rscreen->chip_class = R700; + break; + default: + FREE(rscreen); + return NULL; + } rscreen->rw = rw; rscreen->screen.winsys = (struct pipe_winsys*)rw; rscreen->screen.destroy = r600_destroy_screen; diff --git a/src/gallium/drivers/r600/r600_screen.h b/src/gallium/drivers/r600/r600_screen.h index 53b560c617f..b9938f117a8 100644 --- a/src/gallium/drivers/r600/r600_screen.h +++ b/src/gallium/drivers/r600/r600_screen.h @@ -30,6 +30,7 @@ #include <radeon_drm.h> #include "radeon.h" #include "util/u_transfer.h" +#include "r600_resource.h" /* Texture transfer. */ struct r600_transfer { @@ -38,11 +39,19 @@ struct r600_transfer { /* Buffer transfer. */ struct pipe_transfer *buffer_transfer; unsigned offset; + struct pipe_resource *linear_texture; +}; + +enum chip_class { + R600, + R700, + EVERGREEN, }; struct r600_screen { struct pipe_screen screen; struct radeon *rw; + enum chip_class chip_class; }; static INLINE struct r600_screen *r600_screen(struct pipe_screen *screen) @@ -62,7 +71,7 @@ unsigned r600_buffer_is_referenced_by_cs(struct pipe_context *context, struct pipe_resource *r600_buffer_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle); -/* Texture transfer functions. */ +/* r600_texture.c texture transfer functions. 
*/ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, struct pipe_resource *texture, struct pipe_subresource sr, @@ -74,7 +83,14 @@ void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer); void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); +int r600_texture_scissor(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); +int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level); +int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); +int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); +int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); +/* r600_blit.c */ +int r600_blit_uncompress_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level); /* helpers */ int r600_conv_pipe_format(unsigned pformat, unsigned *format); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 956c7e7930c..0ba26a23112 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -48,6 +48,9 @@ struct r600_shader_ctx { struct r600_bc *bc; struct r600_shader *shader; u32 value[4]; + u32 *literals; + u32 nliterals; + u32 max_driver_temp_used; }; struct r600_shader_tgsi_instruction { @@ -105,8 +108,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_screen *rscreen = r600_screen(ctx->screen); int r; -fprintf(stderr, "--------------------------------------------------------------\n"); -tgsi_dump(tokens, 0); +//fprintf(stderr, "--------------------------------------------------------------\n"); +//tgsi_dump(tokens, 0); if (rpshader == NULL) return -ENOMEM; rpshader->shader.family = radeon_get_family(rscreen->rw); @@ -120,7 +123,7 @@ tgsi_dump(tokens, 0); R600_ERR("building bytecode failed !\n"); return r; } -fprintf(stderr, "______________________________________________________________\n"); +//fprintf(stderr, "______________________________________________________________\n"); return 0; } @@ -131,10 +134,9 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta struct radeon_state *state; unsigned i, tmp; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER); - if (state == NULL) - return -ENOMEM; + state = &rpshader->rstate[0]; + radeon_state_fini(&rpshader->rstate[0]); + radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS); for (i = 0; i < 10; i++) { state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0; } @@ -144,12 +146,13 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 2; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + 
S_028868_STACK_SIZE(rshader->bc.nstack); + state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); + state->nbo = 2; + state->placement[0] = RADEON_GEM_DOMAIN_GTT; + state->placement[2] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -161,17 +164,20 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta struct r600_context *rctx = r600_context(ctx); struct radeon_state *state; unsigned i, tmp, exports_ps, num_cout; + boolean have_pos = FALSE; + state = &rpshader->rstate[0]; rasterizer = &rctx->rasterizer->state.rasterizer; - rpshader->rstate = radeon_state_decref(rpshader->rstate); - state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER); - if (state == NULL) - return -ENOMEM; + radeon_state_fini(state); + radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS); for (i = 0; i < rshader->ninput; i++) { tmp = S_028644_SEMANTIC(i); tmp |= S_028644_SEL_CENTROID(1); + if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) + have_pos = TRUE; if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || - rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) { + rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || + rshader->input[i].name == TGSI_SEMANTIC_POSITION) { tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); } if (rasterizer->sprite_coord_enable & (1 << i)) { @@ -190,15 +196,24 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta num_cout++; } } + if (!exports_ps) { + /* always at least export 1 component per pixel */ + exports_ps = 2; + } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); + if (have_pos) { + state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |= S_0286CC_POSITION_ENA(1) | + S_0286CC_BARYC_SAMPLE_CNTL(1); + state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1; + } state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; - rpshader->rstate = state; - rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); - rpshader->rstate->nbo = 1; - rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); + state->nbo = 1; + state->placement[0] = RADEON_GEM_DOMAIN_GTT; return radeon_state_pm4(state); } @@ -268,21 +283,24 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("predicate unsupported\n"); return -EINVAL; } +#if 0 if (i->Instruction.Label) { R600_ERR("label unsupported\n"); return -EINVAL; } +#endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { - if (i->Src[j].Register.Indirect || - i->Src[j].Register.Dimension || + if (i->Src[j].Register.Dimension || i->Src[j].Register.Absolute) { - R600_ERR("unsupported src (indirect|dimension|absolute)\n"); + R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, + i->Src[j].Register.Dimension, + i->Src[j].Register.Absolute); return -EINVAL; } } for (j = 0; j < i->Instruction.NumDstRegs; j++) { - if (i->Dst[j].Register.Indirect || i->Dst[j].Register.Dimension) { - R600_ERR("unsupported dst (indirect|dimension)\n"); + if (i->Dst[j].Register.Dimension) { + R600_ERR("unsupported dst (dimension)\n"); return -EINVAL; } } @@ 
-333,6 +351,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: case TGSI_FILE_SAMPLER: + case TGSI_FILE_ADDRESS: break; default: R600_ERR("unsupported file %d declaration\n", d->Declaration.File); @@ -341,6 +360,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) return 0; } +static int r600_get_temp(struct r600_shader_ctx *ctx) +{ + return ctx->temp_reg + ctx->max_driver_temp_used++; +} + int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; @@ -362,9 +386,15 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s shader->processor_type = ctx.type; /* register allocations */ - /* Values [0,127] correspond to GPR[0..127]. - * Values [256,511] correspond to cfile constants c[0..255]. + /* Values [0,127] correspond to GPR[0..127]. + * Values [128,159] correspond to constant buffer bank 0 + * Values [160,191] correspond to constant buffer bank 1 + * Values [256,511] correspond to cfile constants c[0..255]. * Other special values are shown in the list below. + * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) + * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) + * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) + * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) * 248 SQ_ALU_SRC_0: special constant 0.0. * 249 SQ_ALU_SRC_1: special constant 1.0 float. * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. @@ -389,15 +419,24 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.nliterals = 0; + ctx.literals = NULL; + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: immediate = &ctx.parse.FullToken.FullImmediate; - ctx.value[0] = immediate->u[0].Uint; - ctx.value[1] = immediate->u[1].Uint; - ctx.value[2] = immediate->u[2].Uint; - ctx.value[3] = immediate->u[3].Uint; + ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); + if(ctx.literals == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; + ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; + ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; + ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; + ctx.nliterals++; break; case TGSI_TOKEN_TYPE_DECLARATION: r = tgsi_declaration(&ctx); @@ -408,6 +447,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s r = tgsi_is_supported(&ctx); if (r) goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; r = ctx.inst_info->process(&ctx); @@ -458,6 +500,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { output[i].array_base = 61; + output[i].swizzle_x = 2; + output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; } else { R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); @@ 
-504,7 +548,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_z = 7; output[0].swizzle_w = 7; output[0].barrier = 1; - output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; output[0].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT; noutput++; @@ -525,9 +569,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; } + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; out_err: + free(ctx.literals); tgsi_parse_free(&ctx.parse); return r; } @@ -547,11 +593,19 @@ static int tgsi_src(struct r600_shader_ctx *ctx, const struct tgsi_full_src_register *tgsi_src, struct r600_bc_alu_src *r600_src) { + int index; memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); r600_src->sel = tgsi_src->Register.Index; if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { r600_src->sel = 0; + index = tgsi_src->Register.Index; + ctx->value[0] = ctx->literals[index * 4 + 0]; + ctx->value[1] = ctx->literals[index * 4 + 1]; + ctx->value[2] = ctx->literals[index * 4 + 2]; + ctx->value[3] = ctx->literals[index * 4 + 3]; } + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; @@ -568,6 +622,8 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; r600_dst->chan = swizzle; r600_dst->write = 1; + if (tgsi_dst->Register.Indirect) + r600_dst->rel = V_SQ_REL_RELATIVE; if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } @@ -607,12 +663,13 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { + int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = r600_src[0].sel; + alu.src[0].sel = r600_src[j].sel; alu.src[0].chan = k; - alu.dst.sel = ctx->temp_reg + j; + alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; if (k == 3) @@ -621,37 +678,90 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s if (r) return r; } - r600_src[0].sel = ctx->temp_reg + j; + r600_src[j].sel = treg; j--; } } return 0; } -static int tgsi_op2(struct r600_shader_ctx *ctx) +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { + nliteral++; + } + } + for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = r600_src[j].sel; + alu.src[0].chan = k; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = 
r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + r600_src[j].sel = treg; + j++; + } + } + return 0; +} + +static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; + int lasti = 0; + + for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) { + lasti = i; + } + } r = tgsi_split_constant(ctx, r600_src); if (r) return r; - for (i = 0; i < 4; i++) { + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu.dst.chan = i; - } else { - alu.inst = ctx->inst_info->r600_opcode; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + + alu.inst = ctx->inst_info->r600_opcode; + if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { alu.src[j] = r600_src[j]; alu.src[j].chan = tgsi_chan(&inst->Src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + } else { + alu.src[0] = r600_src[1]; + alu.src[0].chan = tgsi_chan(&inst->Src[1], i); + + alu.src[1] = r600_src[0]; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -664,7 +774,7 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) default: break; } - if (i == 3) { + if (i == lasti) { alu.last = 1; } r = r600_bc_add_alu(ctx->bc, &alu); @@ -674,24 +784,154 @@ static int tgsi_op2(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_kill(struct r600_shader_ctx *ctx) +static int tgsi_op2(struct r600_shader_ctx *ctx) +{ + return tgsi_op2_s(ctx, 0); +} + +static int tgsi_op2_swap(struct r600_shader_ctx *ctx) +{ + return tgsi_op2_s(ctx, 1); +} + +/* + * r600 - trunc to -PI..PI range + * r700 - normalize by dividing by 2PI + * see fdo bug 27901 + */ +static int tgsi_setup_trig(struct r600_shader_ctx *ctx, + struct r600_bc_alu_src r600_src[3]) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int r; + uint32_t lit_vals[4]; + struct r600_bc_alu alu; + + memset(lit_vals, 0, 4*4); + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; + + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + + lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); + lit_vals[1] = fui(0.5f); + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + + alu.src[0] = r600_src[0]; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].chan = 0; + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[2].chan = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc, lit_vals); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT; + + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + if (ctx->bc->chiprev == 0) { + lit_vals[0] = fui(3.1415926535897f * 2.0f); + lit_vals[1] = fui(-3.1415926535897f); + } else { + lit_vals[0] = fui(1.0f); + lit_vals[1] = fui(-0.5f); + } + + 
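Written out as scalar CPU math, the tgsi_setup_trig sequence above first wraps the angle into [0, 1) with a MULADD/FRACT pair, then rescales it per chip, since (per the comment above and fdo bug 27901) the R600 SIN/COS units want an argument in -PI..PI while R700 expects the angle pre-divided by 2*PI. A rough reference under that reading of the two literal pairs:

/* Sketch only: CPU equivalent of the MULADD + FRACT + MULADD sequence above. */
#include <math.h>
#include <stdio.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

static float trig_input(float src, int is_r600)
{
	/* tmp = fract(src * 1/(2*PI) + 0.5) -- wraps the angle into [0, 1) */
	float tmp = src * (float)(1.0 / (M_PI * 2.0)) + 0.5f;
	tmp = tmp - floorf(tmp);

	if (is_r600)
		return tmp * (float)(M_PI * 2.0) - (float)M_PI; /* back to -PI..PI       */
	else
		return tmp * 1.0f - 0.5f;                       /* normalized, -0.5..0.5 */
}

int main(void)
{
	printf("%f %f\n", trig_input(7.0f, 1), trig_input(7.0f, 0));
	return 0;
}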
memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].chan = 0; + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[2].chan = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc, lit_vals); + if (r) + return r; + return 0; +} + +static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; + int lasti = 0; + + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + /* replicate result */ for (i = 0; i < 4; i++) { + if (inst->Dst[0].Register.WriteMask & (1 << i)) + lasti = i; + } + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = ctx->inst_info->r600_opcode; - alu.dst.chan = i; - alu.src[0].sel = 248; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + + alu.src[0].sel = ctx->temp_reg; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); - if (i == 3) { + if (i == lasti) alu.last = 1; - } r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; @@ -699,30 +939,70 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_slt(struct r600_shader_ctx *ctx) +static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; - int i, r; + int r; - r = tgsi_split_constant(ctx, r600_src); + r = tgsi_setup_trig(ctx, r600_src); + if (r) + return r; + + + /* dst.x = COS */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS; + r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); if (r) return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.y = SIN */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN; + r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (r) + return r; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + +static int tgsi_kill(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); - if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu.dst.chan = i; + alu.inst = ctx->inst_info->r600_opcode; + + alu.dst.chan = i; + + alu.src[0].sel = V_SQ_ALU_SRC_0; + + if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[1].neg = 1; } else { - alu.inst = 
ctx->inst_info->r600_opcode; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); if (r) return r; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); } if (i == 3) { alu.last = 1; @@ -731,6 +1011,13 @@ static int tgsi_slt(struct r600_shader_ctx *ctx) if (r) return r; } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* kill must be last in ALU */ + ctx->bc->force_add_cf = 1; + ctx->shader->uses_kill = TRUE; return 0; } @@ -738,12 +1025,20 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; + struct r600_bc_alu_src r600_src[3]; int r; + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; + r = tgsi_split_literal_constant(ctx, r600_src); + if (r) + return r; + /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; /*1.0*/ + alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); if (r) @@ -756,11 +1051,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[1].sel = 248; /*0.0*/ - alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); + alu.src[0] = r600_src[0]; + alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ + alu.src[1].chan = 0; r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); if (r) return r; @@ -769,18 +1062,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - /* dst.z = NOP - fill Z slot */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP; - alu.dst.chan = 2; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - /* dst.w, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; + alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); if (r) @@ -791,6 +1076,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -799,9 +1088,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + alu.src[0] = r600_src[0]; alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); if (r) @@ -811,21 +1098,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + chan = alu.dst.chan; sel = alu.dst.sel; /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + alu.src[0] = r600_src[0]; alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]); - if (r) - return r; + + alu.src[2] = 
r600_src[0]; alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -836,6 +1124,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; @@ -880,19 +1171,43 @@ static int tgsi_trans(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.src[0].sel = ctx->temp_reg; + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.dst.chan = i; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - int i, j, r; + int i, r; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); if (r) return r; - alu.src[j].chan = tgsi_chan(&inst->Src[j], 0); + alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -900,16 +1215,124 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; /* replicate result */ + return tgsi_helper_tempx_replicate(ctx); +} + +static int tgsi_pow(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + /* LOG2(a) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc,ctx->value); + if (r) + return r; + /* b * LOG2(a) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE; + r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + alu.src[1].sel = ctx->temp_reg; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc,ctx->value); + if (r) + return r; + /* POW(a,b) = EXP2(b * LOG2(a))*/ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc,ctx->value); + if (r) + return r; + return tgsi_helper_tempx_replicate(ctx); +} + +static int tgsi_ssg(struct r600_shader_ctx *ctx) +{ + 
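tgsi_pow above and tgsi_ssg, whose body follows below, both lower to short scalar sequences; in plain C the intent is roughly the following (a sketch of the math only, not of the ALU encoding):

/* Sketch only: scalar reference for the POW and SSG lowerings. */
#include <math.h>
#include <stdio.h>

static float pow_ref(float a, float b)
{
	/* POW(a,b) = EXP2(b * LOG2(a)), as in the three-instruction sequence above */
	return exp2f(b * log2f(a));
}

static float ssg_ref(float src)
{
	/* tmp = (src > 0 ? 1 : src); dst = (-tmp > 0 ? -1 : tmp)  =>  sign(src) */
	float tmp = (src > 0.0f) ? 1.0f : src;
	return (-tmp > 0.0f) ? -1.0f : tmp;
}

int main(void)
{
	printf("%f %f %f\n", pow_ref(2.0f, 10.0f), ssg_ref(-3.5f), ssg_ref(0.0f));
	return 0;
}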
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + struct r600_bc_alu_src r600_src[3]; + int i, r; + + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; + + /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.src[0].sel = ctx->temp_reg; - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; + alu.is_op3 = 1; + + alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; + + alu.src[0] = r600_src[0]; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + + alu.src[1].sel = V_SQ_ALU_SRC_1; + + alu.src[2] = r600_src[0]; + alu.src[2].chan = tgsi_chan(&inst->Src[0], i); + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + /* dst = (-tmp > 0 ? -1 : tmp) */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT; + alu.is_op3 = 1; r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (r) return r; - alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i; + alu.src[0].neg = 1; + + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[1].neg = 1; + + alu.src[2].sel = ctx->temp_reg; + alu.src[2].chan = i; + if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1006,16 +1429,23 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) switch (ctx->inst_info->tgsi_opcode) { case TGSI_OPCODE_DP2: if (i > 1) { - alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = alu.src[1].chan = 0; } break; case TGSI_OPCODE_DP3: if (i > 2) { - alu.src[0].sel = alu.src[1].sel = 248; + alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = alu.src[1].chan = 0; } break; + case TGSI_OPCODE_DPH: + if (i == 3) { + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + alu.src[0].neg = 0; + } + break; default: break; } @@ -1035,75 +1465,197 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) struct r600_bc_tex tex; struct r600_bc_alu alu; unsigned src_gpr; - int r; + int r, i; + int opcode; + boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; + uint32_t lit_vals[4]; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; - /* Add perspective divide */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; - alu.src[0].sel = src_gpr; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + /* Add perspective divide */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan 
= 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 1); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; - alu.src[1].sel = src_gpr; - alu.src[1].chan = tgsi_chan(&inst->Src[0], 2); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 2; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; - alu.src[0].sel = 249; - alu.src[0].chan = 0; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - src_gpr = ctx->temp_reg; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); + if (r) + return r; + alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + src_not_temp = false; + src_gpr = ctx->temp_reg; + } + + if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { + int src_chan, src2_chan; + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; + switch (i) { + case 0: + src_chan = 2; + src2_chan = 1; + break; + case 1: + src_chan = 2; + src2_chan = 0; + break; + case 2: + src_chan = 0; + src2_chan = 2; + break; + case 3: + src_chan = 1; + src2_chan = 2; + break; + } + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); + r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); + if (r) + return r; + alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* tmp1.z = RCP_e(|tmp1.z|) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 2; + alu.src[0].abs = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.src[1].sel = ctx->temp_reg; + alu.src[1].chan = 2; + + alu.src[2].sel = 
V_SQ_ALU_SRC_LITERAL; + alu.src[2].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + alu.is_op3 = 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + alu.src[1].sel = ctx->temp_reg; + alu.src[1].chan = 2; + + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[2].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + lit_vals[0] = fui(1.5f); + + r = r600_bc_add_literal(ctx->bc, lit_vals); + if (r) + return r; + src_not_temp = false; + src_gpr = ctx->temp_reg; + } + + if (src_not_temp) { + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = src_gpr; + alu.src[0].chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + src_gpr = ctx->temp_reg; + } + + opcode = ctx->inst_info->r600_opcode; + if (opcode == SQ_TEX_INST_SAMPLE && + (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) + opcode = SQ_TEX_INST_SAMPLE_C; - /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */ memset(&tex, 0, sizeof(struct r600_bc_tex)); - tex.inst = ctx->inst_info->r600_opcode; + tex.inst = opcode; tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; tex.sampler_id = tex.resource_id; tex.src_gpr = src_gpr; @@ -1117,13 +1669,30 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_z = 2; tex.src_sel_w = 3; + if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { + tex.src_sel_x = 1; + tex.src_sel_y = 0; + tex.src_sel_z = 3; + tex.src_sel_w = 1; + } + if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { tex.coord_type_x = 1; tex.coord_type_y = 1; tex.coord_type_z = 1; tex.coord_type_w = 1; } - return r600_bc_add_tex(ctx->bc, &tex); + + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) + tex.src_sel_w = 2; + + r = r600_bc_add_tex(ctx->bc, &tex); + if (r) + return r; + + /* add shadow ambient support - gallium doesn't do it yet */ + return 0; + } static int tgsi_lrp(struct r600_shader_ctx *ctx) @@ -1141,7 +1710,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD; - alu.src[0].sel = 249; + alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; alu.src[1] = r600_src[0]; alu.src[1].chan = tgsi_chan(&inst->Src[0], i); @@ -1205,23 +1774,654 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } +static int tgsi_cmp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu alu; + int use_temp = 0; + int i, r; + + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; + + if (inst->Dst[0].Register.WriteMask != 0xf) + use_temp = 1; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE; + alu.src[0] = r600_src[0]; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + + alu.src[1] = r600_src[2]; + alu.src[1].chan = 
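In the cube-map path of tgsi_tex above, CUBE is assumed (per the usual r600 ISA description, not spelled out in this patch) to produce the unnormalized in-face coordinates in .x/.y, twice the major-axis value in .z and the face id in .w; tmp.z is then reciprocated and the two MULADDs with the 1.5f literal bias the projected coordinates into the range the sampler expects. A scalar sketch of those last steps:

/* Sketch only: the post-CUBE fixup, under the output layout assumed above. */
#include <stdio.h>

static void cube_face_coords(float cube_x, float cube_y, float cube_z,
			     float *s, float *t)
{
	float rcp = 1.0f / (cube_z < 0.0f ? -cube_z : cube_z); /* RECIP_IEEE(|z|)        */
	*s = cube_x * rcp + 1.5f;                              /* MULADD with 1.5f literal */
	*t = cube_y * rcp + 1.5f;
}

int main(void)
{
	float s, t;
	cube_face_coords(0.25f, -0.5f, 2.0f, &s, &t);
	printf("%f %f\n", s, t);  /* 1.625000 1.250000 */
	return 0;
}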
tgsi_chan(&inst->Src[2], i); + + alu.src[2] = r600_src[1]; + alu.src[2].chan = tgsi_chan(&inst->Src[1], i); + + if (use_temp) + alu.dst.sel = ctx->temp_reg; + else { + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + alu.dst.chan = i; + alu.dst.write = 1; + alu.is_op3 = 1; + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + if (use_temp) + return tgsi_helper_copy(ctx, inst); + return 0; +} + +static int tgsi_xpd(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu alu; + uint32_t use_temp = 0; + int i, r; + + if (inst->Dst[0].Register.WriteMask != 0xf) + use_temp = 1; + + r = tgsi_split_constant(ctx, r600_src); + if (r) + return r; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + + alu.src[0] = r600_src[0]; + switch (i) { + case 0: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + break; + case 1: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + break; + case 2: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + break; + case 3: + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = i; + } + + alu.src[1] = r600_src[1]; + switch (i) { + case 0: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + break; + case 1: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + break; + case 2: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + break; + case 3: + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.src[1].chan = i; + } + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD; + + alu.src[0] = r600_src[0]; + switch (i) { + case 0: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + break; + case 1: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + break; + case 2: + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + break; + case 3: + alu.src[0].sel = V_SQ_ALU_SRC_0; + alu.src[0].chan = i; + } + + alu.src[1] = r600_src[1]; + switch (i) { + case 0: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + break; + case 1: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + break; + case 2: + alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + break; + case 3: + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.src[1].chan = i; + } + + alu.src[2].sel = ctx->temp_reg; + alu.src[2].neg = 1; + alu.src[2].chan = i; + + if (use_temp) + alu.dst.sel = ctx->temp_reg; + else { + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + } + alu.dst.chan = i; + alu.dst.write = 1; + alu.is_op3 = 1; + if (i == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + if (use_temp) + return tgsi_helper_copy(ctx, inst); + return 0; +} + +static int tgsi_exp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu_src r600_src[3]; + struct r600_bc_alu alu; + int r; + + /* result.x = 2^floor(src); */ + if (inst->Dst[0].Register.WriteMask & 1) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + 
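The two ALU passes in tgsi_xpd above compute the standard cross product for the xyz lanes: the first MUL pass forms src0.zxy * src1.yzx into a temp, the second pass multiplies src0.yzx * src1.zxy and subtracts the temp via a negated MULADD operand. A scalar reference:

/* Sketch only: scalar reference for the MUL + MULADD(-tmp) cross product. */
#include <stdio.h>

static void xpd_ref(const float a[3], const float b[3], float out[3])
{
	float tmp[3] = { a[2]*b[1], a[0]*b[2], a[1]*b[0] };   /* first MUL pass  */
	out[0] = a[1]*b[2] - tmp[0];                          /* second pass:    */
	out[1] = a[2]*b[0] - tmp[1];                          /* MULADD with the */
	out[2] = a[0]*b[1] - tmp[2];                          /* temp negated    */
}

int main(void)
{
	float x[3] = { 1, 0, 0 }, y[3] = { 0, 1, 0 }, r[3];
	xpd_ref(x, y, r);
	printf("%g %g %g\n", r[0], r[1], r[2]);  /* 0 0 1 */
	return 0;
}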
alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* result.y = tmp - floor(tmp); */ + if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT; + alu.src[0] = r600_src[0]; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + + alu.dst.sel = ctx->temp_reg; +// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); +// if (r) +// return r; + alu.dst.write = 1; + alu.dst.chan = 1; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* result.z = RoughApprox2ToX(tmp);*/ + if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE; + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 2; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + + /* result.w = 1.0;*/ + if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.src[0].sel = V_SQ_ALU_SRC_1; + alu.src[0].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + r = r600_bc_add_literal(ctx->bc, ctx->value); + if (r) + return r; + } + return tgsi_helper_copy(ctx, inst); +} + +static int tgsi_arl(struct r600_shader_ctx *ctx) +{ + /* TODO from r600c, ar values don't persist between clauses */ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + + alu.last = 1; + + r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU); + if (r) + return r; + return 0; +} + +static int tgsi_opdst(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, r = 0; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL; + r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (r) + return r; + + if (i == 0 || i == 3) { + alu.src[0].sel = V_SQ_ALU_SRC_1; + } else { + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + } + + if (i == 0 || i == 2) { + alu.src[1].sel = V_SQ_ALU_SRC_1; + } else { + r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); + if (r) + return r; + alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + } + if (i == 3) + alu.last = 1; + 
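The per-channel SRC_1 substitutions in tgsi_opdst here add up to the DST opcode semantics with a single MUL per channel: whichever operand should pass through unchanged is replaced by the inline 1.0 constant, giving (1, src0.y*src1.y, src0.z, src1.w). A scalar reference:

/* Sketch only: what the SRC_1 substitutions in tgsi_opdst add up to. */
#include <stdio.h>

static void dst_ref(const float a[4], const float b[4], float out[4])
{
	out[0] = 1.0f;         /* both operands replaced by the 1.0 constant */
	out[1] = a[1] * b[1];  /* the only channel that really multiplies    */
	out[2] = a[2];         /* src1 replaced by 1.0                       */
	out[3] = b[3];         /* src0 replaced by 1.0                       */
}

int main(void)
{
	float a[4] = { 9, 2, 3, 9 }, b[4] = { 9, 4, 9, 5 }, o[4];
	dst_ref(a, b, o);
	printf("%g %g %g %g\n", o[0], o[1], o[2], o[3]);  /* 1 8 3 5 */
	return 0;
}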
r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = opcode; + alu.predicate = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 0; + + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.src[1].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE); + if (r) + return r; + return 0; +} + +static int pops(struct r600_shader_ctx *ctx, int pops) +{ + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP); + ctx->bc->cf_last->pop_count = pops; + return 0; +} + +static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) +{ + switch(reason) { + case FC_PUSH_VPM: + ctx->bc->callstack[ctx->bc->call_sp].current--; + break; + case FC_PUSH_WQM: + case FC_LOOP: + ctx->bc->callstack[ctx->bc->call_sp].current -= 4; + break; + case FC_REP: + /* TOODO : for 16 vp asic should -= 2; */ + ctx->bc->callstack[ctx->bc->call_sp].current --; + break; + } +} + +static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) +{ + if (check_max_only) { + int diff; + switch (reason) { + case FC_PUSH_VPM: + diff = 1; + break; + case FC_PUSH_WQM: + diff = 4; + break; + } + if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > + ctx->bc->callstack[ctx->bc->call_sp].max) { + ctx->bc->callstack[ctx->bc->call_sp].max = + ctx->bc->callstack[ctx->bc->call_sp].current + diff; + } + return; + } + switch (reason) { + case FC_PUSH_VPM: + ctx->bc->callstack[ctx->bc->call_sp].current++; + break; + case FC_PUSH_WQM: + case FC_LOOP: + ctx->bc->callstack[ctx->bc->call_sp].current += 4; + break; + case FC_REP: + ctx->bc->callstack[ctx->bc->call_sp].current++; + break; + } + + if ((ctx->bc->callstack[ctx->bc->call_sp].current) > + ctx->bc->callstack[ctx->bc->call_sp].max) { + ctx->bc->callstack[ctx->bc->call_sp].max = + ctx->bc->callstack[ctx->bc->call_sp].current; + } +} + +static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) +{ + struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; + + sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, + sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); + sp->mid[sp->num_mid] = ctx->bc->cf_last; + sp->num_mid++; +} + +static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) +{ + ctx->bc->fc_sp++; + ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; + ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; +} + +static void fc_poplevel(struct r600_shader_ctx *ctx) +{ + struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; + if (sp->mid) { + free(sp->mid); + sp->mid = NULL; + } + sp->num_mid = 0; + sp->start = NULL; + sp->type = 0; + ctx->bc->fc_sp--; +} + +#if 0 +static int emit_return(struct r600_shader_ctx *ctx) +{ + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); + return 0; +} + +static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) +{ + + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + ctx->bc->cf_last->pop_count = pops; + /* TODO work out offset */ + return 0; +} + +static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 
+{ + return 0; +} + +static void emit_testflag(struct r600_shader_ctx *ctx) +{ + +} + +static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) +{ + emit_testflag(ctx); + emit_jump_to_offset(ctx, 1, 4); + emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); + pops(ctx, ifidx + 1); + emit_return(ctx); +} + +static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) +{ + emit_testflag(ctx); + + r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + ctx->bc->cf_last->pop_count = 1; + + fc_set_mid(ctx, fc_sp); + + pops(ctx, 1); +} +#endif + +static int tgsi_if(struct r600_shader_ctx *ctx) +{ + emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE); + + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + + fc_pushlevel(ctx, FC_IF); + + callstack_check_depth(ctx, FC_PUSH_VPM, 0); + return 0; +} + +static int tgsi_else(struct r600_shader_ctx *ctx) +{ + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE); + ctx->bc->cf_last->pop_count = 1; + + fc_set_mid(ctx, ctx->bc->fc_sp); + ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; + return 0; +} + +static int tgsi_endif(struct r600_shader_ctx *ctx) +{ + pops(ctx, 1); + if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { + R600_ERR("if/endif unbalanced in shader\n"); + return -1; + } + + if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { + ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; + } else { + ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; + } + fc_poplevel(ctx); + + callstack_decrease_current(ctx, FC_PUSH_VPM); + return 0; +} + +static int tgsi_bgnloop(struct r600_shader_ctx *ctx) +{ + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL); + + fc_pushlevel(ctx, FC_LOOP); + + /* check stack depth */ + callstack_check_depth(ctx, FC_LOOP, 0); + return 0; +} + +static int tgsi_endloop(struct r600_shader_ctx *ctx) +{ + int i; + + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END); + + if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { + R600_ERR("loop/endloop in shader code are not paired.\n"); + return -EINVAL; + } + + /* fixup loop pointers - from r600isa + LOOP END points to CF after LOOP START, + LOOP START point to CF after LOOP END + BRK/CONT point to LOOP END CF + */ + ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; + + ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; + + for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { + ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; + } + /* TODO add LOOPRET support */ + fc_poplevel(ctx); + callstack_decrease_current(ctx, FC_LOOP); + return 0; +} + +static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) +{ + unsigned int fscp; + + for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) + { + if (FC_LOOP == ctx->bc->fc_stack[fscp].type) + break; + } + + if (fscp == 0) { + R600_ERR("Break not inside loop/endloop pair\n"); + return -EINVAL; + } + + r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + ctx->bc->cf_last->pop_count = 1; + + fc_set_mid(ctx, fscp); + + pops(ctx, 1); + callstack_check_depth(ctx, FC_PUSH_VPM, 1); + return 0; +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 
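/*
 * tgsi_endloop() above back-patches CF addresses once both ends of a loop are
 * known: LOOP_END points at the CF word after LOOP_START, LOOP_START points at
 * the CF word after LOOP_END, and any recorded BRK/CONT points at LOOP_END
 * itself (CF ids count 32-bit words, so "the next CF instruction" is id + 2).
 * A minimal self-contained model of that fixup; demo_cf is a stand-in struct,
 * not the driver's r600_bc_cf.
 */
#include <assert.h>

struct demo_cf {
	unsigned id;      /* dword address of this 2-dword CF instruction */
	unsigned cf_addr; /* jump target, patched after the fact */
};

static void demo_fixup_loop(struct demo_cf *start, struct demo_cf *end,
			    struct demo_cf **breaks, int nbreaks)
{
	int i;

	end->cf_addr = start->id + 2;     /* LOOP_END -> CF after LOOP_START */
	start->cf_addr = end->id + 2;     /* LOOP_START -> CF after LOOP_END */
	for (i = 0; i < nbreaks; i++)
		breaks[i]->cf_addr = end->id; /* BRK/CONT -> LOOP_END itself */
}

int main(void)
{
	struct demo_cf start = {10, 0}, brk = {14, 0}, end = {20, 0};
	struct demo_cf *mids[] = { &brk };

	demo_fixup_loop(&start, &end, mids, 1);
	assert(start.cf_addr == 22 && end.cf_addr == 12 && brk.cf_addr == 20);
	return 0;
}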
{TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_slt}, - {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, @@ -1232,38 +2432,38 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { /* gap */ {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, + {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, /* gap */ {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DDX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DDY, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* predicated kill */ + {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 
tgsi_dp}, + {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, + {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, + {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, 0x10, tgsi_tex}, + {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXP, 0, 0x10, tgsi_tex}, + {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -1274,21 +2474,21 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* SGN */ - {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, + 
{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, /* gap */ {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -1297,7 +2497,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, @@ -1308,12 +2508,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CONT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, /* gap */ {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 2ee7780ead0..7c722c07cbe 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -42,6 +42,7 @@ struct r600_shader { struct r600_shader_io input[32]; struct r600_shader_io output[32]; enum radeon_family family; + boolean uses_kill; }; #endif diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 002660c654a..fa7a31742af 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -206,6 +206,26 @@ #define S_SQ_ALU_WORD0_SRC0_SEL(x) (((x) & 0x1FF) << 0) #define G_SQ_ALU_WORD0_SRC0_SEL(x) (((x) >> 0) & 0x1FF) #define C_SQ_ALU_WORD0_SRC0_SEL 0xFFFFFE00 +/* + * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) + * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) + * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. 
(RV670+) + * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) + * 248 SQ_ALU_SRC_0: special constant 0.0. + * 249 SQ_ALU_SRC_1: special constant 1.0 float. + * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + * 253 SQ_ALU_SRC_LITERAL: literal constant. + * 254 SQ_ALU_SRC_PV: previous vector result. + * 255 SQ_ALU_SRC_PS: previous scalar result. + */ +#define V_SQ_ALU_SRC_0 0x000000F8 +#define V_SQ_ALU_SRC_1 0x000000F9 +#define V_SQ_ALU_SRC_1_INT 0x000000FA +#define V_SQ_ALU_SRC_M_1_INT 0x000000FB +#define V_SQ_ALU_SRC_0_5 0x000000FC +#define V_SQ_ALU_SRC_LITERAL 0x000000FD #define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) #define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) #define C_SQ_ALU_WORD0_SRC0_REL 0xFFFFFDFF @@ -583,4 +603,11 @@ #define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) #define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF +#define V_SQ_CF_COND_ACTIVE 0x00 +#define V_SQ_CF_COND_FALSE 0x01 +#define V_SQ_CF_COND_BOOL 0x02 +#define V_SQ_CF_COND_NOT_BOOL 0x03 + +#define V_SQ_REL_ABSOLUTE 0 +#define V_SQ_REL_RELATIVE 1 #endif diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3efd409ae0d..66cab7d7a6e 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -34,6 +34,17 @@ #include "r600d.h" #include "r600_state_inlines.h" +static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state); +static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state); +static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_clip_state *state); +static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_sampler_state *state, unsigned id); +static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, const struct pipe_sampler_view *view, unsigned id); +static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_framebuffer_state *state, int cb); +static void r600_db(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_framebuffer_state *state); + + static void *r600_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { @@ -81,11 +92,12 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct r600_context *rctx = r600_context(ctx); struct r600_context_state *rstate; - rstate = r600_context_state(rctx, pipe_sampler_type, state); + rstate = r600_context_state(rctx, pipe_sampler_view_type, state); pipe_reference(NULL, &texture->reference); rstate->state.sampler_view.texture = texture; rstate->state.sampler_view.reference.count = 1; rstate->state.sampler_view.context = ctx; + r600_resource(ctx, &rstate->rstate[0], &rstate->state.sampler_view, 0); return &rstate->state.sampler_view; } @@ -223,12 +235,24 @@ static void r600_bind_ps_sampler(struct pipe_context *ctx, struct r600_context_state *rstate; unsigned i; - for (i = 0; i < rctx->ps_nsampler; i++) { - rctx->ps_sampler[i] = r600_context_state_decref(rctx->ps_sampler[i]); + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + if (rstate) { + rstate->nrstate = 0; + } } for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)states[i]; - rctx->ps_sampler[i] = 
r600_context_state_incref(rstate); + if (rstate) { + if (rstate->nrstate >= R600_MAX_RSTATE) + continue; + if (rstate->nrstate) { + memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); + } + radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_PS); + rctx->ps_sampler[i] = &rstate->rstate[rstate->nrstate]; + rstate->nrstate++; + } } rctx->ps_nsampler = count; } @@ -240,12 +264,24 @@ static void r600_bind_vs_sampler(struct pipe_context *ctx, struct r600_context_state *rstate; unsigned i; - for (i = 0; i < rctx->vs_nsampler; i++) { - rctx->vs_sampler[i] = r600_context_state_decref(rctx->vs_sampler[i]); + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)states[i]; + if (rstate) { + rstate->nrstate = 0; + } } for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)states[i]; - rctx->vs_sampler[i] = r600_context_state_incref(rstate); + if (rstate) { + if (rstate->nrstate >= R600_MAX_RSTATE) + continue; + if (rstate->nrstate) { + memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); + } + radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_SAMPLER, i, R600_SHADER_VS); + rctx->vs_sampler[i] = &rstate->rstate[rstate->nrstate]; + rstate->nrstate++; + } } rctx->vs_nsampler = count; } @@ -268,6 +304,13 @@ static void r600_set_blend_color(struct pipe_context *ctx, static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { + struct r600_context *rctx = r600_context(ctx); + struct r600_context_state *rstate; + + rstate = r600_context_state(rctx, pipe_clip_type, state); + r600_bind_state(ctx, rstate); + /* refcount is taken care of this */ + r600_delete_state(ctx, rstate); } static void r600_set_constant_buffer(struct pipe_context *ctx, @@ -276,19 +319,21 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, { struct r600_screen *rscreen = r600_screen(ctx->screen); struct r600_context *rctx = r600_context(ctx); - unsigned nconstant = 0, i, type, id; - struct radeon_state *rstate; + unsigned nconstant = 0, i, type, shader_class; + struct radeon_state *rstate, *rstates; struct pipe_transfer *transfer; u32 *ptr; + type = R600_STATE_CONSTANT; + switch (shader) { case PIPE_SHADER_VERTEX: - id = R600_VS_CONSTANT; - type = R600_VS_CONSTANT_TYPE; + shader_class = R600_SHADER_VS; + rstates = rctx->vs_constant; break; case PIPE_SHADER_FRAGMENT: - id = R600_PS_CONSTANT; - type = R600_PS_CONSTANT_TYPE; + shader_class = R600_SHADER_PS; + rstates = rctx->ps_constant; break; default: R600_ERR("unsupported %d\n", shader); @@ -300,17 +345,15 @@ static void r600_set_constant_buffer(struct pipe_context *ctx, if (ptr == NULL) return; for (i = 0; i < nconstant; i++) { - rstate = radeon_state(rscreen->rw, type, id + i); - if (rstate == NULL) - return; + rstate = &rstates[i]; + radeon_state_init(rstate, rscreen->rw, type, i, shader_class); rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT0_0] = ptr[i * 4 + 0]; rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT1_0] = ptr[i * 4 + 1]; rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT2_0] = ptr[i * 4 + 2]; rstate->states[R600_PS_CONSTANT__SQ_ALU_CONSTANT3_0] = ptr[i * 4 + 3]; if (radeon_state_pm4(rstate)) return; - if (radeon_draw_set_new(rctx->draw, rstate)) - return; + radeon_draw_bind(&rctx->draw, rstate); } pipe_buffer_unmap(ctx, buffer, transfer); } @@ -324,12 +367,24 @@ static void r600_set_ps_sampler_view(struct pipe_context *ctx, struct r600_context_state *rstate; unsigned 
i; - for (i = 0; i < rctx->ps_nsampler_view; i++) { - rctx->ps_sampler_view[i] = r600_context_state_decref(rctx->ps_sampler_view[i]); + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + if (rstate) { + rstate->nrstate = 0; + } } for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)views[i]; - rctx->ps_sampler_view[i] = r600_context_state_incref(rstate); + if (rstate) { + if (rstate->nrstate >= R600_MAX_RSTATE) + continue; + if (rstate->nrstate) { + memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); + } + radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_PS); + rctx->ps_sampler_view[i] = &rstate->rstate[rstate->nrstate]; + rstate->nrstate++; + } } rctx->ps_nsampler_view = count; } @@ -342,12 +397,24 @@ static void r600_set_vs_sampler_view(struct pipe_context *ctx, struct r600_context_state *rstate; unsigned i; - for (i = 0; i < rctx->vs_nsampler_view; i++) { - rctx->vs_sampler_view[i] = r600_context_state_decref(rctx->vs_sampler_view[i]); + for (i = 0; i < count; i++) { + rstate = (struct r600_context_state *)views[i]; + if (rstate) { + rstate->nrstate = 0; + } } for (i = 0; i < count; i++) { rstate = (struct r600_context_state *)views[i]; - rctx->vs_sampler_view[i] = r600_context_state_incref(rstate); + if (rstate) { + if (rstate->nrstate >= R600_MAX_RSTATE) + continue; + if (rstate->nrstate) { + memcpy(&rstate->rstate[rstate->nrstate], &rstate->rstate[0], sizeof(struct radeon_state)); + } + radeon_state_convert(&rstate->rstate[rstate->nrstate], R600_STATE_RESOURCE, i, R600_SHADER_VS); + rctx->vs_sampler_view[i] = &rstate->rstate[rstate->nrstate]; + rstate->nrstate++; + } } rctx->vs_nsampler_view = count; } @@ -360,6 +427,12 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rstate = r600_context_state(rctx, pipe_framebuffer_type, state); r600_bind_state(ctx, rstate); + for (int i = 0; i < state->nr_cbufs; i++) { + r600_cb(rctx, &rstate->rstate[i+1], state, i); + } + if (state->zsbuf) { + r600_db(rctx, &rstate->rstate[0], state); + } } static void r600_set_polygon_stipple(struct pipe_context *ctx, @@ -525,7 +598,7 @@ struct r600_context_state *r600_context_state_decref(struct r600_context_state * R600_ERR("invalid type %d\n", rstate->type); return NULL; } - radeon_state_decref(rstate->rstate); + radeon_state_fini(&rstate->rstate[0]); FREE(rstate); return NULL; } @@ -558,6 +631,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_viewport_type: rstate->state.viewport = (*states).viewport; + r600_viewport(rctx, &rstate->rstate[0], &rstate->state.viewport); break; case pipe_depth_type: rstate->state.depth = (*states).depth; @@ -573,6 +647,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_clip_type: rstate->state.clip = (*states).clip; + r600_ucp(rctx, &rstate->rstate[0], &rstate->state.clip); break; case pipe_stencil_type: rstate->state.stencil = (*states).stencil; @@ -585,6 +660,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_blend_type: rstate->state.blend = (*states).blend; + r600_blend(rctx, &rstate->rstate[0], &rstate->state.blend); break; case pipe_stencil_ref_type: rstate->state.stencil_ref = (*states).stencil_ref; @@ -599,6 +675,7 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne break; case pipe_sampler_type: rstate->state.sampler = (*states).sampler; + 
r600_sampler(rctx, &rstate->rstate[0], &rstate->state.sampler, 0); break; default: R600_ERR("invalid type %d\n", rstate->type); @@ -608,16 +685,12 @@ struct r600_context_state *r600_context_state(struct r600_context *rctx, unsigne return rstate; } -static struct radeon_state *r600_blend(struct r600_context *rctx) +static void r600_blend(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_blend_state *state) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; - const struct pipe_blend_state *state = &rctx->blend->state.blend; int i; - rstate = radeon_state(rscreen->rw, R600_BLEND_TYPE, R600_BLEND); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_BLEND, 0, 0); rstate->states[R600_BLEND__CB_BLEND_RED] = fui(rctx->blend_color.color[0]); rstate->states[R600_BLEND__CB_BLEND_GREEN] = fui(rctx->blend_color.color[1]); rstate->states[R600_BLEND__CB_BLEND_BLUE] = fui(rctx->blend_color.color[2]); @@ -661,29 +734,38 @@ static struct radeon_state *r600_blend(struct r600_context *rctx) rstate->states[R600_BLEND__CB_BLEND_CONTROL] = bc; } - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; + radeon_state_pm4(rstate); +} + +static void r600_ucp(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_clip_state *state) +{ + struct r600_screen *rscreen = rctx->screen; + + radeon_state_init(rstate, rscreen->rw, R600_STATE_UCP, 0, 0); + + for (int i = 0; i < state->nr; i++) { + rstate->states[i * 4 + 0] = fui(state->ucp[i][0]); + rstate->states[i * 4 + 1] = fui(state->ucp[i][1]); + rstate->states[i * 4 + 2] = fui(state->ucp[i][2]); + rstate->states[i * 4 + 3] = fui(state->ucp[i][3]); } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) +static void r600_cb(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_framebuffer_state *state, int cb) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; - const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level = state->cbufs[cb]->level; unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype; const struct util_format_description *desc; - rstate = radeon_state(rscreen->rw, R600_CB0_TYPE + cb, R600_CB0 + cb); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; rbuffer = &rtex->resource; rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); @@ -710,7 +792,7 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) S_0280A0_SOURCE_FORMAT(1) | S_0280A0_NUMBER_TYPE(ntype); - rstate->states[R600_CB0__CB_COLOR0_BASE] = 0x00000000; + rstate->states[R600_CB0__CB_COLOR0_BASE] = state->cbufs[cb]->offset >> 8; rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | S_028060_SLICE_TILE_MAX(slice); @@ -718,32 +800,29 @@ static struct radeon_state *r600_cb(struct r600_context *rctx, int cb) rstate->states[R600_CB0__CB_COLOR0_FRAG] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_TILE] = 0x00000000; rstate->states[R600_CB0__CB_COLOR0_MASK] = 0x00000000; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct 
radeon_state *r600_db(struct r600_context *rctx) +static void r600_db(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_framebuffer_state *state) { struct r600_screen *rscreen = rctx->screen; struct r600_resource_texture *rtex; struct r600_resource *rbuffer; - struct radeon_state *rstate; - const struct pipe_framebuffer_state *state = &rctx->framebuffer->state.framebuffer; unsigned level; unsigned pitch, slice, format; + radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); if (state->zsbuf == NULL) - return NULL; - - rstate = radeon_state(rscreen->rw, R600_DB_TYPE, R600_DB); - if (rstate == NULL) - return NULL; + return; rtex = (struct r600_resource_texture*)state->zsbuf->texture; + rtex->tilled = 1; + rtex->array_mode = 2; + rtex->tile_type = 1; + rtex->depth = 1; rbuffer = &rtex->resource; + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); rstate->nbo = 1; rstate->placement[0] = RADEON_GEM_DOMAIN_VRAM; @@ -751,31 +830,30 @@ static struct radeon_state *r600_db(struct r600_context *rctx) pitch = (rtex->pitch[level] / rtex->bpt) / 8 - 1; slice = (rtex->pitch[level] / rtex->bpt) * state->zsbuf->height / 64 - 1; format = r600_translate_dbformat(state->zsbuf->texture->format); - rstate->states[R600_DB__DB_DEPTH_BASE] = 0x00000000; - rstate->states[R600_DB__DB_DEPTH_INFO] = 0x00010000 | + rstate->states[R600_DB__DB_DEPTH_BASE] = state->zsbuf->offset >> 8; + rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtex->array_mode) | S_028010_FORMAT(format); rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (state->zsbuf->height / 8) -1; rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_rasterizer(struct r600_context *rctx) +static void r600_rasterizer(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_rasterizer_state *state = &rctx->rasterizer->state.rasterizer; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; + const struct pipe_clip_state *clip = NULL; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; float offset_units = 0, offset_scale = 0; char depth = 0; unsigned offset_db_fmt_cntl = 0; unsigned tmp; unsigned prov_vtx = 1; + + if (rctx->clip) + clip = &rctx->clip->state.clip; if (fb->zsbuf) { offset_units = state->offset_units; offset_scale = state->offset_scale * 12.0f; @@ -796,7 +874,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) break; default: R600_ERR("unsupported %d\n", fb->zsbuf->texture->format); - return NULL; + return; } } offset_db_fmt_cntl |= S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(depth); @@ -805,9 +883,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) prov_vtx = 0; rctx->flat_shade = state->flatshade; - rstate = radeon_state(rscreen->rw, R600_RASTERIZER_TYPE, R600_RASTERIZER); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_RASTERIZER, 0, 0); rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] = 0x00000001; if (state->sprite_coord_enable) { rstate->states[R600_RASTERIZER__SPI_INTERP_CONTROL_0] |= @@ -821,7 +897,12 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) S_0286D4_PNT_SPRITE_TOP_1(1); } } - rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0x00000000; + 
rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = 0; + if (clip) { + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] = S_028810_PS_UCP_MODE(3) | ((1 << clip->nr) - 1); + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_NEAR_DISABLE(clip->depth_clamp); + rstate->states[R600_RASTERIZER__PA_CL_CLIP_CNTL] |= S_028810_ZCLIP_FAR_DISABLE(clip->depth_clamp); + } rstate->states[R600_RASTERIZER__PA_SU_SC_MODE_CNTL] = S_028814_PROVOKING_VTX_LAST(prov_vtx) | S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | @@ -835,7 +916,7 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); rstate->states[R600_RASTERIZER__PA_CL_NANINF_CNTL] = 0x00000000; /* point size 12.4 fixed point */ - tmp = (unsigned)(state->point_size * 8.0 / 2.0); + tmp = (unsigned)(state->point_size * 8.0); rstate->states[R600_RASTERIZER__PA_SU_POINT_SIZE] = S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp); rstate->states[R600_RASTERIZER__PA_SU_POINT_MINMAX] = 0x80000000; rstate->states[R600_RASTERIZER__PA_SU_LINE_CNTL] = 0x00000008; @@ -852,19 +933,14 @@ static struct radeon_state *r600_rasterizer(struct r600_context *rctx) rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_FRONT_OFFSET] = fui(offset_units); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_SCALE] = fui(offset_scale); rstate->states[R600_RASTERIZER__PA_SU_POLY_OFFSET_BACK_OFFSET] = fui(offset_units); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_scissor(struct r600_context *rctx) +static void r600_scissor(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_scissor_state *state = &rctx->scissor->state.scissor; const struct pipe_framebuffer_state *fb = &rctx->framebuffer->state.framebuffer; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; unsigned minx, maxx, miny, maxy; u32 tl, br; @@ -881,9 +957,7 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx) } tl = S_028240_TL_X(minx) | S_028240_TL_Y(miny) | S_028240_WINDOW_OFFSET_DISABLE(1); br = S_028244_BR_X(maxx) | S_028244_BR_Y(maxy); - rstate = radeon_state(rscreen->rw, R600_SCISSOR_TYPE, R600_SCISSOR); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_WINDOW_OFFSET] = 0x00000000; @@ -903,22 +977,14 @@ static struct radeon_state *r600_scissor(struct r600_context *rctx) rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = br; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = tl; rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = br; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_viewport(struct r600_context *rctx) +static void r600_viewport(struct r600_context *rctx, struct radeon_state *rstate, const struct pipe_viewport_state *state) { - const struct pipe_viewport_state *state = &rctx->viewport->state.viewport; struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; - rstate = radeon_state(rscreen->rw, R600_VIEWPORT_TYPE, R600_VIEWPORT); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); 
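/*
 * The "point size 12.4 fixed point" conversion above multiplies the requested
 * diameter by 8.  With 4 fractional bits a float is encoded by multiplying by
 * 16, so diameter * 8 equals (diameter / 2) * 16, i.e. the register field
 * appears to take the half-size.  A small self-contained sketch of that
 * arithmetic; fixed_12_4() is an invented helper, not a driver function.
 */
#include <assert.h>

/* Encode a float as 12.4 fixed point (4 fractional bits). */
static unsigned fixed_12_4(float v)
{
	return (unsigned)(v * 16.0f);
}

int main(void)
{
	float point_size = 3.0f; /* requested diameter in pixels */

	/* diameter * 8 == half-size encoded in 12.4 fixed point */
	assert((unsigned)(point_size * 8.0f) == fixed_12_4(point_size / 2.0f));
	return 0;
}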
rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMIN_0] = 0x00000000; rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui(state->scale[0]); @@ -928,29 +994,28 @@ static struct radeon_state *r600_viewport(struct r600_context *rctx) rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui(state->translate[1]); rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = fui(state->translate[2]); rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_dsa(struct r600_context *rctx) +static void r600_dsa(struct r600_context *rctx, struct radeon_state *rstate) { const struct pipe_depth_stencil_alpha_state *state = &rctx->dsa->state.dsa; const struct pipe_stencil_ref *stencil_ref = &rctx->stencil_ref->state.stencil_ref; struct r600_screen *rscreen = rctx->screen; unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf; - struct r600_shader *rshader = &rctx->ps_shader->shader; - struct radeon_state *rstate; + struct r600_shader *rshader; int i; - rstate = radeon_state(rscreen->rw, R600_DSA_TYPE, R600_DSA); - if (rstate == NULL) - return NULL; + if (rctx->ps_shader == NULL) { + return; + } + radeon_state_init(rstate, rscreen->rw, R600_STATE_DSA, 0, 0); db_shader_control = 0x210; + rshader = &rctx->ps_shader->shader; + if (rshader->uses_kill) + db_shader_control |= (1 << 6); for (i = 0; i < rshader->noutput; i++) { if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) db_shader_control |= 1; @@ -1008,11 +1073,7 @@ static struct radeon_state *r600_dsa(struct r600_context *rctx) rstate->states[R600_DSA__DB_SRESULTS_COMPARE_STATE1] = 0x00000000; rstate->states[R600_DSA__DB_PRELOAD_CONTROL] = 0x00000000; rstate->states[R600_DSA__DB_ALPHA_TO_MASK] = 0x0000AA00; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } static inline unsigned r600_tex_wrap(unsigned wrap) @@ -1090,16 +1151,12 @@ static INLINE u32 S_FIXED(float value, u32 frac_bits) return value * (1 << frac_bits); } -static struct radeon_state *r600_sampler(struct r600_context *rctx, - const struct pipe_sampler_state *state, - unsigned id) +static void r600_sampler(struct r600_context *rctx, struct radeon_state *rstate, + const struct pipe_sampler_state *state, unsigned id) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; - rstate = radeon_state(rscreen->rw, R600_PS_SAMPLER_TYPE, id); - if (rstate == NULL) - return NULL; + radeon_state_init(rstate, rscreen->rw, R600_STATE_SAMPLER, id, R600_SHADER_PS); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD0_0] = S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | @@ -1114,11 +1171,7 @@ static struct radeon_state *r600_sampler(struct r600_context *rctx, S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)); rstate->states[R600_PS_SAMPLER__SQ_TEX_SAMPLER_WORD2_0] = S_03C008_TYPE(1); - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } static inline unsigned r600_tex_swizzle(unsigned swizzle) @@ -1160,6 +1213,7 @@ static inline unsigned r600_tex_dim(unsigned dim) case PIPE_TEXTURE_1D: return V_038000_SQ_TEX_DIM_1D; 
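/*
 * The sampler setup above turns the float LOD limits and bias into fixed-point
 * register fields: clamp to the range the field accepts, then scale by 2^6 for
 * a 6-bit fraction (that is all S_FIXED does).  A self-contained sketch of that
 * conversion; the demo_* names are invented for illustration.
 */
#include <assert.h>

#define DEMO_CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))

/* Same idea as S_FIXED above: scale a float by 2^frac_bits. */
static unsigned demo_fixed(float value, unsigned frac_bits)
{
	return (unsigned)(value * (1u << frac_bits));
}

int main(void)
{
	float max_lod = 11.25f, min_lod = -3.0f;

	/* LOD limits are clamped to [0, 15] and carry 6 fractional bits. */
	assert(demo_fixed(DEMO_CLAMP(max_lod, 0.0f, 15.0f), 6) == 720);
	assert(demo_fixed(DEMO_CLAMP(min_lod, 0.0f, 15.0f), 6) == 0);
	return 0;
}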
case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: return V_038000_SQ_TEX_DIM_2D; case PIPE_TEXTURE_3D: return V_038000_SQ_TEX_DIM_3D; @@ -1168,19 +1222,20 @@ static inline unsigned r600_tex_dim(unsigned dim) } } -static struct radeon_state *r600_resource(struct r600_context *rctx, - const struct pipe_sampler_view *view, - unsigned id) +static void r600_resource(struct pipe_context *ctx, struct radeon_state *rstate, + const struct pipe_sampler_view *view, unsigned id) { + struct r600_context *rctx = r600_context(ctx); struct r600_screen *rscreen = rctx->screen; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; - struct radeon_state *rstate; unsigned format; - uint32_t word4 = 0, yuv_format = 0; - unsigned char swizzle[4]; + uint32_t word4 = 0, yuv_format = 0, pitch = 0; + unsigned char swizzle[4], array_mode = 0, tile_type = 0; + int r; + rstate->cpm4 = 0; swizzle[0] = view->swizzle_r; swizzle[1] = view->swizzle_g; swizzle[2] = view->swizzle_b; @@ -1188,37 +1243,49 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, format = r600_translate_texformat(view->texture->format, swizzle, &word4, &yuv_format); - if (format == ~0) - return NULL; + if (format == ~0) { + return; + } desc = util_format_description(view->texture->format); if (desc == NULL) { R600_ERR("unknow format %d\n", view->texture->format); - return NULL; - } - rstate = radeon_state(rscreen->rw, R600_PS_RESOURCE_TYPE, id); - if (rstate == NULL) { - return NULL; + return; } + radeon_state_init(rstate, rscreen->rw, R600_STATE_RESOURCE, id, R600_SHADER_PS); tmp = (struct r600_resource_texture*)view->texture; rbuffer = &tmp->resource; - rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); - rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + if (tmp->depth) { + r = r600_texture_from_depth(ctx, tmp, view->first_level); + if (r) { + return; + } + rstate->bo[0] = radeon_bo_incref(rscreen->rw, tmp->uncompressed); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, tmp->uncompressed); + } else { + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + } rstate->nbo = 2; rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; rstate->placement[1] = RADEON_GEM_DOMAIN_GTT; rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; rstate->placement[3] = RADEON_GEM_DOMAIN_GTT; + pitch = (tmp->pitch[0] / tmp->bpt); + pitch = (pitch + 0x7) & ~0x7; + /* FIXME properly handle first level != 0 */ rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD0] = S_038000_DIM(r600_tex_dim(view->texture->target)) | - S_038000_PITCH(((tmp->pitch[0] / tmp->bpt) / 8) - 1) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | S_038000_TEX_WIDTH(view->texture->width0 - 1); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD1] = S_038004_TEX_HEIGHT(view->texture->height0 - 1) | S_038004_TEX_DEPTH(view->texture->depth0 - 1) | S_038004_DATA_FORMAT(format); - rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = 0; + rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD2] = tmp->offset[0] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD3] = tmp->offset[1] >> 8; rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD4] = word4 | @@ -1232,17 +1299,12 @@ static struct radeon_state *r600_resource(struct r600_context *rctx, S_038014_LAST_ARRAY(0); rstate->states[R600_PS_RESOURCE__RESOURCE0_WORD6] = S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE); - if (radeon_state_pm4(rstate)) { - 
radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) +static void r600_cb_cntl(struct r600_context *rctx, struct radeon_state *rstate) { struct r600_screen *rscreen = rctx->screen; - struct radeon_state *rstate; const struct pipe_blend_state *pbs = &rctx->blend->state.blend; int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; uint32_t color_control, target_mask, shader_mask; @@ -1257,7 +1319,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) } if (pbs->logicop_enable) { - color_control |= (pbs->logicop_func) << 16; + color_control |= (pbs->logicop_func << 16) | (pbs->logicop_func << 20); } else { color_control |= (0xcc << 16); } @@ -1277,7 +1339,7 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) target_mask |= (pbs->rt[0].colormask << (4 * i)); } } - rstate = radeon_state(rscreen->rw, R600_CB_CNTL_TYPE, R600_CB_CNTL); + radeon_state_init(rstate, rscreen->rw, R600_STATE_CB_CNTL, 0, 0); rstate->states[R600_CB_CNTL__CB_SHADER_MASK] = shader_mask; rstate->states[R600_CB_CNTL__CB_TARGET_MASK] = target_mask; rstate->states[R600_CB_CNTL__CB_COLOR_CONTROL] = color_control; @@ -1289,115 +1351,51 @@ static struct radeon_state *r600_cb_cntl(struct r600_context *rctx) rstate->states[R600_CB_CNTL__CB_CLRCMP_DST] = 0x000000FF; rstate->states[R600_CB_CNTL__CB_CLRCMP_MSK] = 0xFFFFFFFF; rstate->states[R600_CB_CNTL__PA_SC_AA_MASK] = 0xFFFFFFFF; - if (radeon_state_pm4(rstate)) { - radeon_state_decref(rstate); - return NULL; - } - return rstate; + radeon_state_pm4(rstate); } -int r600_context_hw_states(struct r600_context *rctx) +int r600_context_hw_states(struct pipe_context *ctx) { + struct r600_context *rctx = r600_context(ctx); unsigned i; - int r; - int nr_cbufs = rctx->framebuffer->state.framebuffer.nr_cbufs; - /* free previous TODO determine what need to be updated, what - * doesn't - */ - //radeon_state_decref(rctx->hw_states.config); - rctx->hw_states.cb_cntl = radeon_state_decref(rctx->hw_states.cb_cntl); - rctx->hw_states.db = radeon_state_decref(rctx->hw_states.db); - rctx->hw_states.rasterizer = radeon_state_decref(rctx->hw_states.rasterizer); - rctx->hw_states.scissor = radeon_state_decref(rctx->hw_states.scissor); - rctx->hw_states.dsa = radeon_state_decref(rctx->hw_states.dsa); - rctx->hw_states.blend = radeon_state_decref(rctx->hw_states.blend); - rctx->hw_states.viewport = radeon_state_decref(rctx->hw_states.viewport); - for (i = 0; i < 8; i++) { - rctx->hw_states.cb[i] = radeon_state_decref(rctx->hw_states.cb[i]); + /* build new states */ + r600_rasterizer(rctx, &rctx->hw_states.rasterizer); + r600_scissor(rctx, &rctx->hw_states.scissor); + r600_dsa(rctx, &rctx->hw_states.dsa); + r600_cb_cntl(rctx, &rctx->hw_states.cb_cntl); + + /* bind states */ + radeon_draw_bind(&rctx->draw, &rctx->hw_states.rasterizer); + radeon_draw_bind(&rctx->draw, &rctx->hw_states.scissor); + radeon_draw_bind(&rctx->draw, &rctx->hw_states.dsa); + radeon_draw_bind(&rctx->draw, &rctx->hw_states.cb_cntl); + + radeon_draw_bind(&rctx->draw, &rctx->config); + + if (rctx->viewport) { + radeon_draw_bind(&rctx->draw, &rctx->viewport->rstate[0]); } - for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - radeon_state_decref(rctx->hw_states.ps_resource[i]); - rctx->hw_states.ps_resource[i] = NULL; + if (rctx->blend) { + radeon_draw_bind(&rctx->draw, &rctx->blend->rstate[0]); } - rctx->hw_states.ps_nresource = 0; - for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { 
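/*
 * r600_cb_cntl() above packs one 4-bit RGBA write mask per bound render target
 * into CB_TARGET_MASK (and all-ones per target into CB_SHADER_MASK), and puts
 * 0xCC ("copy source") into the ROP3 field of CB_COLOR_CONTROL when logic ops
 * are disabled.  A self-contained sketch of the mask packing; the colormask
 * array below stands in for the pipe blend state and is not driver code.
 */
#include <assert.h>

/* Pack per-render-target RGBA write masks, 4 bits each, target 0 lowest. */
static unsigned pack_target_mask(const unsigned *colormask, int nr_cbufs)
{
	unsigned mask = 0;
	int i;

	for (i = 0; i < nr_cbufs; i++)
		mask |= (colormask[i] & 0xf) << (4 * i);
	return mask;
}

int main(void)
{
	/* RT0 writes RGBA, RT1 writes only RG (bit 0 = R ... bit 3 = A). */
	unsigned colormask[2] = { 0xf, 0x3 };

	assert(pack_target_mask(colormask, 2) == 0x3f);
	/* Default ROP3 when logic ops are off: 0xCC shifted into bits 16..23. */
	assert((0xcc << 16) == 0x00cc0000);
	return 0;
}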
- radeon_state_decref(rctx->hw_states.ps_sampler[i]); - rctx->hw_states.ps_sampler[i] = NULL; + if (rctx->clip) { + radeon_draw_bind(&rctx->draw, &rctx->clip->rstate[0]); } - rctx->hw_states.ps_nsampler = 0; - - /* build new states */ - rctx->hw_states.rasterizer = r600_rasterizer(rctx); - rctx->hw_states.scissor = r600_scissor(rctx); - rctx->hw_states.dsa = r600_dsa(rctx); - rctx->hw_states.blend = r600_blend(rctx); - rctx->hw_states.viewport = r600_viewport(rctx); - for (i = 0; i < nr_cbufs; i++) { - rctx->hw_states.cb[i] = r600_cb(rctx, i); + for (i = 0; i < rctx->framebuffer->state.framebuffer.nr_cbufs; i++) { + radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[i+1]); + } + if (rctx->framebuffer->state.framebuffer.zsbuf) { + radeon_draw_bind(&rctx->draw, &rctx->framebuffer->rstate[0]); } - rctx->hw_states.db = r600_db(rctx); - rctx->hw_states.cb_cntl = r600_cb_cntl(rctx); - for (i = 0; i < rctx->ps_nsampler; i++) { if (rctx->ps_sampler[i]) { - rctx->hw_states.ps_sampler[i] = r600_sampler(rctx, - &rctx->ps_sampler[i]->state.sampler, - R600_PS_SAMPLER + i); + radeon_draw_bind(&rctx->draw, rctx->ps_sampler[i]); } } - rctx->hw_states.ps_nsampler = rctx->ps_nsampler; for (i = 0; i < rctx->ps_nsampler_view; i++) { if (rctx->ps_sampler_view[i]) { - rctx->hw_states.ps_resource[i] = r600_resource(rctx, - &rctx->ps_sampler_view[i]->state.sampler_view, - R600_PS_RESOURCE + i); - } - } - rctx->hw_states.ps_nresource = rctx->ps_nsampler_view; - - /* bind states */ - r = radeon_draw_set(rctx->draw, rctx->hw_states.db); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.rasterizer); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.scissor); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.dsa); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.blend); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.viewport); - if (r) - return r; - for (i = 0; i < nr_cbufs; i++) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb[i]); - if (r) - return r; - } - r = radeon_draw_set(rctx->draw, rctx->hw_states.config); - if (r) - return r; - r = radeon_draw_set(rctx->draw, rctx->hw_states.cb_cntl); - if (r) - return r; - for (i = 0; i < rctx->hw_states.ps_nresource; i++) { - if (rctx->hw_states.ps_resource[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_resource[i]); - if (r) - return r; - } - } - for (i = 0; i < rctx->hw_states.ps_nsampler; i++) { - if (rctx->hw_states.ps_sampler[i]) { - r = radeon_draw_set(rctx->draw, rctx->hw_states.ps_sampler[i]); - if (r) - return r; + radeon_draw_bind(&rctx->draw, rctx->ps_sampler_view[i]); } } return 0; diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h index f93c20da35e..84866825aab 100644 --- a/src/gallium/drivers/r600/r600_state_inlines.h +++ b/src/gallium/drivers/r600/r600_state_inlines.h @@ -252,6 +252,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_R8SG8SB8UX8U_NORM: case PIPE_FORMAT_X8B8G8R8_UNORM: case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: return V_0280A0_COLOR_8_8_8_8; case PIPE_FORMAT_R10G10B10A2_UNORM: @@ -262,7 +263,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return V_0280A0_COLOR_24_8; + return V_0280A0_COLOR_8_24; + + case PIPE_FORMAT_R32_FLOAT: + return V_0280A0_COLOR_32_FLOAT; /* 64-bit buffers. 
*/ case PIPE_FORMAT_R16G16B16A16_UNORM: @@ -275,6 +279,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format) /* 128-bit buffers. */ case PIPE_FORMAT_R32G32B32_FLOAT: + return V_0280A0_COLOR_32_32_32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: return V_0280A0_COLOR_32_32_32_32_FLOAT; diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 30d79ebdd6f..b6698e3885c 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -24,6 +24,7 @@ * Jerome Glisse * Corbin Simpson */ +#include <errno.h> #include <pipe/p_screen.h> #include <util/u_format.h> #include <util/u_math.h> @@ -33,10 +34,26 @@ #include "r600_screen.h" #include "r600_context.h" #include "r600_resource.h" +#include "r600_state_inlines.h" #include "r600d.h" extern struct u_resource_vtbl r600_texture_vtbl; +/* Copy from a tiled texture to a detiled one. */ +static void r600_copy_from_tiled_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer) +{ + struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer; + struct pipe_resource *texture = transfer->resource; + struct pipe_subresource subdst; + + subdst.face = 0; + subdst.level = 0; + ctx->resource_copy_region(ctx, rtransfer->linear_texture, + subdst, 0, 0, 0, texture, transfer->sr, + transfer->box.x, transfer->box.y, transfer->box.z, + transfer->box.width, transfer->box.height); +} + static unsigned long r600_texture_get_offset(struct r600_resource_texture *rtex, unsigned level, unsigned zslice, unsigned face) @@ -65,7 +82,9 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource for (i = 0, offset = 0; i <= ptex->last_level; i++) { w = u_minify(ptex->width0, i); h = u_minify(ptex->height0, i); + h = util_next_power_of_two(h); pitch = util_format_get_stride(ptex->format, align(w, 64)); + pitch = align(pitch, 256); layer_size = pitch * h; if (ptex->target == PIPE_TEXTURE_CUBE) size = layer_size * 6; @@ -74,6 +93,8 @@ static void r600_setup_miptree(struct r600_screen *rscreen, struct r600_resource rtex->offset[i] = offset; rtex->layer_size[i] = layer_size; rtex->pitch[i] = pitch; + rtex->width[i] = w; + rtex->height[i] = h; offset += size; } rtex->size = offset; @@ -104,10 +125,22 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, FREE(rtex); return NULL; } - return &resource->base.b; } +static void r600_texture_destroy_state(struct pipe_resource *ptexture) +{ + struct r600_resource_texture *rtexture = (struct r600_resource_texture*)ptexture; + + for (int i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) { + radeon_state_fini(&rtexture->scissor[i]); + radeon_state_fini(&rtexture->db[i]); + for (int j = 0; j < 8; j++) { + radeon_state_fini(&rtexture->cb[j][i]); + } + } +} + static void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex) { @@ -118,6 +151,10 @@ static void r600_texture_destroy(struct pipe_screen *screen, if (resource->bo) { radeon_bo_decref(rscreen->rw, resource->bo); } + if (rtex->uncompressed) { + radeon_bo_decref(rscreen->rw, rtex->uncompressed); + } + r600_texture_destroy_state(ptex); FREE(rtex); } @@ -168,7 +205,8 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, } /* Support only 2D textures without mipmaps */ - if (templ->target != PIPE_TEXTURE_2D || templ->depth0 != 1 || templ->last_level != 0) + if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) || + templ->depth0 != 1 || templ->last_level != 0) return NULL; rtex 
= CALLOC_STRUCT(r600_resource_texture); @@ -181,9 +219,12 @@ struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen, pipe_reference_init(&resource->base.b.reference, 1); resource->base.b.screen = screen; resource->bo = bo; + rtex->depth = 0; rtex->pitch_override = whandle->stride; rtex->bpt = util_format_get_blocksize(templ->format); rtex->pitch[0] = whandle->stride; + rtex->width[0] = templ->width0; + rtex->height[0] = templ->height0; rtex->offset[0] = 0; rtex->size = align(rtex->pitch[0] * templ->height0, 64); @@ -205,6 +246,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, const struct pipe_box *box) { struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture; + struct pipe_resource resource; struct r600_transfer *trans; trans = CALLOC_STRUCT(r600_transfer); @@ -216,48 +258,117 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx, trans->transfer.box = *box; trans->transfer.stride = rtex->pitch[sr.level]; trans->offset = r600_texture_get_offset(rtex, sr.level, box->z, sr.face); + if (rtex->tilled && !rtex->depth) { + resource.target = PIPE_TEXTURE_2D; + resource.format = texture->format; + resource.width0 = box->width; + resource.height0 = box->height; + resource.depth0 = 0; + resource.last_level = 0; + resource.nr_samples = 0; + resource.usage = PIPE_USAGE_DYNAMIC; + resource.bind = 0; + resource.flags = 0; + /* For texture reading, the temporary (detiled) texture is used as + * a render target when blitting from a tiled texture. */ + if (usage & PIPE_TRANSFER_READ) { + resource.bind |= PIPE_BIND_RENDER_TARGET; + } + /* For texture writing, the temporary texture is used as a sampler + * when blitting into a tiled texture. */ + if (usage & PIPE_TRANSFER_WRITE) { + resource.bind |= PIPE_BIND_SAMPLER_VIEW; + } + /* Create the temporary texture. */ + trans->linear_texture = ctx->screen->resource_create(ctx->screen, &resource); + if (trans->linear_texture == NULL) { + R600_ERR("failed to create temporary texture to hold untiled copy\n"); + pipe_resource_reference(&trans->transfer.resource, NULL); + FREE(trans); + return NULL; + } + if (usage & PIPE_TRANSFER_READ) { + /* We cannot map a tiled texture directly because the data is + * in a different order, therefore we do detiling using a blit. */ + r600_copy_from_tiled_texture(ctx, trans); + /* Always referenced in the blit. 
*/ + ctx->flush(ctx, 0, NULL); + } + } return &trans->transfer; } void r600_texture_transfer_destroy(struct pipe_context *ctx, - struct pipe_transfer *trans) + struct pipe_transfer *transfer) { - pipe_resource_reference(&trans->resource, NULL); - FREE(trans); + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; + + if (rtransfer->linear_texture) { + pipe_resource_reference(&rtransfer->linear_texture, NULL); + } + pipe_resource_reference(&transfer->resource, NULL); + FREE(transfer); } void* r600_texture_transfer_map(struct pipe_context *ctx, struct pipe_transfer* transfer) { struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; - struct r600_resource *resource; + struct radeon_bo *bo; enum pipe_format format = transfer->resource->format; struct r600_screen *rscreen = r600_screen(ctx->screen); + struct r600_resource_texture *rtex; + unsigned long offset = 0; char *map; + int r; r600_flush(ctx, 0, NULL); - - resource = (struct r600_resource *)transfer->resource; - if (radeon_bo_map(rscreen->rw, resource->bo)) { + if (rtransfer->linear_texture) { + bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; + } else { + rtex = (struct r600_resource_texture*)transfer->resource; + if (rtex->depth) { + r = r600_texture_from_depth(ctx, rtex, transfer->sr.level); + if (r) { + return NULL; + } + r600_flush(ctx, 0, NULL); + bo = rtex->uncompressed; + } else { + bo = ((struct r600_resource *)transfer->resource)->bo; + } + offset = rtransfer->offset + + transfer->box.y / util_format_get_blockheight(format) * transfer->stride + + transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + } + if (radeon_bo_map(rscreen->rw, bo)) { return NULL; } - radeon_bo_wait(rscreen->rw, resource->bo); - - map = resource->bo->data; + radeon_bo_wait(rscreen->rw, bo); - return map + rtransfer->offset + - transfer->box.y / util_format_get_blockheight(format) * transfer->stride + - transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + map = bo->data; + return map + offset; } void r600_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer) { + struct r600_transfer *rtransfer = (struct r600_transfer*)transfer; struct r600_screen *rscreen = r600_screen(ctx->screen); - struct r600_resource *resource; - - resource = (struct r600_resource *)transfer->resource; - radeon_bo_unmap(rscreen->rw, resource->bo); + struct r600_resource_texture *rtex; + struct radeon_bo *bo; + + if (rtransfer->linear_texture) { + bo = ((struct r600_resource *)rtransfer->linear_texture)->bo; + } else { + rtex = (struct r600_resource_texture*)transfer->resource; + if (rtex->depth) { + bo = rtex->uncompressed; + } else { + bo = ((struct r600_resource *)transfer->resource)->bo; + } + } + radeon_bo_unmap(rscreen->rw, bo); } struct u_resource_vtbl r600_texture_vtbl = @@ -280,51 +391,51 @@ void r600_init_screen_texture_functions(struct pipe_screen *screen) } static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format, - const unsigned char *swizzle_view) + const unsigned char *swizzle_view) { - unsigned i; - unsigned char swizzle[4]; - unsigned result = 0; - const uint32_t swizzle_shift[4] = { - 16, 19, 22, 25, - }; - const uint32_t swizzle_bit[4] = { - 0, 1, 2, 3, - }; - - if (swizzle_view) { - /* Combine two sets of swizzles. */ - for (i = 0; i < 4; i++) { - swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? 
- swizzle_format[swizzle_view[i]] : swizzle_view[i]; - } - } else { - memcpy(swizzle, swizzle_format, 4); - } - - /* Get swizzle. */ - for (i = 0; i < 4; i++) { - switch (swizzle[i]) { - case UTIL_FORMAT_SWIZZLE_Y: - result |= swizzle_bit[1] << swizzle_shift[i]; - break; - case UTIL_FORMAT_SWIZZLE_Z: - result |= swizzle_bit[2] << swizzle_shift[i]; - break; - case UTIL_FORMAT_SWIZZLE_W: - result |= swizzle_bit[3] << swizzle_shift[i]; - break; - case UTIL_FORMAT_SWIZZLE_0: - result |= V_038010_SQ_SEL_0 << swizzle_shift[i]; - break; - case UTIL_FORMAT_SWIZZLE_1: - result |= V_038010_SQ_SEL_1 << swizzle_shift[i]; - break; - default: /* UTIL_FORMAT_SWIZZLE_X */ - result |= swizzle_bit[0] << swizzle_shift[i]; - } - } - return result; + unsigned i; + unsigned char swizzle[4]; + unsigned result = 0; + const uint32_t swizzle_shift[4] = { + 16, 19, 22, 25, + }; + const uint32_t swizzle_bit[4] = { + 0, 1, 2, 3, + }; + + if (swizzle_view) { + /* Combine two sets of swizzles. */ + for (i = 0; i < 4; i++) { + swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ? + swizzle_format[swizzle_view[i]] : swizzle_view[i]; + } + } else { + memcpy(swizzle, swizzle_format, 4); + } + + /* Get swizzle. */ + for (i = 0; i < 4; i++) { + switch (swizzle[i]) { + case UTIL_FORMAT_SWIZZLE_Y: + result |= swizzle_bit[1] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_Z: + result |= swizzle_bit[2] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_W: + result |= swizzle_bit[3] << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_0: + result |= V_038010_SQ_SEL_0 << swizzle_shift[i]; + break; + case UTIL_FORMAT_SWIZZLE_1: + result |= V_038010_SQ_SEL_1 << swizzle_shift[i]; + break; + default: /* UTIL_FORMAT_SWIZZLE_X */ + result |= swizzle_bit[0] << swizzle_shift[i]; + } + } + return result; } /* texture format translate */ @@ -344,19 +455,21 @@ uint32_t r600_translate_texformat(enum pipe_format format, }; desc = util_format_description(format); + word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view); + /* Colorspace (return non-RGB formats directly). */ switch (desc->colorspace) { /* Depth stencil formats */ case UTIL_FORMAT_COLORSPACE_ZS: switch (format) { case PIPE_FORMAT_Z16_UNORM: - result = V_028010_DEPTH_16; + result = V_0280A0_COLOR_16; goto out_word4; case PIPE_FORMAT_Z24X8_UNORM: - result = V_028010_DEPTH_X8_24; + result = V_0280A0_COLOR_8_24; goto out_word4; case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - result = V_028010_DEPTH_8_24; + result = V_0280A0_COLOR_8_24; goto out_word4; default: goto out_unknown; @@ -382,8 +495,6 @@ uint32_t r600_translate_texformat(enum pipe_format format, break; } - word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view); - /* S3TC formats. 
TODO */ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { goto out_unknown; @@ -519,9 +630,221 @@ out_word4: *word4_p = word4; if (yuv_format_p) *yuv_format_p = yuv_format; -// fprintf(stderr,"returning %08x %08x %08x\n", result, word4, yuv_format); return result; out_unknown: // R600_ERR("Unable to handle texformat %d %s\n", format, util_format_name(format)); return ~0; } + +int r600_texture_from_depth(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + int r; + + if (!rtexture->depth) { + /* This shouldn't happen maybe print a warning */ + return 0; + } + if (rtexture->uncompressed && !rtexture->dirty) { + /* Uncompressed bo already in good state */ + return 0; + } + + /* allocate uncompressed texture */ + if (rtexture->uncompressed == NULL) { + rtexture->uncompressed = radeon_bo(rscreen->rw, 0, rtexture->size, 4096, NULL); + if (rtexture->uncompressed == NULL) { + return -ENOMEM; + } + } + + /* render a rectangle covering whole buffer to uncompress depth */ + r = r600_blit_uncompress_depth(ctx, rtexture, level); + if (r) { + return r; + } + + rtexture->dirty = 0; + return 0; +} + +static void r600_texture_state_scissor(struct r600_screen *rscreen, + struct r600_resource_texture *rtexture, + unsigned level) +{ + struct radeon_state *rstate = &rtexture->scissor[level]; + + radeon_state_init(rstate, rscreen->rw, R600_STATE_SCISSOR, 0, 0); + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_0_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_1_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_2_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_3_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_CLIPRECT_RULE] = 0x0000FFFF; + rstate->states[R600_SCISSOR__PA_SC_EDGERULE] = 0xAAAAAAAA; + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_GENERIC_SCISSOR_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_SCREEN_SCISSOR_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_VPORT_SCISSOR_0_TL] = 0x80000000; + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_BR] = S_028244_BR_X(rtexture->width[level]) | S_028244_BR_Y(rtexture->height[level]); + rstate->states[R600_SCISSOR__PA_SC_WINDOW_SCISSOR_TL] = 0x80000000; + + radeon_state_pm4(rstate); +} + +static void r600_texture_state_cb(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) +{ + struct radeon_state *rstate; + struct r600_resource 
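/*
 * A minimal standalone sketch of what r600_get_swizzle_combined above
 * computes: the sampler-view swizzle is composed on top of the format
 * swizzle, and the resulting per-channel selects are packed into the
 * four 3-bit DST_SEL fields of the texture resource word at bit
 * offsets 16/19/22/25 (the V_038010_* register in r600d.h).  The enum
 * values below are placeholders for the UTIL_FORMAT_SWIZZLE_* and
 * SQ_SEL_* definitions the driver actually uses.
 */
#include <stdint.h>

enum { SWZ_X, SWZ_Y, SWZ_Z, SWZ_W, SWZ_0, SWZ_1 };	/* placeholder swizzles */
enum { SEL_X, SEL_Y, SEL_Z, SEL_W, SEL_0, SEL_1 };	/* placeholder selects  */

static uint32_t pack_swizzle(const unsigned char fmt[4], const unsigned char *view)
{
	static const unsigned shift[4] = { 16, 19, 22, 25 };
	unsigned char s[4];
	uint32_t word4 = 0;
	unsigned i;

	/* Compose the view swizzle with the format swizzle. */
	for (i = 0; i < 4; i++) {
		if (!view)
			s[i] = fmt[i];
		else if (view[i] <= SWZ_W)	/* view picks a format channel */
			s[i] = fmt[view[i]];
		else				/* view forces constant 0 or 1 */
			s[i] = view[i];
	}

	/* Pack one 3-bit select per destination channel. */
	for (i = 0; i < 4; i++) {
		unsigned sel;
		switch (s[i]) {
		case SWZ_Y: sel = SEL_Y; break;
		case SWZ_Z: sel = SEL_Z; break;
		case SWZ_W: sel = SEL_W; break;
		case SWZ_0: sel = SEL_0; break;
		case SWZ_1: sel = SEL_1; break;
		default:    sel = SEL_X; break;
		}
		word4 |= (uint32_t)sel << shift[i];
	}
	return word4;
}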
*rbuffer; + unsigned pitch, slice; + unsigned color_info; + unsigned format, swap, ntype; + const struct util_format_description *desc; + + rstate = &rtexture->cb[cb][level]; + radeon_state_init(rstate, rscreen->rw, R600_STATE_CB0 + cb, 0, 0); + rbuffer = &rtexture->resource; + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; + slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; + ntype = 0; + desc = util_format_description(rbuffer->base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + ntype = V_0280A0_NUMBER_SRGB; + format = r600_translate_colorformat(rtexture->resource.base.b.format); + swap = r600_translate_colorswap(rtexture->resource.base.b.format); + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed); + rstate->bo[2] = radeon_bo_incref(rscreen->rw, rtexture->uncompressed); + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; + rstate->nbo = 3; + color_info = 0; + } else { + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[1] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->bo[2] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[2] = RADEON_GEM_DOMAIN_GTT; + rstate->placement[4] = RADEON_GEM_DOMAIN_GTT; + rstate->nbo = 3; + color_info = S_0280A0_SOURCE_FORMAT(1); + } + color_info |= S_0280A0_FORMAT(format) | + S_0280A0_COMP_SWAP(swap) | + S_0280A0_BLEND_CLAMP(1) | + S_0280A0_NUMBER_TYPE(ntype); + rstate->states[R600_CB0__CB_COLOR0_BASE] = rtexture->offset[level] >> 8; + rstate->states[R600_CB0__CB_COLOR0_INFO] = color_info; + rstate->states[R600_CB0__CB_COLOR0_SIZE] = S_028060_PITCH_TILE_MAX(pitch) | + S_028060_SLICE_TILE_MAX(slice); + + radeon_state_pm4(rstate); +} + +static void r600_texture_state_db(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) +{ + struct radeon_state *rstate = &rtexture->db[level]; + struct r600_resource *rbuffer; + unsigned pitch, slice, format; + + radeon_state_init(rstate, rscreen->rw, R600_STATE_DB, 0, 0); + rbuffer = &rtexture->resource; + rtexture->tilled = 1; + rtexture->array_mode = 2; + rtexture->tile_type = 1; + rtexture->depth = 1; + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + pitch = (rtexture->pitch[level] / rtexture->bpt) / 8 - 1; + slice = (rtexture->pitch[level] / rtexture->bpt) * rtexture->height[level] / 64 - 1; + format = r600_translate_dbformat(rbuffer->base.b.format); + rstate->states[R600_DB__DB_DEPTH_BASE] = rtexture->offset[level] >> 8; + rstate->states[R600_DB__DB_DEPTH_INFO] = S_028010_ARRAY_MODE(rtexture->array_mode) | + S_028010_FORMAT(format); + rstate->states[R600_DB__DB_DEPTH_VIEW] = 0x00000000; + rstate->states[R600_DB__DB_PREFETCH_LIMIT] = (rtexture->height[level] / 8) -1; + rstate->states[R600_DB__DB_DEPTH_SIZE] = S_028000_PITCH_TILE_MAX(pitch) | + S_028000_SLICE_TILE_MAX(slice); + rstate->bo[0] = radeon_bo_incref(rscreen->rw, rbuffer->bo); + rstate->placement[0] = RADEON_GEM_DOMAIN_GTT; + rstate->nbo = 1; + + radeon_state_pm4(rstate); +} + +int r600_texture_scissor(struct pipe_context *ctx, struct 
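/*
 * A minimal standalone sketch of the PITCH_TILE_MAX / SLICE_TILE_MAX
 * values programmed into CB_COLOR*_SIZE and DB_DEPTH_SIZE above: the
 * hardware takes the pitch in units of 8 pixels and the slice size in
 * units of 64 pixels (one 8x8 tile), both minus one.  The helper names
 * are illustrative, not driver API.
 */
static unsigned pitch_tile_max(unsigned pitch_bytes, unsigned bytes_per_texel)
{
	unsigned pitch_pixels = pitch_bytes / bytes_per_texel;
	return pitch_pixels / 8 - 1;
}

static unsigned slice_tile_max(unsigned pitch_bytes, unsigned bytes_per_texel,
			       unsigned height)
{
	unsigned pitch_pixels = pitch_bytes / bytes_per_texel;
	return pitch_pixels * height / 64 - 1;
}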
r600_resource_texture *rtexture, unsigned level) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + + if (!rtexture->scissor[level].cpm4) { + r600_texture_state_scissor(rscreen, rtexture, level); + } + return 0; +} + +static void r600_texture_state_viewport(struct r600_screen *rscreen, struct r600_resource_texture *rtexture, unsigned level) +{ + struct radeon_state *rstate = &rtexture->viewport[level]; + + radeon_state_init(rstate, rscreen->rw, R600_STATE_VIEWPORT, 0, 0); + + /* set states (most default value are 0 and struct already + * initialized to 0, thus avoid resetting them) + */ + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XOFFSET_0] = fui((float)rtexture->width[level]/2.0); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_XSCALE_0] = fui((float)rtexture->width[level]/2.0); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YOFFSET_0] = fui((float)rtexture->height[level]/2.0); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_YSCALE_0] = fui((float)-rtexture->height[level]/2.0); + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZOFFSET_0] = 0x3F000000; + rstate->states[R600_VIEWPORT__PA_CL_VPORT_ZSCALE_0] = 0x3F000000; + rstate->states[R600_VIEWPORT__PA_CL_VTE_CNTL] = 0x0000043F; + rstate->states[R600_VIEWPORT__PA_SC_VPORT_ZMAX_0] = 0x3F800000; + + radeon_state_pm4(rstate); +} + +int r600_texture_cb(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned cb, unsigned level) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + + if (!rtexture->cb[cb][level].cpm4) { + r600_texture_state_cb(rscreen, rtexture, cb, level); + } + return 0; +} + +int r600_texture_db(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + + if (!rtexture->db[level].cpm4) { + r600_texture_state_db(rscreen, rtexture, level); + } + return 0; +} + +int r600_texture_viewport(struct pipe_context *ctx, struct r600_resource_texture *rtexture, unsigned level) +{ + struct r600_screen *rscreen = r600_screen(ctx->screen); + + if (!rtexture->viewport[level].cpm4) { + r600_texture_state_viewport(rscreen, rtexture, level); + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 53388f822ea..7b9a983d53b 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -199,6 +199,7 @@ #define V_0280A0_COLOR_16_16_16_16_FLOAT 0x00000020 #define V_0280A0_COLOR_32_32_32_32 0x00000022 #define V_0280A0_COLOR_32_32_32_32_FLOAT 0x00000023 +#define V_0280A0_COLOR_32_32_32_FLOAT 0x00000030 #define S_0280A0_ARRAY_MODE(x) (((x) & 0xF) << 8) #define G_0280A0_ARRAY_MODE(x) (((x) >> 8) & 0xF) #define C_0280A0_ARRAY_MODE 0xFFFFF0FF @@ -1316,4 +1317,11 @@ #define G_0286D4_PNT_SPRITE_TOP_1(x) (((x) >> 14) & 0x1) #define C_0286D4_PNT_SPRITE_TOP_1 0xFFFFBFFF +#define SQ_TEX_INST_LD 0x03 +#define SQ_TEX_INST_GET_GRADIENTS_H 0x7 +#define SQ_TEX_INST_GET_GRADIENTS_V 0x8 + +#define SQ_TEX_INST_SAMPLE 0x10 +#define SQ_TEX_INST_SAMPLE_L 0x11 +#define SQ_TEX_INST_SAMPLE_C 0x18 #endif diff --git a/src/gallium/drivers/r600/radeon.h b/src/gallium/drivers/r600/radeon.h index 8f00a4895a0..aaac8de5283 100644 --- a/src/gallium/drivers/r600/radeon.h +++ b/src/gallium/drivers/r600/radeon.h @@ -77,6 +77,14 @@ enum radeon_family { CHIP_LAST, }; +enum { + R600_SHADER_PS = 1, + R600_SHADER_VS, + R600_SHADER_GS, + R600_SHADER_FS, + R600_SHADER_MAX = R600_SHADER_FS, +}; + enum radeon_family radeon_get_family(struct radeon *rw); /* @@ -98,22 +106,23 @@ struct radeon_bo 
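/*
 * A minimal standalone sketch of the viewport transform that
 * r600_texture_state_viewport above programs for a width x height
 * level.  With window = ndc * scale + offset, the values below map
 * NDC [-1,1] to [0,width] and [0,height] (Y flipped) and Z to [0,1]
 * (0x3F000000 is 0.5f).  fui() stores the IEEE bit pattern of the
 * float in the register; a standalone equivalent is sketched here,
 * and the struct/helper names are illustrative only.
 */
#include <stdint.h>
#include <string.h>

static uint32_t float_bits(float f)
{
	uint32_t u;
	memcpy(&u, &f, sizeof(u));
	return u;
}

struct viewport_regs {
	uint32_t xscale, xoffset, yscale, yoffset, zscale, zoffset;
};

static struct viewport_regs viewport_for_level(unsigned width, unsigned height)
{
	struct viewport_regs v;
	v.xscale  = float_bits((float)width  / 2.0f);
	v.xoffset = float_bits((float)width  / 2.0f);
	v.yscale  = float_bits(-(float)height / 2.0f);
	v.yoffset = float_bits((float)height / 2.0f);
	v.zscale  = float_bits(0.5f);
	v.zoffset = float_bits(0.5f);
	return v;
}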
*radeon_bo_incref(struct radeon *radeon, struct radeon_bo *bo); struct radeon_bo *radeon_bo_decref(struct radeon *radeon, struct radeon_bo *bo); int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo); +struct radeon_stype_info; /* * states functions */ struct radeon_state { struct radeon *radeon; unsigned refcount; - unsigned type; + struct radeon_stype_info *stype; + unsigned state_id; unsigned id; + unsigned shader_index; unsigned nstates; - u32 *states; + u32 states[64]; unsigned npm4; unsigned cpm4; u32 pm4_crc; - u32 *pm4; - u32 nimmd; - u32 *immd; + u32 pm4[128]; unsigned nbo; struct radeon_bo *bo[4]; unsigned nreloc; @@ -123,38 +132,22 @@ struct radeon_state { unsigned bo_dirty[4]; }; -struct radeon_state *radeon_state(struct radeon *radeon, u32 type, u32 id); -struct radeon_state *radeon_state_incref(struct radeon_state *state); -struct radeon_state *radeon_state_decref(struct radeon_state *state); +int radeon_state_init(struct radeon_state *rstate, struct radeon *radeon, u32 type, u32 id, u32 shader_class); +void radeon_state_fini(struct radeon_state *state); int radeon_state_pm4(struct radeon_state *state); +int radeon_state_convert(struct radeon_state *state, u32 stype, u32 id, u32 shader_type); /* * draw functions */ struct radeon_draw { - unsigned refcount; struct radeon *radeon; - unsigned nstate; struct radeon_state **state; - unsigned cpm4; }; -struct radeon_draw *radeon_draw(struct radeon *radeon); -struct radeon_draw *radeon_draw_duplicate(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_incref(struct radeon_draw *draw); -struct radeon_draw *radeon_draw_decref(struct radeon_draw *draw); -int radeon_draw_set(struct radeon_draw *draw, struct radeon_state *state); -int radeon_draw_set_new(struct radeon_draw *draw, struct radeon_state *state); -int radeon_draw_check(struct radeon_draw *draw); - -struct radeon_ctx *radeon_ctx(struct radeon *radeon); -struct radeon_ctx *radeon_ctx_decref(struct radeon_ctx *ctx); -struct radeon_ctx *radeon_ctx_incref(struct radeon_ctx *ctx); -int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_set_draw_new(struct radeon_ctx *ctx, struct radeon_draw *draw); -int radeon_ctx_pm4(struct radeon_ctx *ctx); -int radeon_ctx_submit(struct radeon_ctx *ctx); -void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); +int radeon_draw_init(struct radeon_draw *draw, struct radeon *radeon); +void radeon_draw_bind(struct radeon_draw *draw, struct radeon_state *state); +void radeon_draw_unbind(struct radeon_draw *draw, struct radeon_state *state); /* * radeon context functions @@ -169,95 +162,57 @@ struct radeon_cs_reloc { #pragma pack() struct radeon_ctx { - int refcount; struct radeon *radeon; u32 *pm4; - u32 cpm4; - u32 draw_cpm4; - unsigned id; - unsigned next_id; + int cdwords; + int ndwords; unsigned nreloc; struct radeon_cs_reloc *reloc; unsigned nbo; struct radeon_bo **bo; - unsigned ndraw; - struct radeon_draw *cdraw; - struct radeon_draw **draw; - unsigned nstate; - struct radeon_state **state; }; +int radeon_ctx_init(struct radeon_ctx *ctx, struct radeon *radeon); +void radeon_ctx_fini(struct radeon_ctx *ctx); +void radeon_ctx_clear(struct radeon_ctx *ctx); +int radeon_ctx_set_draw(struct radeon_ctx *ctx, struct radeon_draw *draw); +int radeon_ctx_submit(struct radeon_ctx *ctx); +void radeon_ctx_dump_bof(struct radeon_ctx *ctx, const char *file); +int radeon_ctx_set_query_state(struct radeon_ctx *ctx, struct radeon_state *state); + /* * R600/R700 */ -#define R600_NSTATE 1280 -#define 
R600_NTYPE 32 +enum r600_stype { + R600_STATE_CONFIG, + R600_STATE_CB_CNTL, + R600_STATE_RASTERIZER, + R600_STATE_VIEWPORT, + R600_STATE_SCISSOR, + R600_STATE_BLEND, + R600_STATE_DSA, + R600_STATE_SHADER, /* has PS,VS,GS,FS variants */ + R600_STATE_CONSTANT, /* has PS,VS,GS,FS variants */ + R600_STATE_RESOURCE, /* has PS,VS,GS,FS variants */ + R600_STATE_SAMPLER, /* has PS,VS,GS,FS variants */ + R600_STATE_SAMPLER_BORDER, /* has PS,VS,GS,FS variants */ + R600_STATE_CB0, + R600_STATE_CB1, + R600_STATE_CB2, + R600_STATE_CB3, + R600_STATE_CB4, + R600_STATE_CB5, + R600_STATE_CB6, + R600_STATE_CB7, + R600_STATE_DB, + R600_STATE_QUERY_BEGIN, + R600_STATE_QUERY_END, + R600_STATE_UCP, + R600_STATE_VGT, + R600_STATE_DRAW, +}; -#define R600_CONFIG 0 -#define R600_CONFIG_TYPE 0 -#define R600_CB_CNTL 1 -#define R600_CB_CNTL_TYPE 1 -#define R600_RASTERIZER 2 -#define R600_RASTERIZER_TYPE 2 -#define R600_VIEWPORT 3 -#define R600_VIEWPORT_TYPE 3 -#define R600_SCISSOR 4 -#define R600_SCISSOR_TYPE 4 -#define R600_BLEND 5 -#define R600_BLEND_TYPE 5 -#define R600_DSA 6 -#define R600_DSA_TYPE 6 -#define R600_VS_SHADER 7 -#define R600_VS_SHADER_TYPE 7 -#define R600_PS_SHADER 8 -#define R600_PS_SHADER_TYPE 8 -#define R600_PS_CONSTANT 9 -#define R600_PS_CONSTANT_TYPE 9 -#define R600_VS_CONSTANT 265 -#define R600_VS_CONSTANT_TYPE 10 -#define R600_PS_RESOURCE 521 -#define R600_PS_RESOURCE_TYPE 11 -#define R600_VS_RESOURCE 681 -#define R600_VS_RESOURCE_TYPE 12 -#define R600_FS_RESOURCE 841 -#define R600_FS_RESOURCE_TYPE 13 -#define R600_GS_RESOURCE 1001 -#define R600_GS_RESOURCE_TYPE 14 -#define R600_PS_SAMPLER 1161 -#define R600_PS_SAMPLER_TYPE 15 -#define R600_VS_SAMPLER 1179 -#define R600_VS_SAMPLER_TYPE 16 -#define R600_GS_SAMPLER 1197 -#define R600_GS_SAMPLER_TYPE 17 -#define R600_PS_SAMPLER_BORDER 1215 -#define R600_PS_SAMPLER_BORDER_TYPE 18 -#define R600_VS_SAMPLER_BORDER 1233 -#define R600_VS_SAMPLER_BORDER_TYPE 19 -#define R600_GS_SAMPLER_BORDER 1251 -#define R600_GS_SAMPLER_BORDER_TYPE 20 -#define R600_CB0 1269 -#define R600_CB0_TYPE 21 -#define R600_CB1 1270 -#define R600_CB1_TYPE 22 -#define R600_CB2 1271 -#define R600_CB2_TYPE 23 -#define R600_CB3 1272 -#define R600_CB3_TYPE 24 -#define R600_CB4 1273 -#define R600_CB4_TYPE 25 -#define R600_CB5 1274 -#define R600_CB5_TYPE 26 -#define R600_CB6 1275 -#define R600_CB6_TYPE 27 -#define R600_CB7 1276 -#define R600_CB7_TYPE 28 -#define R600_DB 1277 -#define R600_DB_TYPE 29 -#define R600_VGT 1278 -#define R600_VGT_TYPE 30 -#define R600_DRAW 1279 -#define R600_DRAW_TYPE 31 /* R600_CONFIG */ #define R600_CONFIG__SQ_CONFIG 0 #define R600_CONFIG__SQ_GPR_RESOURCE_MGMT_1 1 @@ -639,9 +594,40 @@ struct radeon_ctx { /* R600_DRAW */ #define R600_DRAW__VGT_NUM_INDICES 0 #define R600_DRAW__VGT_DMA_BASE_HI 1 -#define R600_DRAW__VGT_DMA_BASE 2 +#define R600_DRAW__VGT_DMA_BASE 2 #define R600_DRAW__VGT_DRAW_INITIATOR 3 -#define R600_DRAW_SIZE 4 -#define R600_DRAW_PM4 128 +#define R600_DRAW_SIZE 4 +#define R600_DRAW_PM4 128 +/* R600_CLIP */ +#define R600_CLIP__PA_CL_UCP_X_0 0 +#define R600_CLIP__PA_CL_UCP_Y_0 1 +#define R600_CLIP__PA_CL_UCP_Z_0 2 +#define R600_CLIP__PA_CL_UCP_W_0 3 +#define R600_CLIP__PA_CL_UCP_X_1 4 +#define R600_CLIP__PA_CL_UCP_Y_1 5 +#define R600_CLIP__PA_CL_UCP_Z_1 6 +#define R600_CLIP__PA_CL_UCP_W_1 7 +#define R600_CLIP__PA_CL_UCP_X_2 8 +#define R600_CLIP__PA_CL_UCP_Y_2 9 +#define R600_CLIP__PA_CL_UCP_Z_2 10 +#define R600_CLIP__PA_CL_UCP_W_2 11 +#define R600_CLIP__PA_CL_UCP_X_3 12 +#define R600_CLIP__PA_CL_UCP_Y_3 13 +#define R600_CLIP__PA_CL_UCP_Z_3 14 
+#define R600_CLIP__PA_CL_UCP_W_3 15
+#define R600_CLIP__PA_CL_UCP_X_4 16
+#define R600_CLIP__PA_CL_UCP_Y_4 17
+#define R600_CLIP__PA_CL_UCP_Z_4 18
+#define R600_CLIP__PA_CL_UCP_W_4 19
+#define R600_CLIP__PA_CL_UCP_X_5 20
+#define R600_CLIP__PA_CL_UCP_Y_5 21
+#define R600_CLIP__PA_CL_UCP_Z_5 22
+#define R600_CLIP__PA_CL_UCP_W_5 23
+#define R600_CLIP_SIZE 24
+#define R600_CLIP_PM4 128
+/* R600 QUERY BEGIN/END */
+#define R600_QUERY__OFFSET 0
+#define R600_QUERY_SIZE 1
+#define R600_QUERY_PM4 128
 #endif
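/*
 * A small illustrative note on the R600_CLIP state block added above:
 * the six user clip planes are stored as four dwords each, which is
 * why R600_CLIP_SIZE is 24, and the slot for component c (0 = X,
 * 1 = Y, 2 = Z, 3 = W) of plane p follows directly from the
 * PA_CL_UCP_*_[0-5] defines.  The helper name is illustrative only.
 */
static unsigned r600_ucp_slot(unsigned plane, unsigned component)
{
	/* e.g. plane 3, component W -> 3 * 4 + 3 = 15 (PA_CL_UCP_W_3) */
	return plane * 4 + component;
}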