From 1371d65a7fbd695d3516861fe733685569d890d0 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Fri, 2 Aug 2013 06:38:23 +0400 Subject: r600g: initial support for geometry shaders on evergreen (v2) This is Vadim's initial work with a few regression fixes squashed in. v2: (airlied) fix regression in glsl-max-varyings - need to use vs and ps_dirty fix regression in shader exports from rebasing. whitespace fixing. v2.1: squash fix assert Signed-off-by: Vadim Girlin Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/eg_asm.c | 9 +- src/gallium/drivers/r600/evergreen_state.c | 154 +++++- src/gallium/drivers/r600/r600_asm.c | 2 +- src/gallium/drivers/r600/r600_blit.c | 1 + src/gallium/drivers/r600/r600_hw_context.c | 6 + src/gallium/drivers/r600/r600_pipe.c | 9 +- src/gallium/drivers/r600/r600_pipe.h | 26 +- src/gallium/drivers/r600/r600_shader.c | 715 +++++++++++++++++++------ src/gallium/drivers/r600/r600_shader.h | 12 + src/gallium/drivers/r600/r600_state_common.c | 161 +++++- src/gallium/drivers/r600/sb/sb_bc.h | 2 + src/gallium/drivers/r600/sb/sb_bc_dump.cpp | 2 +- src/gallium/drivers/r600/sb/sb_bc_finalize.cpp | 2 +- src/gallium/drivers/r600/sb/sb_bc_parser.cpp | 11 +- src/gallium/drivers/r600/sb/sb_shader.cpp | 3 +- 15 files changed, 909 insertions(+), 206 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 5ad3d771974..acb30409428 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -93,8 +93,8 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); id++; - } else if (cfop->flags & CF_STRM) { - /* MEM_STREAM instructions */ + } else if (cfop->flags & CF_MEM) { + /* MEM_STREAM, MEM_RING instructions */ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | @@ -109,12 +109,13 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); id++; } else { - /* branch, loop, call, return instructions */ + /* other instructions */ bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)| S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COND(cf->cond) | - S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } } return 0; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 48bea1fcfe4..6896617ce93 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2518,6 +2518,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, struct r600_resource *rbuffer; uint64_t va; unsigned buffer_index = ffs(dirty_mask) - 1; + unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER); cb = &state->cb[buffer_index]; rbuffer = (struct r600_resource*)cb->buffer; @@ -2526,10 +2527,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, va = r600_resource_va(&rctx->screen->b.b, &rbuffer->b.b); va += cb->buffer_offset; - r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, - ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags); - r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, - pkt_flags); + if (!gs_ring_buffer) { + r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, + ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags); + r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, + pkt_flags); + } radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); @@ -2539,10 +2542,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, va); /* RESOURCEi_WORD0 */ radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */ radeon_emit(cs, /* RESOURCEi_WORD2 */ - S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(16) | - S_030008_BASE_ADDRESS_HI(va >> 32UL)); + S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) | + S_030008_STRIDE(gs_ring_buffer ? 4 : 16) | + S_030008_BASE_ADDRESS_HI(va >> 32UL) | + S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT)); radeon_emit(cs, /* RESOURCEi_WORD3 */ + S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) | S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | @@ -2550,7 +2555,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ radeon_emit(cs, 0); /* RESOURCEi_WORD6 */ - radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */ + radeon_emit(cs, /* RESOURCEi_WORD7 */ + S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER)); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ)); @@ -2714,6 +2720,63 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, RADEON_USAGE_READ)); } +static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a) +{ + struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a; + + uint32_t v = 0, v2 = 0; + + if (state->geom_enable) { + v = S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | + S_028B54_GS_EN(1) | + S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); + + v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) | + S_028A40_CUT_MODE(V_028A40_GS_CUT_128); + } + + r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v); + r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2); +} + +static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a) +{ + struct pipe_screen *screen = rctx->b.b.screen; + struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; + struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a; + struct r600_resource *rbuffer; + + r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); + + if (state->enable) { + rbuffer =(struct r600_resource*)state->esgs_ring.buffer; + r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, + (r600_resource_va(screen, &rbuffer->b.b)) >> 8); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, + state->esgs_ring.buffer_size >> 8); + + rbuffer =(struct r600_resource*)state->gsvs_ring.buffer; + r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, + (r600_resource_va(screen, &rbuffer->b.b)) >> 8); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE)); + r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, + state->gsvs_ring.buffer_size >> 8); + } else { + r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0); + r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0); + } + + r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH)); +} + void cayman_init_common_regs(struct r600_command_buffer *cb, enum chip_class ctx_chip_class, enum radeon_family ctx_family, @@ -3509,6 +3572,77 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader shader->flatshade = rctx->rasterizer->flatshade; } +void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_command_buffer *cb = &shader->command_buffer; + struct r600_shader *rshader = &shader->shader; + + r600_init_command_buffer(cb, 32); + + r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES, + S_028890_NUM_GPRS(rshader->bc.ngpr) | + S_028890_STACK_SIZE(rshader->bc.nstack)); + r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES, + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8); + /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ +} + +void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) +{ + struct r600_command_buffer *cb = &shader->command_buffer; + struct r600_shader *rshader = &shader->shader; + struct r600_shader *cp_shader = &shader->gs_copy_shader->shader; + unsigned gsvs_itemsize = + (cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2; + + r600_init_command_buffer(cb, 64); + + /* VGT_GS_OUT_PRIM_TYPE is written by r6000_draw_vbo */ + /* VGT_GS_MODE is written by evergreen_emit_shader_stages */ + + r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1); + + r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT, + S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices)); + + +/* XXX kernel checker fails + r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT, + S_028B90_CNT(0) | + S_028B90_ENABLE(0)); +*/ + r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4); + r600_store_value(cb, cp_shader->ring_item_size >> 2); + r600_store_value(cb, 0); + r600_store_value(cb, 0); + r600_store_value(cb, 0); + + r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE, + (rshader->ring_item_size) >> 2); + + r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE, + gsvs_itemsize); + + r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3); + r600_store_value(cb, gsvs_itemsize); + r600_store_value(cb, gsvs_itemsize); + r600_store_value(cb, gsvs_itemsize); + + /* FIXME calculate these values somehow ??? */ + r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3); + r600_store_value(cb, 0x80); /* GS_PER_ES */ + r600_store_value(cb, 0x100); /* ES_PER_GS */ + r600_store_value(cb, 0x2); /* GS_PER_VS */ + + r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS, + S_028878_NUM_GPRS(rshader->bc.ngpr) | + S_028878_STACK_SIZE(rshader->bc.nstack)); + r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS, + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8); + /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */ +} + + void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_command_buffer *cb = &shader->command_buffer; @@ -3918,6 +4052,10 @@ void evergreen_init_state_functions(struct r600_context *rctx) rctx->atoms[id++] = &rctx->b.streamout.begin_atom; r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0); + r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0); + r600_init_atom(rctx, &rctx->export_shader.atom, id++, r600_emit_shader, 0); + r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 6); + r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26); rctx->b.b.create_blend_state = evergreen_create_blend_state; rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 14ee2c210b5..899a8efd21d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1939,7 +1939,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) if (cf->end_of_program) fprintf(stderr, "EOP "); fprintf(stderr, "\n"); - } else if (r600_isa_cf(cf->op)->flags & CF_STRM) { + } else if (r600_isa_cf(cf->op)->flags & CF_MEM) { int o = 0; const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", "WRITE_IND_ACK"}; diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index c2ae2f6b28e..6bb7cfe9b9e 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -59,6 +59,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb); util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso); util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader); + util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader); util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets, (struct pipe_stream_output_target**)rctx->b.streamout.targets); util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 9b0c558c29e..fc81e95accd 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -301,6 +301,12 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->config_state.atom.dirty = true; ctx->stencil_ref.atom.dirty = true; ctx->vertex_fetch_shader.atom.dirty = true; + ctx->export_shader.atom.dirty = true; + if (ctx->gs_shader) { + ctx->geometry_shader.atom.dirty = true; + ctx->shader_stages.atom.dirty = true; + ctx->gs_rings.atom.dirty = true; + } ctx->vertex_shader.atom.dirty = true; ctx->viewport.atom.dirty = true; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 49521e0eb5a..6c80f85f9e3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -447,15 +447,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { + struct r600_screen *rscreen = (struct r600_screen *)pscreen; + switch(shader) { case PIPE_SHADER_FRAGMENT: case PIPE_SHADER_VERTEX: - case PIPE_SHADER_COMPUTE: + case PIPE_SHADER_COMPUTE: break; case PIPE_SHADER_GEOMETRY: - /* XXX: support and enable geometry programs */ - return 0; + if (rscreen->b.chip_class < EVERGREEN) + return 0; + break; default: /* XXX: support tessellation on Evergreen */ return 0; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 31e27f282e5..2d2c79b84e3 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -38,7 +38,7 @@ #include "util/u_double_list.h" #include "util/u_transfer.h" -#define R600_NUM_ATOMS 41 +#define R600_NUM_ATOMS 42 /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 @@ -46,13 +46,14 @@ #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 3 +#define R600_MAX_DRIVER_CONST_BUFFERS 4 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers after user buffers */ #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3) #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -179,6 +180,18 @@ struct r600_viewport_state { struct pipe_viewport_state state; }; +struct r600_shader_stages_state { + struct r600_atom atom; + unsigned geom_enable; +}; + +struct r600_gs_rings_state { + struct r600_atom atom; + unsigned enable; + struct pipe_constant_buffer esgs_ring; + struct pipe_constant_buffer gsvs_ring; +}; + /* This must start from 16. */ /* features */ #define DBG_NO_LLVM (1 << 17) @@ -353,7 +366,7 @@ struct r600_fetch_shader { struct r600_shader_state { struct r600_atom atom; - struct r600_pipe_shader_selector *shader; + struct r600_pipe_shader *shader; }; struct r600_context { @@ -415,7 +428,11 @@ struct r600_context { struct r600_cso_state vertex_fetch_shader; struct r600_shader_state vertex_shader; struct r600_shader_state pixel_shader; + struct r600_shader_state geometry_shader; + struct r600_shader_state export_shader; struct r600_cs_shader_state cs_shader_state; + struct r600_shader_stages_state shader_stages; + struct r600_gs_rings_state gs_rings; struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; struct r600_textures_info samplers[PIPE_SHADER_TYPES]; /** Vertex buffers for fetch shaders */ @@ -427,6 +444,7 @@ struct r600_context { unsigned compute_cb_target_mask; struct r600_pipe_shader_selector *ps_shader; struct r600_pipe_shader_selector *vs_shader; + struct r600_pipe_shader_selector *gs_shader; struct r600_rasterizer_state *rasterizer; bool alpha_to_one; bool force_blend_disable; @@ -506,6 +524,8 @@ void cayman_init_common_regs(struct r600_command_buffer *cb, void evergreen_init_state_functions(struct r600_context *rctx); void evergreen_init_atom_start_cs(struct r600_context *rctx); void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader); +void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader); void *evergreen_create_db_flush_dsa(struct r600_context *rctx); void *evergreen_create_resolve_blend(struct r600_context *rctx); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 32d2aa73bef..56067246874 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -60,7 +60,7 @@ issued in the w slot as well. The compiler must issue the source argument to slots z, y, and x */ -static int r600_shader_from_tgsi(struct r600_screen *rscreen, +static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, struct r600_shader_key key); @@ -104,17 +104,43 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so) } } +static int store_shader(struct pipe_context *ctx, + struct r600_pipe_shader *shader) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + uint32_t *ptr, i; + + if (shader->bo == NULL) { + shader->bo = (struct r600_resource*) + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); + if (shader->bo == NULL) { + return -ENOMEM; + } + ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + if (R600_BIG_ENDIAN) { + for (i = 0; i < shader->shader.bc.ndw; ++i) { + ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]); + } + } else { + memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); + } + rctx->b.ws->buffer_unmap(shader->bo->cs_buf); + } + + return 0; +} + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, struct r600_shader_key key) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; - int r, i; - uint32_t *ptr; + int r; bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); + unsigned export_shader = key.vs_as_es; shader->shader.bc.isa = rctx->isa; @@ -126,7 +152,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_dump_streamout(&sel->so); } } - r = r600_shader_from_tgsi(rctx->screen, shader, key); + r = r600_shader_from_tgsi(rctx, shader, key); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; @@ -157,29 +183,39 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } - /* Store the shader in a buffer. */ - if (shader->bo == NULL) { - shader->bo = (struct r600_resource*) - pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); - if (shader->bo == NULL) { - return -ENOMEM; - } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); - if (R600_BIG_ENDIAN) { - for (i = 0; i < shader->shader.bc.ndw; ++i) { - ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]); - } - } else { - memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); + if (shader->gs_copy_shader) { + if (dump) { + // dump copy shader + r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, + &shader->gs_copy_shader->shader, dump, 0); + if (r) + return r; } - rctx->b.ws->buffer_unmap(shader->bo->cs_buf); + + if ((r = store_shader(ctx, shader->gs_copy_shader))) + return r; } + /* Store the shader in a buffer. */ + if ((r = store_shader(ctx, shader))) + return r; + /* Build state. */ switch (shader->shader.processor_type) { + case TGSI_PROCESSOR_GEOMETRY: + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_update_gs_state(ctx, shader); + evergreen_update_vs_state(ctx, shader->gs_copy_shader); + } else { + assert(!"not suported yet"); + } + break; case TGSI_PROCESSOR_VERTEX: if (rctx->b.chip_class >= EVERGREEN) { - evergreen_update_vs_state(ctx, shader); + if (export_shader) + evergreen_update_es_state(ctx, shader); + else + evergreen_update_vs_state(ctx, shader); } else { r600_update_vs_state(ctx, shader); } @@ -245,6 +281,9 @@ struct r600_shader_ctx { unsigned cv_output; int fragcoord_input; int native_integers; + int next_ring_offset; + int gs_next_vertex; + struct r600_shader *gs_for_vs; }; struct r600_shader_tgsi_instruction { @@ -254,6 +293,7 @@ struct r600_shader_tgsi_instruction { int (*process)(struct r600_shader_ctx *ctx); }; +static int emit_gs_ring_writes(struct r600_shader_ctx *ctx); static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); @@ -285,7 +325,13 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) { + switch (i->Src[j].Register.File) { + case TGSI_FILE_CONSTANT: + break; + case TGSI_FILE_INPUT: + if (ctx->type == TGSI_PROCESSOR_GEOMETRY) + break; + default: R600_ERR("unsupported src %d (dimension %d)\n", j, i->Src[j].Register.Dimension); return -EINVAL; @@ -536,6 +582,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) if ((r = evergreen_interp_input(ctx, i))) return r; } + } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + /* FIXME probably skip inputs if they aren't passed in the ring */ + ctx->shader->input[i].ring_offset = ctx->next_ring_offset; + ctx->next_ring_offset += 16; } for (j = 1; j < count; ++j) { ctx->shader->input[i + j] = ctx->shader->input[i]; @@ -550,7 +600,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; - if (ctx->type == TGSI_PROCESSOR_VERTEX) { + if (ctx->type == TGSI_PROCESSOR_VERTEX || + ctx->type == TGSI_PROCESSOR_GEOMETRY) { ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); switch (d->Semantic.Name) { case TGSI_SEMANTIC_CLIPDIST: @@ -773,6 +824,59 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx return 0; } +static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg) +{ + struct r600_bytecode_vtx vtx; + int r; + int offset_reg = vtx_id / 3; + int offset_chan = vtx_id % 3; + + /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, + * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ + + if (offset_reg == 0 && offset_chan == 2) + offset_chan = 3; + + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = R600_GS_RING_CONST_BUFFER; + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = offset_reg; + vtx.src_sel_x = offset_chan; + vtx.offset = index * 16; /*bytes*/ + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.use_const_fields = 1; + + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i; + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *src = &inst->Src[i]; + + if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { + int treg = r600_get_temp(ctx); + int index = src->Register.Index; + int vtx_id = src->Dimension.Index; + + fetch_gs_input(ctx, index, vtx_id, treg); + ctx->src[i].sel = treg; + } + } + return 0; +} + static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -983,10 +1087,247 @@ out_err: return r; } -static int r600_shader_from_tgsi(struct r600_screen *rscreen, +static int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs) +{ + struct r600_shader_ctx ctx = {}; + struct r600_shader *gs_shader = &gs->shader; + struct r600_pipe_shader *cshader; + int ocnt = gs_shader->noutput; + struct r600_bytecode_alu alu; + struct r600_bytecode_vtx vtx; + struct r600_bytecode_output output; + struct r600_bytecode_cf *cf_jump, *cf_pop, + *last_exp_pos = NULL, *last_exp_param = NULL; + int i, next_pos = 60, next_param = 0; + + cshader = calloc(1, sizeof(struct r600_pipe_shader)); + if (!cshader) + return 0; + + memcpy(cshader->shader.output, gs_shader->output, ocnt * + sizeof(struct r600_shader_io)); + + cshader->shader.noutput = ocnt; + + ctx.shader = &cshader->shader; + ctx.bc = &ctx.shader->bc; + ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; + + r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, + rctx->screen->has_compressed_msaa_texturing); + + ctx.bc->isa = rctx->isa; + + /* R0.x = R0.x & 0x3fffffff */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_AND_INT; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x3fffffff; + alu.dst.write = 1; + r600_bytecode_add_alu(ctx.bc, &alu); + + /* R0.y = R0.x >> 30 */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_LSHR_INT; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x1e; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; + r600_bytecode_add_alu(ctx.bc, &alu); + + /* PRED_SETE_INT __, R0.y, 0 */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_PRED_SETE_INT; + alu.src[0].chan = 1; + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.execute_mask = 1; + alu.update_pred = 1; + alu.last = 1; + r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); + cf_jump = ctx.bc->cf_last; + + /* fetch vertex data from GSVS ring */ + for (i = 0; i < ocnt; ++i) { + struct r600_shader_io *out = &ctx.shader->output[i]; + out->gpr = i + 1; + out->ring_offset = i * 16; + + memset(&vtx, 0, sizeof(vtx)); + vtx.op = FETCH_OP_VFETCH; + vtx.buffer_id = R600_GS_RING_CONST_BUFFER; + vtx.fetch_type = 2; + vtx.offset = out->ring_offset; + vtx.dst_gpr = out->gpr; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 2; + vtx.dst_sel_w = 3; + vtx.use_const_fields = 1; + + r600_bytecode_add_vtx(ctx.bc, &vtx); + } + + /* XXX handle clipvertex, streamout? */ + + /* export vertex data */ + /* XXX factor out common code with r600_shader_from_tgsi ? */ + for (i = 0; i < ocnt; ++i) { + struct r600_shader_io *out = &ctx.shader->output[i]; + + if (out->name == TGSI_SEMANTIC_CLIPVERTEX) + continue; + + memset(&output, 0, sizeof(output)); + output.gpr = out->gpr; + output.elem_size = 3; + output.swizzle_x = 0; + output.swizzle_y = 1; + output.swizzle_z = 2; + output.swizzle_w = 3; + output.burst_count = 1; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output.op = CF_OP_EXPORT; + switch (out->name) { + case TGSI_SEMANTIC_POSITION: + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + + case TGSI_SEMANTIC_PSIZE: + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_CLIPDIST: + /* spi_sid is 0 for clipdistance outputs that were generated + * for clipvertex - we don't need to pass them to PS */ + if (out->spi_sid) { + /* duplicate it as PARAM to pass to the pixel shader */ + output.array_base = next_param++; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_param = ctx.bc->cf_last; + } + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_FOG: + output.swizzle_y = 4; /* 0 */ + output.swizzle_z = 4; /* 0 */ + output.swizzle_w = 5; /* 1 */ + break; + } + r600_bytecode_add_output(ctx.bc, &output); + if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) + last_exp_param = ctx.bc->cf_last; + else + last_exp_pos = ctx.bc->cf_last; + } + + if (!last_exp_pos) { + memset(&output, 0, sizeof(output)); + output.gpr = 0; + output.elem_size = 3; + output.swizzle_x = 7; + output.swizzle_y = 7; + output.swizzle_z = 7; + output.swizzle_w = 7; + output.burst_count = 1; + output.type = 2; + output.op = CF_OP_EXPORT; + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_pos = ctx.bc->cf_last; + } + + if (!last_exp_param) { + memset(&output, 0, sizeof(output)); + output.gpr = 0; + output.elem_size = 3; + output.swizzle_x = 7; + output.swizzle_y = 7; + output.swizzle_z = 7; + output.swizzle_w = 7; + output.burst_count = 1; + output.type = 2; + output.op = CF_OP_EXPORT; + output.array_base = next_param++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_param = ctx.bc->cf_last; + } + + last_exp_pos->op = CF_OP_EXPORT_DONE; + last_exp_param->op = CF_OP_EXPORT_DONE; + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); + cf_pop = ctx.bc->cf_last; + + cf_jump->cf_addr = cf_pop->id + 2; + cf_jump->pop_count = 1; + cf_pop->cf_addr = cf_pop->id + 2; + cf_pop->pop_count = 1; + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); + ctx.bc->cf_last->end_of_program = 1; + + gs->gs_copy_shader = cshader; + + ctx.bc->nstack = 1; + cshader->shader.ring_item_size = ocnt * 16; + + return r600_bytecode_build(ctx.bc); +} + +static int emit_gs_ring_writes(struct r600_shader_ctx *ctx) +{ + struct r600_bytecode_output output; + int i, k, ring_offset; + + for (i = 0; i < ctx->shader->noutput; i++) { + if (ctx->gs_for_vs) { + /* for ES we need to lookup corresponding ring offset expected by GS + * (map this output to GS input by name and sid) */ + /* FIXME precompute offsets */ + ring_offset = -1; + for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { + struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; + struct r600_shader_io *out = &ctx->shader->output[i]; + if (in->name == out->name && in->sid == out->sid) + ring_offset = in->ring_offset; + } + if (ring_offset == -1) { + R600_ERR("error mapping VS->GS outputs\n"); + return -1; + } + } else + ring_offset = i * 16; + + /* next_ring_offset after parsing input decls contains total size of + * single vertex data, gs_next_vertex - current vertex index */ + ring_offset += ctx->next_ring_offset * ctx->gs_next_vertex; + + memset(&output, 0, sizeof(struct r600_bytecode_output)); + output.gpr = ctx->shader->output[i].gpr; + output.elem_size = 3; + output.comp_mask = 0xF; + output.burst_count = 1; + output.op = CF_OP_MEM_RING; + output.array_base = ring_offset >> 2; /* in dwords */ + r600_bytecode_add_output(ctx->bc, &output); + } + ++ctx->gs_next_vertex; + return 0; +} + +static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, struct r600_shader_key key) { + struct r600_screen *rscreen = rctx->screen; struct r600_shader *shader = &pipeshader->shader; struct tgsi_token *tokens = pipeshader->selector->tokens; struct pipe_stream_output_info so = pipeshader->selector->so; @@ -1002,6 +1343,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, /* Declarations used by llvm code */ bool use_llvm = false; bool indirect_gprs; + bool ring_outputs = false; #ifdef R600_USE_LLVM use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM); @@ -1010,6 +1352,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, ctx.shader = shader; ctx.native_integers = true; + shader->vs_as_es = key.vs_as_es; + r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); ctx.tokens = tokens; @@ -1021,6 +1365,17 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; + ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY); + + if (key.vs_as_es) { + ctx.gs_for_vs = &rctx->gs_shader->current->shader; + } else { + ctx.gs_for_vs = NULL; + } + + ctx.next_ring_offset = 0; + ctx.gs_next_vertex = 0; + ctx.face_gpr = -1; ctx.fragcoord_input = -1; ctx.colors_used = 0; @@ -1073,6 +1428,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) { + /* FIXME 1 would be enough in some cases (3 or less input vertices) */ + ctx.file_offset[TGSI_FILE_INPUT] = 2; + } ctx.use_llvm = use_llvm; if (use_llvm) { @@ -1149,6 +1508,15 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, case TGSI_PROPERTY_VS_PROHIBIT_UCPS: /* we don't need this one */ break; + case TGSI_PROPERTY_GS_INPUT_PRIM: + shader->gs_input_prim = property->u[0].Data; + break; + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + shader->gs_output_prim = property->u[0].Data; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + shader->gs_max_out_vertices = property->u[0].Data; + break; } break; default: @@ -1158,6 +1526,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } } + shader->ring_item_size = ctx.next_ring_offset; + /* Process two side if needed */ if (shader->two_side && ctx.colors_used) { int i, count = ctx.shader->ninput; @@ -1298,6 +1668,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) + if ((r = tgsi_split_gs_inputs(&ctx))) + goto out_err; if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; else if (ctx.bc->chip_class >= EVERGREEN) @@ -1319,7 +1692,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, noutput = shader->noutput; - if (ctx.clip_vertex_write) { + if (!ring_outputs && ctx.clip_vertex_write) { unsigned clipdist_temp[2]; clipdist_temp[0] = r600_get_temp(&ctx); @@ -1370,117 +1743,122 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } /* Add stream outputs. */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) + if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX && + so.num_outputs && !use_llvm) emit_streamout(&ctx, &so); - /* export output */ - for (i = 0, j = 0; i < noutput; i++, j++) { - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = shader->output[i].gpr; - output[j].elem_size = 3; - output[j].swizzle_x = 0; - output[j].swizzle_y = 1; - output[j].swizzle_z = 2; - output[j].swizzle_w = 3; - output[j].burst_count = 1; - output[j].type = -1; - output[j].op = CF_OP_EXPORT; - switch (ctx.type) { - case TGSI_PROCESSOR_VERTEX: - switch (shader->output[i].name) { - case TGSI_SEMANTIC_POSITION: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - break; + if (ring_outputs) { + if (key.vs_as_es) + emit_gs_ring_writes(&ctx); + } else { + /* export output */ + for (i = 0, j = 0; i < noutput; i++, j++) { + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = shader->output[i].gpr; + output[j].elem_size = 3; + output[j].swizzle_x = 0; + output[j].swizzle_y = 1; + output[j].swizzle_z = 2; + output[j].swizzle_w = 3; + output[j].burst_count = 1; + output[j].type = -1; + output[j].op = CF_OP_EXPORT; + switch (ctx.type) { + case TGSI_PROCESSOR_VERTEX: + switch (shader->output[i].name) { + case TGSI_SEMANTIC_POSITION: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; - case TGSI_SEMANTIC_PSIZE: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - j--; - break; - case TGSI_SEMANTIC_CLIPDIST: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - /* spi_sid is 0 for clipdistance outputs that were generated - * for clipvertex - we don't need to pass them to PS */ - if (shader->output[i].spi_sid) { - j++; - /* duplicate it as PARAM to pass to the pixel shader */ - memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); - output[j].array_base = next_param_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - } - break; - case TGSI_SEMANTIC_FOG: - output[j].swizzle_y = 4; /* 0 */ - output[j].swizzle_z = 4; /* 0 */ - output[j].swizzle_w = 5; /* 1 */ - break; - } - break; - case TGSI_PROCESSOR_FRAGMENT: - if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - /* never export more colors than the number of CBs */ - if (shader->output[i].sid >= max_color_exports) { - /* skip export */ + case TGSI_SEMANTIC_PSIZE: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_CLIPVERTEX: j--; - continue; - } - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; - output[j].array_base = shader->output[i].sid; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - shader->nr_ps_color_exports++; - if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { - for (k = 1; k < max_color_exports; k++) { + break; + case TGSI_SEMANTIC_CLIPDIST: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* spi_sid is 0 for clipdistance outputs that were generated + * for clipvertex - we don't need to pass them to PS */ + if (shader->output[i].spi_sid) { j++; - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = shader->output[i].gpr; - output[j].elem_size = 3; - output[j].swizzle_x = 0; - output[j].swizzle_y = 1; - output[j].swizzle_z = 2; - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; - output[j].burst_count = 1; - output[j].array_base = k; - output[j].op = CF_OP_EXPORT; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - shader->nr_ps_color_exports++; + /* duplicate it as PARAM to pass to the pixel shader */ + memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); + output[j].array_base = next_param_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; } + break; + case TGSI_SEMANTIC_FOG: + output[j].swizzle_y = 4; /* 0 */ + output[j].swizzle_z = 4; /* 0 */ + output[j].swizzle_w = 5; /* 1 */ + break; } - } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[j].array_base = 61; - output[j].swizzle_x = 2; - output[j].swizzle_y = 7; - output[j].swizzle_z = output[j].swizzle_w = 7; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[j].array_base = 61; - output[j].swizzle_x = 7; - output[j].swizzle_y = 1; - output[j].swizzle_z = output[j].swizzle_w = 7; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - } else { - R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + break; + case TGSI_PROCESSOR_FRAGMENT: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + /* never export more colors than the number of CBs */ + if (shader->output[i].sid >= max_color_exports) { + /* skip export */ + j--; + continue; + } + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; + output[j].array_base = shader->output[i].sid; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->nr_ps_color_exports++; + if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { + for (k = 1; k < max_color_exports; k++) { + j++; + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = shader->output[i].gpr; + output[j].elem_size = 3; + output[j].swizzle_x = 0; + output[j].swizzle_y = 1; + output[j].swizzle_z = 2; + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; + output[j].burst_count = 1; + output[j].array_base = k; + output[j].op = CF_OP_EXPORT; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->nr_ps_color_exports++; + } + } + } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output[j].array_base = 61; + output[j].swizzle_x = 2; + output[j].swizzle_y = 7; + output[j].swizzle_z = output[j].swizzle_w = 7; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[j].array_base = 61; + output[j].swizzle_x = 7; + output[j].swizzle_y = 1; + output[j].swizzle_z = output[j].swizzle_w = 7; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else { + R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + r = -EINVAL; + goto out_err; + } + break; + default: + R600_ERR("unsupported processor type %d\n", ctx.type); r = -EINVAL; goto out_err; } - break; - default: - R600_ERR("unsupported processor type %d\n", ctx.type); - r = -EINVAL; - goto out_err; - } - if (output[j].type==-1) { - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[j].array_base = next_param_base++; + if (output[j].type==-1) { + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[j].array_base = next_param_base++; + } } - } - /* add fake position export */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { + /* add fake position export */ + if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -1493,10 +1871,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].array_base = next_pos_base; output[j].op = CF_OP_EXPORT; j++; - } + } - /* add fake param output for vertex shader if no param is exported */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { + /* add fake param output for vertex shader if no param is exported */ + if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -1509,39 +1887,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].array_base = 0; output[j].op = CF_OP_EXPORT; j++; - } + } + + /* add fake pixel export */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = 0; + output[j].elem_size = 3; + output[j].swizzle_x = 7; + output[j].swizzle_y = 7; + output[j].swizzle_z = 7; + output[j].swizzle_w = 7; + output[j].burst_count = 1; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[j].array_base = 0; + output[j].op = CF_OP_EXPORT; + j++; + } + + noutput = j; - /* add fake pixel export */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = 0; - output[j].elem_size = 3; - output[j].swizzle_x = 7; - output[j].swizzle_y = 7; - output[j].swizzle_z = 7; - output[j].swizzle_w = 7; - output[j].burst_count = 1; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - output[j].array_base = 0; - output[j].op = CF_OP_EXPORT; - j++; - } - - noutput = j; - - /* set export done on last export of each type */ - for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (!(output_done & (1 << output[i].type))) { - output_done |= (1 << output[i].type); - output[i].op = CF_OP_EXPORT_DONE; + /* set export done on last export of each type */ + for (i = noutput - 1, output_done = 0; i >= 0; i--) { + if (!(output_done & (1 << output[i].type))) { + output_done |= (1 << output[i].type); + output[i].op = CF_OP_EXPORT_DONE; + } } - } - /* add output to bytecode */ - if (!use_llvm) { - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; + /* add output to bytecode */ + if (!use_llvm) { + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; + } } } @@ -1552,7 +1931,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, else { const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op); - if (last->flags & CF_CLAUSE) + /* alu clause instructions don't have EOP bit, so add NOP */ + if (last->flags & CF_ALU) r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); ctx.bc->cf_last->end_of_program = 1; @@ -1567,6 +1947,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, goto out_err; } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + if ((r = generate_gs_copy_shader(rctx, pipeshader))) + return r; + } + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; @@ -5561,6 +5946,14 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_gs_emit(struct r600_shader_ctx *ctx) +{ + if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) + emit_gs_ring_writes(ctx); + + return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); +} + static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -5934,8 +6327,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, @@ -6126,8 +6519,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index d989ce43649..0bf8b51fff3 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -37,6 +37,7 @@ struct r600_shader_io { unsigned lds_pos; /* for evergreen */ unsigned back_color_input; unsigned write_mask; + int ring_offset; }; struct r600_shader { @@ -64,9 +65,17 @@ struct r600_shader { boolean has_txq_cube_array_z_comp; boolean uses_tex_buffers; + /* geometry shader properties */ + unsigned gs_input_prim; + unsigned gs_output_prim; + unsigned gs_max_out_vertices; + /* size in bytes of a data item in the ring (single vertex data) */ + unsigned ring_item_size; + unsigned indirect_files; unsigned max_arrays; unsigned num_arrays; + unsigned vs_as_es; struct r600_shader_array * arrays; }; @@ -74,6 +83,7 @@ struct r600_shader_key { unsigned color_two_side:1; unsigned alpha_to_one:1; unsigned nr_cbufs:4; + unsigned vs_as_es:1; }; struct r600_shader_array { @@ -85,6 +95,8 @@ struct r600_shader_array { struct r600_pipe_shader { struct r600_pipe_shader_selector *selector; struct r600_pipe_shader *next_variant; + /* for GS - corresponding copy shader (installed as VS) */ + struct r600_pipe_shader *gs_copy_shader; struct r600_shader shader; struct r600_command_buffer command_buffer; /* register writes */ struct r600_resource *bo; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c05b74d42cd..3221a8e24e8 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -693,6 +693,8 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex /* Dual-source blending only makes sense with nr_cbufs == 1. */ if (key.nr_cbufs == 1 && rctx->dual_src_blend) key.nr_cbufs = 2; + } else if (sel->type == PIPE_SHADER_VERTEX) { + key.vs_as_es = (rctx->gs_shader != NULL); } return key; } @@ -792,6 +794,12 @@ static void *r600_create_vs_state(struct pipe_context *ctx, return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX); } +static void *r600_create_gs_state(struct pipe_context *ctx, + const struct pipe_shader_state *state) +{ + return r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY); +} + static void r600_bind_ps_state(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -813,6 +821,13 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state) rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride; } +static void r600_bind_gs_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + rctx->gs_shader = (struct r600_pipe_shader_selector *)state; +} + static void r600_delete_shader_selector(struct pipe_context *ctx, struct r600_pipe_shader_selector *sel) { @@ -853,6 +868,20 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state) r600_delete_shader_selector(ctx, sel); } + +static void r600_delete_gs_state(struct pipe_context *ctx, void *state) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_pipe_shader_selector *sel = (struct r600_pipe_shader_selector *)state; + + if (rctx->gs_shader == sel) { + rctx->gs_shader = NULL; + } + + r600_delete_shader_selector(ctx, sel); +} + + void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state) { if (state->dirty_mask) { @@ -1046,10 +1075,65 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s pipe_resource_reference(&cb.buffer, NULL); } +static void update_shader_atom(struct pipe_context *ctx, + struct r600_shader_state *state, + struct r600_pipe_shader *shader) +{ + state->shader = shader; + if (shader) { + state->atom.num_dw = shader->command_buffer.num_dw; + state->atom.dirty = true; + r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo); + } else { + state->atom.num_dw = 0; + state->atom.dirty = false; + } +} + +static void update_gs_block_state(struct r600_context *rctx, unsigned enable) +{ + if (rctx->shader_stages.geom_enable != enable) { + rctx->shader_stages.geom_enable = enable; + rctx->shader_stages.atom.dirty = true; + } + + if (rctx->gs_rings.enable != enable) { + rctx->gs_rings.enable = enable; + rctx->gs_rings.atom.dirty = true; + + if (enable && !rctx->gs_rings.esgs_ring.buffer) { + unsigned size = 0x1C000; + rctx->gs_rings.esgs_ring.buffer = + pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STATIC, size); + rctx->gs_rings.esgs_ring.buffer_size = size; + + size = 0x4000000; + + rctx->gs_rings.gsvs_ring.buffer = + pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STATIC, size); + rctx->gs_rings.gsvs_ring.buffer_size = size; + } + + if (enable) { + r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, + R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring); + r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, + R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring); + } else { + r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY, + R600_GS_RING_CONST_BUFFER, NULL); + r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, + R600_GS_RING_CONST_BUFFER, NULL); + } + } +} + static bool r600_update_derived_state(struct r600_context *rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; - bool ps_dirty = false, vs_dirty = false; + bool ps_dirty = false, vs_dirty = false, gs_dirty = false; bool blend_disable; if (!rctx->blitter->running) { @@ -1067,22 +1151,54 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } - if (unlikely(rctx->vertex_shader.shader != rctx->vs_shader)) { + update_gs_block_state(rctx, rctx->gs_shader != NULL); + + if (rctx->gs_shader) { + r600_shader_select(ctx, rctx->gs_shader, &gs_dirty); + if (unlikely(!rctx->gs_shader->current)) + return false; + + if (rctx->b.chip_class >= EVERGREEN && !rctx->shader_stages.geom_enable) { + rctx->shader_stages.geom_enable = true; + rctx->shader_stages.atom.dirty = true; + } + + /* gs_shader provides GS and VS (copy shader) */ + if (unlikely(rctx->geometry_shader.shader != rctx->gs_shader->current)) { + update_shader_atom(ctx, &rctx->geometry_shader, rctx->gs_shader->current); + update_shader_atom(ctx, &rctx->vertex_shader, rctx->gs_shader->current->gs_copy_shader); + } + r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); + if (unlikely(!rctx->vs_shader->current)) + return false; + + /* vs_shader is used as ES */ + if (unlikely(vs_dirty || rctx->export_shader.shader != rctx->vs_shader->current)) { + update_shader_atom(ctx, &rctx->export_shader, rctx->vs_shader->current); + } + } else { + if (unlikely(rctx->geometry_shader.shader)) { + update_shader_atom(ctx, &rctx->geometry_shader, NULL); + update_shader_atom(ctx, &rctx->export_shader, NULL); + rctx->shader_stages.geom_enable = false; + rctx->shader_stages.atom.dirty = true; + } + r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); if (unlikely(!rctx->vs_shader->current)) return false; - rctx->vertex_shader.shader = rctx->vs_shader; - rctx->vertex_shader.atom.dirty = true; - r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo); + if (unlikely(vs_dirty || rctx->vertex_shader.shader != rctx->vs_shader->current)) { + update_shader_atom(ctx, &rctx->vertex_shader, rctx->vs_shader->current); - /* Update clip misc state. */ - if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || - rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { - rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl; - rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write; - rctx->clip_misc_state.atom.dirty = true; + /* Update clip misc state. */ + if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl || + rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) { + rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl; + rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write; + rctx->clip_misc_state.atom.dirty = true; + } } } @@ -1090,7 +1206,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (unlikely(!rctx->ps_shader->current)) return false; - if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader)) { + if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader->current)) { if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) { rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs; @@ -1112,9 +1228,9 @@ static bool r600_update_derived_state(struct r600_context *rctx) r600_update_db_shader_control(rctx); } - if (!ps_dirty && rctx->ps_shader && rctx->rasterizer && + if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer && ((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) || - (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) { + (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) { if (rctx->b.chip_class >= EVERGREEN) evergreen_update_ps_state(ctx, rctx->ps_shader->current); @@ -1122,11 +1238,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) r600_update_ps_state(ctx, rctx->ps_shader->current); } - rctx->pixel_shader.shader = rctx->ps_shader; - rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw; - rctx->pixel_shader.atom.dirty = true; - r600_context_add_resource_size(ctx, - (struct pipe_resource *)rctx->ps_shader->current->bo); + update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current); } /* on R600 we stuff masks + txq info into one constant buffer */ @@ -1165,6 +1277,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) rctx->blend_state.cso, blend_disable); } + return true; } @@ -1606,11 +1719,14 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state) void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a) { + struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; - struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader->current; + struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader; - r600_emit_command_buffer(cs, &shader->command_buffer); + if (!shader) + return; + r600_emit_command_buffer(cs, &shader->command_buffer); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo, RADEON_USAGE_READ)); } @@ -2139,6 +2255,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) { rctx->b.b.create_fs_state = r600_create_ps_state; rctx->b.b.create_vs_state = r600_create_vs_state; + rctx->b.b.create_gs_state = r600_create_gs_state; rctx->b.b.create_vertex_elements_state = r600_create_vertex_fetch_shader; rctx->b.b.bind_blend_state = r600_bind_blend_state; rctx->b.b.bind_depth_stencil_alpha_state = r600_bind_dsa_state; @@ -2147,6 +2264,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->b.b.bind_rasterizer_state = r600_bind_rs_state; rctx->b.b.bind_vertex_elements_state = r600_bind_vertex_elements; rctx->b.b.bind_vs_state = r600_bind_vs_state; + rctx->b.b.bind_gs_state = r600_bind_gs_state; rctx->b.b.delete_blend_state = r600_delete_blend_state; rctx->b.b.delete_depth_stencil_alpha_state = r600_delete_dsa_state; rctx->b.b.delete_fs_state = r600_delete_ps_state; @@ -2154,6 +2272,7 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->b.b.delete_sampler_state = r600_delete_sampler_state; rctx->b.b.delete_vertex_elements_state = r600_delete_vertex_elements; rctx->b.b.delete_vs_state = r600_delete_vs_state; + rctx->b.b.delete_gs_state = r600_delete_gs_state; rctx->b.b.set_blend_color = r600_set_blend_color; rctx->b.b.set_clip_state = r600_set_clip_state; rctx->b.b.set_constant_buffer = r600_set_constant_buffer; diff --git a/src/gallium/drivers/r600/sb/sb_bc.h b/src/gallium/drivers/r600/sb/sb_bc.h index 73b8b08ba39..d03da98777d 100644 --- a/src/gallium/drivers/r600/sb/sb_bc.h +++ b/src/gallium/drivers/r600/sb/sb_bc.h @@ -169,8 +169,10 @@ enum shader_target { TARGET_UNKNOWN, TARGET_VS, + TARGET_ES, TARGET_PS, TARGET_GS, + TARGET_GS_COPY, TARGET_COMPUTE, TARGET_FETCH, diff --git a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp index 9b1420d3895..f79dff16ee3 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_dump.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_dump.cpp @@ -137,7 +137,7 @@ void bc_dump::dump(cf_node& n) { for (int k = 0; k < 4; ++k) s << chans[n.bc.sel[k]]; - } else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) { + } else if (n.bc.op_ptr->flags & CF_MEM) { static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK", "WRITE_IND_ACK"}; fill_to(s, 18); diff --git a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp index 355eb63810c..d9654832259 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp @@ -63,7 +63,7 @@ int bc_finalizer::run() { // workaround for some problems on r6xx/7xx // add ALU NOP to each vertex shader - if (!ctx.is_egcm() && sh.target == TARGET_VS) { + if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) { cf_node *c = sh.create_clause(NST_ALU_CLAUSE); alu_group_node *g = sh.create_alu_group(); diff --git a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp index 67e6c3a582d..24c4854225d 100644 --- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp +++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp @@ -58,7 +58,10 @@ int bc_parser::decode() { if (pshader) { switch (bc->type) { case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break; - case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break; + case TGSI_PROCESSOR_VERTEX: + t = pshader->vs_as_es ? TARGET_ES : TARGET_VS; + break; + case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break; case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break; default: assert(!"unknown shader target"); return -1; break; } @@ -134,8 +137,12 @@ int bc_parser::parse_decls() { } } - if (sh->target == TARGET_VS) + if (sh->target == TARGET_VS || sh->target == TARGET_ES) sh->add_input(0, 1, 0x0F); + else if (sh->target == TARGET_GS) { + sh->add_input(0, 1, 0x0F); + sh->add_input(1, 1, 0x0F); + } bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN && sh->target == TARGET_PS; diff --git a/src/gallium/drivers/r600/sb/sb_shader.cpp b/src/gallium/drivers/r600/sb/sb_shader.cpp index 38617a85330..f996c0786d1 100644 --- a/src/gallium/drivers/r600/sb/sb_shader.cpp +++ b/src/gallium/drivers/r600/sb/sb_shader.cpp @@ -215,7 +215,7 @@ void shader::init() { void shader::init_call_fs(cf_node* cf) { unsigned gpr = 0; - assert(target == TARGET_VS); + assert(target == TARGET_VS || target == TARGET_ES); for(inputs_vec::const_iterator I = inputs.begin(), E = inputs.end(); I != E; ++I, ++gpr) { @@ -433,6 +433,7 @@ std::string shader::get_full_target_name() { const char* shader::get_shader_target_name() { switch (target) { case TARGET_VS: return "VS"; + case TARGET_ES: return "ES"; case TARGET_PS: return "PS"; case TARGET_GS: return "GS"; case TARGET_COMPUTE: return "COMPUTE"; -- cgit v1.2.3