diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 715 |
1 files changed, 554 insertions, 161 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 32d2aa73bef..56067246874 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -60,7 +60,7 @@ issued in the w slot as well. The compiler must issue the source argument to slots z, y, and x */ -static int r600_shader_from_tgsi(struct r600_screen *rscreen, +static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, struct r600_shader_key key); @@ -104,17 +104,43 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so) } } +static int store_shader(struct pipe_context *ctx, + struct r600_pipe_shader *shader) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + uint32_t *ptr, i; + + if (shader->bo == NULL) { + shader->bo = (struct r600_resource*) + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); + if (shader->bo == NULL) { + return -ENOMEM; + } + ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + if (R600_BIG_ENDIAN) { + for (i = 0; i < shader->shader.bc.ndw; ++i) { + ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]); + } + } else { + memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); + } + rctx->b.ws->buffer_unmap(shader->bo->cs_buf); + } + + return 0; +} + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, struct r600_shader_key key) { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; - int r, i; - uint32_t *ptr; + int r; bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); + unsigned export_shader = key.vs_as_es; shader->shader.bc.isa = rctx->isa; @@ -126,7 +152,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_dump_streamout(&sel->so); } } - r = r600_shader_from_tgsi(rctx->screen, shader, key); + r = r600_shader_from_tgsi(rctx, shader, key); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; @@ -157,29 +183,39 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } - /* Store the shader in a buffer. */ - if (shader->bo == NULL) { - shader->bo = (struct r600_resource*) - pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); - if (shader->bo == NULL) { - return -ENOMEM; - } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); - if (R600_BIG_ENDIAN) { - for (i = 0; i < shader->shader.bc.ndw; ++i) { - ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]); - } - } else { - memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); + if (shader->gs_copy_shader) { + if (dump) { + // dump copy shader + r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, + &shader->gs_copy_shader->shader, dump, 0); + if (r) + return r; } - rctx->b.ws->buffer_unmap(shader->bo->cs_buf); + + if ((r = store_shader(ctx, shader->gs_copy_shader))) + return r; } + /* Store the shader in a buffer. */ + if ((r = store_shader(ctx, shader))) + return r; + /* Build state. */ switch (shader->shader.processor_type) { + case TGSI_PROCESSOR_GEOMETRY: + if (rctx->b.chip_class >= EVERGREEN) { + evergreen_update_gs_state(ctx, shader); + evergreen_update_vs_state(ctx, shader->gs_copy_shader); + } else { + assert(!"not suported yet"); + } + break; case TGSI_PROCESSOR_VERTEX: if (rctx->b.chip_class >= EVERGREEN) { - evergreen_update_vs_state(ctx, shader); + if (export_shader) + evergreen_update_es_state(ctx, shader); + else + evergreen_update_vs_state(ctx, shader); } else { r600_update_vs_state(ctx, shader); } @@ -245,6 +281,9 @@ struct r600_shader_ctx { unsigned cv_output; int fragcoord_input; int native_integers; + int next_ring_offset; + int gs_next_vertex; + struct r600_shader *gs_for_vs; }; struct r600_shader_tgsi_instruction { @@ -254,6 +293,7 @@ struct r600_shader_tgsi_instruction { int (*process)(struct r600_shader_ctx *ctx); }; +static int emit_gs_ring_writes(struct r600_shader_ctx *ctx); static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); @@ -285,7 +325,13 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) { + switch (i->Src[j].Register.File) { + case TGSI_FILE_CONSTANT: + break; + case TGSI_FILE_INPUT: + if (ctx->type == TGSI_PROCESSOR_GEOMETRY) + break; + default: R600_ERR("unsupported src %d (dimension %d)\n", j, i->Src[j].Register.Dimension); return -EINVAL; @@ -536,6 +582,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) if ((r = evergreen_interp_input(ctx, i))) return r; } + } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + /* FIXME probably skip inputs if they aren't passed in the ring */ + ctx->shader->input[i].ring_offset = ctx->next_ring_offset; + ctx->next_ring_offset += 16; } for (j = 1; j < count; ++j) { ctx->shader->input[i + j] = ctx->shader->input[i]; @@ -550,7 +600,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; - if (ctx->type == TGSI_PROCESSOR_VERTEX) { + if (ctx->type == TGSI_PROCESSOR_VERTEX || + ctx->type == TGSI_PROCESSOR_GEOMETRY) { ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); switch (d->Semantic.Name) { case TGSI_SEMANTIC_CLIPDIST: @@ -773,6 +824,59 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx return 0; } +static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg) +{ + struct r600_bytecode_vtx vtx; + int r; + int offset_reg = vtx_id / 3; + int offset_chan = vtx_id % 3; + + /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, + * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ + + if (offset_reg == 0 && offset_chan == 2) + offset_chan = 3; + + memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = R600_GS_RING_CONST_BUFFER; + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = offset_reg; + vtx.src_sel_x = offset_chan; + vtx.offset = index * 16; /*bytes*/ + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.use_const_fields = 1; + + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i; + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_full_src_register *src = &inst->Src[i]; + + if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { + int treg = r600_get_temp(ctx); + int index = src->Register.Index; + int vtx_id = src->Dimension.Index; + + fetch_gs_input(ctx, index, vtx_id, treg); + ctx->src[i].sel = treg; + } + } + return 0; +} + static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -983,10 +1087,247 @@ out_err: return r; } -static int r600_shader_from_tgsi(struct r600_screen *rscreen, +static int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs) +{ + struct r600_shader_ctx ctx = {}; + struct r600_shader *gs_shader = &gs->shader; + struct r600_pipe_shader *cshader; + int ocnt = gs_shader->noutput; + struct r600_bytecode_alu alu; + struct r600_bytecode_vtx vtx; + struct r600_bytecode_output output; + struct r600_bytecode_cf *cf_jump, *cf_pop, + *last_exp_pos = NULL, *last_exp_param = NULL; + int i, next_pos = 60, next_param = 0; + + cshader = calloc(1, sizeof(struct r600_pipe_shader)); + if (!cshader) + return 0; + + memcpy(cshader->shader.output, gs_shader->output, ocnt * + sizeof(struct r600_shader_io)); + + cshader->shader.noutput = ocnt; + + ctx.shader = &cshader->shader; + ctx.bc = &ctx.shader->bc; + ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; + + r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, + rctx->screen->has_compressed_msaa_texturing); + + ctx.bc->isa = rctx->isa; + + /* R0.x = R0.x & 0x3fffffff */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_AND_INT; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x3fffffff; + alu.dst.write = 1; + r600_bytecode_add_alu(ctx.bc, &alu); + + /* R0.y = R0.x >> 30 */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_LSHR_INT; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x1e; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; + r600_bytecode_add_alu(ctx.bc, &alu); + + /* PRED_SETE_INT __, R0.y, 0 */ + memset(&alu, 0, sizeof(alu)); + alu.op = ALU_OP2_PRED_SETE_INT; + alu.src[0].chan = 1; + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.execute_mask = 1; + alu.update_pred = 1; + alu.last = 1; + r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); + cf_jump = ctx.bc->cf_last; + + /* fetch vertex data from GSVS ring */ + for (i = 0; i < ocnt; ++i) { + struct r600_shader_io *out = &ctx.shader->output[i]; + out->gpr = i + 1; + out->ring_offset = i * 16; + + memset(&vtx, 0, sizeof(vtx)); + vtx.op = FETCH_OP_VFETCH; + vtx.buffer_id = R600_GS_RING_CONST_BUFFER; + vtx.fetch_type = 2; + vtx.offset = out->ring_offset; + vtx.dst_gpr = out->gpr; + vtx.dst_sel_x = 0; + vtx.dst_sel_y = 1; + vtx.dst_sel_z = 2; + vtx.dst_sel_w = 3; + vtx.use_const_fields = 1; + + r600_bytecode_add_vtx(ctx.bc, &vtx); + } + + /* XXX handle clipvertex, streamout? */ + + /* export vertex data */ + /* XXX factor out common code with r600_shader_from_tgsi ? */ + for (i = 0; i < ocnt; ++i) { + struct r600_shader_io *out = &ctx.shader->output[i]; + + if (out->name == TGSI_SEMANTIC_CLIPVERTEX) + continue; + + memset(&output, 0, sizeof(output)); + output.gpr = out->gpr; + output.elem_size = 3; + output.swizzle_x = 0; + output.swizzle_y = 1; + output.swizzle_z = 2; + output.swizzle_w = 3; + output.burst_count = 1; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output.op = CF_OP_EXPORT; + switch (out->name) { + case TGSI_SEMANTIC_POSITION: + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + + case TGSI_SEMANTIC_PSIZE: + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_CLIPDIST: + /* spi_sid is 0 for clipdistance outputs that were generated + * for clipvertex - we don't need to pass them to PS */ + if (out->spi_sid) { + /* duplicate it as PARAM to pass to the pixel shader */ + output.array_base = next_param++; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_param = ctx.bc->cf_last; + } + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_FOG: + output.swizzle_y = 4; /* 0 */ + output.swizzle_z = 4; /* 0 */ + output.swizzle_w = 5; /* 1 */ + break; + } + r600_bytecode_add_output(ctx.bc, &output); + if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) + last_exp_param = ctx.bc->cf_last; + else + last_exp_pos = ctx.bc->cf_last; + } + + if (!last_exp_pos) { + memset(&output, 0, sizeof(output)); + output.gpr = 0; + output.elem_size = 3; + output.swizzle_x = 7; + output.swizzle_y = 7; + output.swizzle_z = 7; + output.swizzle_w = 7; + output.burst_count = 1; + output.type = 2; + output.op = CF_OP_EXPORT; + output.array_base = next_pos++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_pos = ctx.bc->cf_last; + } + + if (!last_exp_param) { + memset(&output, 0, sizeof(output)); + output.gpr = 0; + output.elem_size = 3; + output.swizzle_x = 7; + output.swizzle_y = 7; + output.swizzle_z = 7; + output.swizzle_w = 7; + output.burst_count = 1; + output.type = 2; + output.op = CF_OP_EXPORT; + output.array_base = next_param++; + output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + r600_bytecode_add_output(ctx.bc, &output); + last_exp_param = ctx.bc->cf_last; + } + + last_exp_pos->op = CF_OP_EXPORT_DONE; + last_exp_param->op = CF_OP_EXPORT_DONE; + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); + cf_pop = ctx.bc->cf_last; + + cf_jump->cf_addr = cf_pop->id + 2; + cf_jump->pop_count = 1; + cf_pop->cf_addr = cf_pop->id + 2; + cf_pop->pop_count = 1; + + r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); + ctx.bc->cf_last->end_of_program = 1; + + gs->gs_copy_shader = cshader; + + ctx.bc->nstack = 1; + cshader->shader.ring_item_size = ocnt * 16; + + return r600_bytecode_build(ctx.bc); +} + +static int emit_gs_ring_writes(struct r600_shader_ctx *ctx) +{ + struct r600_bytecode_output output; + int i, k, ring_offset; + + for (i = 0; i < ctx->shader->noutput; i++) { + if (ctx->gs_for_vs) { + /* for ES we need to lookup corresponding ring offset expected by GS + * (map this output to GS input by name and sid) */ + /* FIXME precompute offsets */ + ring_offset = -1; + for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { + struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; + struct r600_shader_io *out = &ctx->shader->output[i]; + if (in->name == out->name && in->sid == out->sid) + ring_offset = in->ring_offset; + } + if (ring_offset == -1) { + R600_ERR("error mapping VS->GS outputs\n"); + return -1; + } + } else + ring_offset = i * 16; + + /* next_ring_offset after parsing input decls contains total size of + * single vertex data, gs_next_vertex - current vertex index */ + ring_offset += ctx->next_ring_offset * ctx->gs_next_vertex; + + memset(&output, 0, sizeof(struct r600_bytecode_output)); + output.gpr = ctx->shader->output[i].gpr; + output.elem_size = 3; + output.comp_mask = 0xF; + output.burst_count = 1; + output.op = CF_OP_MEM_RING; + output.array_base = ring_offset >> 2; /* in dwords */ + r600_bytecode_add_output(ctx->bc, &output); + } + ++ctx->gs_next_vertex; + return 0; +} + +static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, struct r600_shader_key key) { + struct r600_screen *rscreen = rctx->screen; struct r600_shader *shader = &pipeshader->shader; struct tgsi_token *tokens = pipeshader->selector->tokens; struct pipe_stream_output_info so = pipeshader->selector->so; @@ -1002,6 +1343,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, /* Declarations used by llvm code */ bool use_llvm = false; bool indirect_gprs; + bool ring_outputs = false; #ifdef R600_USE_LLVM use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM); @@ -1010,6 +1352,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, ctx.shader = shader; ctx.native_integers = true; + shader->vs_as_es = key.vs_as_es; + r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); ctx.tokens = tokens; @@ -1021,6 +1365,17 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, shader->processor_type = ctx.type; ctx.bc->type = shader->processor_type; + ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY); + + if (key.vs_as_es) { + ctx.gs_for_vs = &rctx->gs_shader->current->shader; + } else { + ctx.gs_for_vs = NULL; + } + + ctx.next_ring_offset = 0; + ctx.gs_next_vertex = 0; + ctx.face_gpr = -1; ctx.fragcoord_input = -1; ctx.colors_used = 0; @@ -1073,6 +1428,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) { + /* FIXME 1 would be enough in some cases (3 or less input vertices) */ + ctx.file_offset[TGSI_FILE_INPUT] = 2; + } ctx.use_llvm = use_llvm; if (use_llvm) { @@ -1149,6 +1508,15 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, case TGSI_PROPERTY_VS_PROHIBIT_UCPS: /* we don't need this one */ break; + case TGSI_PROPERTY_GS_INPUT_PRIM: + shader->gs_input_prim = property->u[0].Data; + break; + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + shader->gs_output_prim = property->u[0].Data; + break; + case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: + shader->gs_max_out_vertices = property->u[0].Data; + break; } break; default: @@ -1158,6 +1526,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } } + shader->ring_item_size = ctx.next_ring_offset; + /* Process two side if needed */ if (shader->two_side && ctx.colors_used) { int i, count = ctx.shader->ninput; @@ -1298,6 +1668,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) + if ((r = tgsi_split_gs_inputs(&ctx))) + goto out_err; if (ctx.bc->chip_class == CAYMAN) ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; else if (ctx.bc->chip_class >= EVERGREEN) @@ -1319,7 +1692,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, noutput = shader->noutput; - if (ctx.clip_vertex_write) { + if (!ring_outputs && ctx.clip_vertex_write) { unsigned clipdist_temp[2]; clipdist_temp[0] = r600_get_temp(&ctx); @@ -1370,117 +1743,122 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } /* Add stream outputs. */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) + if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX && + so.num_outputs && !use_llvm) emit_streamout(&ctx, &so); - /* export output */ - for (i = 0, j = 0; i < noutput; i++, j++) { - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = shader->output[i].gpr; - output[j].elem_size = 3; - output[j].swizzle_x = 0; - output[j].swizzle_y = 1; - output[j].swizzle_z = 2; - output[j].swizzle_w = 3; - output[j].burst_count = 1; - output[j].type = -1; - output[j].op = CF_OP_EXPORT; - switch (ctx.type) { - case TGSI_PROCESSOR_VERTEX: - switch (shader->output[i].name) { - case TGSI_SEMANTIC_POSITION: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - break; + if (ring_outputs) { + if (key.vs_as_es) + emit_gs_ring_writes(&ctx); + } else { + /* export output */ + for (i = 0, j = 0; i < noutput; i++, j++) { + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = shader->output[i].gpr; + output[j].elem_size = 3; + output[j].swizzle_x = 0; + output[j].swizzle_y = 1; + output[j].swizzle_z = 2; + output[j].swizzle_w = 3; + output[j].burst_count = 1; + output[j].type = -1; + output[j].op = CF_OP_EXPORT; + switch (ctx.type) { + case TGSI_PROCESSOR_VERTEX: + switch (shader->output[i].name) { + case TGSI_SEMANTIC_POSITION: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; - case TGSI_SEMANTIC_PSIZE: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - break; - case TGSI_SEMANTIC_CLIPVERTEX: - j--; - break; - case TGSI_SEMANTIC_CLIPDIST: - output[j].array_base = next_pos_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; - /* spi_sid is 0 for clipdistance outputs that were generated - * for clipvertex - we don't need to pass them to PS */ - if (shader->output[i].spi_sid) { - j++; - /* duplicate it as PARAM to pass to the pixel shader */ - memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); - output[j].array_base = next_param_base++; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - } - break; - case TGSI_SEMANTIC_FOG: - output[j].swizzle_y = 4; /* 0 */ - output[j].swizzle_z = 4; /* 0 */ - output[j].swizzle_w = 5; /* 1 */ - break; - } - break; - case TGSI_PROCESSOR_FRAGMENT: - if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { - /* never export more colors than the number of CBs */ - if (shader->output[i].sid >= max_color_exports) { - /* skip export */ + case TGSI_SEMANTIC_PSIZE: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + break; + case TGSI_SEMANTIC_CLIPVERTEX: j--; - continue; - } - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; - output[j].array_base = shader->output[i].sid; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - shader->nr_ps_color_exports++; - if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { - for (k = 1; k < max_color_exports; k++) { + break; + case TGSI_SEMANTIC_CLIPDIST: + output[j].array_base = next_pos_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + /* spi_sid is 0 for clipdistance outputs that were generated + * for clipvertex - we don't need to pass them to PS */ + if (shader->output[i].spi_sid) { j++; - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = shader->output[i].gpr; - output[j].elem_size = 3; - output[j].swizzle_x = 0; - output[j].swizzle_y = 1; - output[j].swizzle_z = 2; - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; - output[j].burst_count = 1; - output[j].array_base = k; - output[j].op = CF_OP_EXPORT; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - shader->nr_ps_color_exports++; + /* duplicate it as PARAM to pass to the pixel shader */ + memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); + output[j].array_base = next_param_base++; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; } + break; + case TGSI_SEMANTIC_FOG: + output[j].swizzle_y = 4; /* 0 */ + output[j].swizzle_z = 4; /* 0 */ + output[j].swizzle_w = 5; /* 1 */ + break; } - } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { - output[j].array_base = 61; - output[j].swizzle_x = 2; - output[j].swizzle_y = 7; - output[j].swizzle_z = output[j].swizzle_w = 7; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { - output[j].array_base = 61; - output[j].swizzle_x = 7; - output[j].swizzle_y = 1; - output[j].swizzle_z = output[j].swizzle_w = 7; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - } else { - R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + break; + case TGSI_PROCESSOR_FRAGMENT: + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + /* never export more colors than the number of CBs */ + if (shader->output[i].sid >= max_color_exports) { + /* skip export */ + j--; + continue; + } + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; + output[j].array_base = shader->output[i].sid; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->nr_ps_color_exports++; + if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { + for (k = 1; k < max_color_exports; k++) { + j++; + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = shader->output[i].gpr; + output[j].elem_size = 3; + output[j].swizzle_x = 0; + output[j].swizzle_y = 1; + output[j].swizzle_z = 2; + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; + output[j].burst_count = 1; + output[j].array_base = k; + output[j].op = CF_OP_EXPORT; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->nr_ps_color_exports++; + } + } + } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { + output[j].array_base = 61; + output[j].swizzle_x = 2; + output[j].swizzle_y = 7; + output[j].swizzle_z = output[j].swizzle_w = 7; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { + output[j].array_base = 61; + output[j].swizzle_x = 7; + output[j].swizzle_y = 1; + output[j].swizzle_z = output[j].swizzle_w = 7; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + } else { + R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); + r = -EINVAL; + goto out_err; + } + break; + default: + R600_ERR("unsupported processor type %d\n", ctx.type); r = -EINVAL; goto out_err; } - break; - default: - R600_ERR("unsupported processor type %d\n", ctx.type); - r = -EINVAL; - goto out_err; - } - if (output[j].type==-1) { - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; - output[j].array_base = next_param_base++; + if (output[j].type==-1) { + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; + output[j].array_base = next_param_base++; + } } - } - /* add fake position export */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { + /* add fake position export */ + if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -1493,10 +1871,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].array_base = next_pos_base; output[j].op = CF_OP_EXPORT; j++; - } + } - /* add fake param output for vertex shader if no param is exported */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { + /* add fake param output for vertex shader if no param is exported */ + if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -1509,39 +1887,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].array_base = 0; output[j].op = CF_OP_EXPORT; j++; - } + } + + /* add fake pixel export */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = 0; + output[j].elem_size = 3; + output[j].swizzle_x = 7; + output[j].swizzle_y = 7; + output[j].swizzle_z = 7; + output[j].swizzle_w = 7; + output[j].burst_count = 1; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + output[j].array_base = 0; + output[j].op = CF_OP_EXPORT; + j++; + } + + noutput = j; - /* add fake pixel export */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { - memset(&output[j], 0, sizeof(struct r600_bytecode_output)); - output[j].gpr = 0; - output[j].elem_size = 3; - output[j].swizzle_x = 7; - output[j].swizzle_y = 7; - output[j].swizzle_z = 7; - output[j].swizzle_w = 7; - output[j].burst_count = 1; - output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - output[j].array_base = 0; - output[j].op = CF_OP_EXPORT; - j++; - } - - noutput = j; - - /* set export done on last export of each type */ - for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (!(output_done & (1 << output[i].type))) { - output_done |= (1 << output[i].type); - output[i].op = CF_OP_EXPORT_DONE; + /* set export done on last export of each type */ + for (i = noutput - 1, output_done = 0; i >= 0; i--) { + if (!(output_done & (1 << output[i].type))) { + output_done |= (1 << output[i].type); + output[i].op = CF_OP_EXPORT_DONE; + } } - } - /* add output to bytecode */ - if (!use_llvm) { - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; + /* add output to bytecode */ + if (!use_llvm) { + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; + } } } @@ -1552,7 +1931,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, else { const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op); - if (last->flags & CF_CLAUSE) + /* alu clause instructions don't have EOP bit, so add NOP */ + if (last->flags & CF_ALU) r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); ctx.bc->cf_last->end_of_program = 1; @@ -1567,6 +1947,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, goto out_err; } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + if ((r = generate_gs_copy_shader(rctx, pipeshader))) + return r; + } + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; @@ -5561,6 +5946,14 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_gs_emit(struct r600_shader_ctx *ctx) +{ + if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) + emit_gs_ring_writes(ctx); + + return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); +} + static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -5934,8 +6327,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, @@ -6126,8 +6519,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, + {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, |