diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 817 |
1 files changed, 358 insertions, 459 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c982471a04f..240c8f1ffd0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,6 +28,7 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> @@ -175,6 +176,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade R_0288CC_SQ_PGM_CF_OFFSET_PS, 0x00000000, 0xFFFFFFFF, NULL); + if (rshader->fs_write_all) { + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, + S_028808_MULTIWRITE_ENABLE(1), + S_028808_MULTIWRITE_ENABLE(1), + NULL); + } + if (rshader->uses_kill) { /* only set some bits here, the other bits are set in the dsa state */ r600_pipe_state_add_reg(rstate, @@ -187,7 +195,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade 0xFFFFFFFF, NULL); } -int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_shader *rshader = &shader->shader; @@ -225,12 +233,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); + int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { static int dump_shaders = -1; struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; - u32 *literals; int r; /* Would like some magic "get_bool_option_once" routine. @@ -243,13 +251,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s tgsi_dump(tokens, 0); } shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader, &literals); + r = r600_shader_from_tgsi(tokens, &shader->shader); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } r = r600_bc_build(&shader->shader.bc); - free(literals); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -274,6 +281,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader */ struct r600_shader_tgsi_instruction; +struct r600_shader_src { + unsigned sel; + unsigned swizzle[4]; + unsigned neg; + unsigned abs; + unsigned rel; + uint32_t value[4]; +}; + struct r600_shader_ctx { struct tgsi_shader_info info; struct tgsi_parse_context parse; @@ -281,9 +297,11 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; + struct r600_shader_src src[3]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -492,9 +510,179 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) +static void tgsi_src(struct r600_shader_ctx *ctx, + const struct tgsi_full_src_register *tgsi_src, + struct r600_shader_src *r600_src) +{ + memset(r600_src, 0, sizeof(*r600_src)); + r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; + r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; + r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; + r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; + r600_src->neg = tgsi_src->Register.Negate; + r600_src->abs = tgsi_src->Register.Absolute; + if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { + int index; + if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && + (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { + + index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) + return; + } + index = tgsi_src->Register.Index; + r600_src->sel = V_SQ_ALU_SRC_LITERAL; + memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); + } else { + if (tgsi_src->Register.Indirect) + r600_src->rel = V_SQ_REL_RELATIVE; + r600_src->sel = tgsi_src->Register.Index; + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + } +} + +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + +static int tgsi_split_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nconst, r; + + for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + nconst++; + } + tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); + } + for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].rel = ctx->src[i].rel; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + ctx->src[i].rel =0; + j--; + } + } + return 0; +} + +/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ +static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int i, j, k, nliteral, r; + + for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { + if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + nliteral++; + } + } + for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { + if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + int treg = r600_get_temp(ctx); + for (k = 0; k < 4; k++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.src[0].sel = ctx->src[i].sel; + alu.src[0].chan = k; + alu.src[0].value = ctx->src[i].value[k]; + alu.dst.sel = treg; + alu.dst.chan = k; + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + ctx->src[i].sel = treg; + j--; + } + } + return 0; +} + +static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) { struct tgsi_full_immediate *immediate; + struct tgsi_full_property *property; struct r600_shader_ctx ctx; struct r600_bc_output output[32]; unsigned noutput; @@ -558,12 +746,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; - + shader->fs_write_all = FALSE; while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -592,7 +781,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s ctx.max_driver_temp_used = 0; /* reserve first tmp for everyone */ r600_get_temp(&ctx); + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; if (ctx.bc->chiprev == CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else @@ -602,6 +796,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s goto out_err; break; case TGSI_TOKEN_TYPE_PROPERTY: + property = &ctx.parse.FullToken.FullProperty; + if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { + if (property->u[0].Data == 1) + shader->fs_write_all = TRUE; + } break; default: R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); @@ -619,6 +818,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = i - pos0; switch (ctx.type) { @@ -680,6 +880,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[i].swizzle_y = 1; output[i].swizzle_z = 2; output[i].swizzle_w = 3; + output[i].burst_count = 1; output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[i].array_base = 0; noutput++; @@ -694,6 +895,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s output[0].swizzle_y = 7; output[0].swizzle_z = 7; output[0].swizzle_w = 7; + output[0].burst_count = 1; output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[0].array_base = 0; noutput++; @@ -704,7 +906,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; } - *literals = ctx.literals; + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; out_err: @@ -724,40 +926,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx) return 0; } -static int tgsi_src(struct r600_shader_ctx *ctx, - const struct tgsi_full_src_register *tgsi_src, - struct r600_bc_alu_src *r600_src) +static void r600_bc_src(struct r600_bc_alu_src *bc_src, + const struct r600_shader_src *shader_src, + unsigned chan) { - memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); - r600_src->neg = tgsi_src->Register.Negate; - r600_src->abs = tgsi_src->Register.Absolute; - if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { - int index; - if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && - (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && - (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { - - index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); - if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) - return 0; - } - index = tgsi_src->Register.Index; - r600_src->sel = V_SQ_ALU_SRC_LITERAL; - r600_src->value = ctx->literals + index * 4; - } else { - if (tgsi_src->Register.Indirect) - r600_src->rel = V_SQ_REL_RELATIVE; - r600_src->sel = tgsi_src->Register.Index; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; - } - return 0; + bc_src->sel = shader_src->sel; + bc_src->chan = shader_src->swizzle[chan]; + bc_src->neg = shader_src->neg; + bc_src->abs = shader_src->abs; + bc_src->rel = shader_src->rel; + bc_src->value = shader_src->value[bc_src->chan]; } -static int tgsi_dst(struct r600_shader_ctx *ctx, - const struct tgsi_full_dst_register *tgsi_dst, - unsigned swizzle, - struct r600_bc_alu_dst *r600_dst) +static void tgsi_dst(struct r600_shader_ctx *ctx, + const struct tgsi_full_dst_register *tgsi_dst, + unsigned swizzle, + struct r600_bc_alu_dst *r600_dst) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -770,101 +954,6 @@ static int tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - return 0; -} - -static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) -{ - switch (swizzle) { - case 0: - return tgsi_src->Register.SwizzleX; - case 1: - return tgsi_src->Register.SwizzleY; - case 2: - return tgsi_src->Register.SwizzleZ; - case 3: - return tgsi_src->Register.SwizzleW; - default: - return 0; - } -} - -static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nconst, r; - - for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - nconst++; - } - r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); - if (r) { - return r; - } - } - for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].rel = r600_src[i].rel; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - r600_src[i].rel =0; - j--; - } - } - return 0; -} - -/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ -static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) -{ - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu alu; - int i, j, k, nliteral, r; - - for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { - if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { - nliteral++; - } - } - for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) { - int treg = r600_get_temp(ctx); - for (k = 0; k < 4; k++) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - alu.src[0].sel = r600_src[i].sel; - alu.src[0].chan = k; - alu.src[0].value = r600_src[i].value; - alu.dst.sel = treg; - alu.dst.chan = k; - alu.dst.write = 1; - if (k == 3) - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - } - r600_src[i].sel = treg; - j--; - } - } - return 0; } static int tgsi_last_instruction(unsigned writemask) @@ -882,38 +971,25 @@ static int tgsi_last_instruction(unsigned writemask) static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.inst = ctx->inst_info->r600_opcode; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } } else { - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } /* handle some special cases */ switch (ctx->inst_info->tgsi_opcode) { @@ -951,24 +1027,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) * r700 - normalize by dividing by 2PI * see fdo bug 27901 */ -static int tgsi_setup_trig(struct r600_shader_ctx *ctx, - struct r600_bc_alu_src r600_src[3]) +static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { static float half_inv_pi = 1.0 /(3.1415926535 * 2); static float double_pi = 3.1415926535 * 2; static float neg_pi = -3.1415926535; - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int r; struct r600_bc_alu alu; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -977,12 +1044,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value = (uint32_t *)&half_inv_pi; + alu.src[1].value = *(uint32_t *)&half_inv_pi; alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; @@ -1021,8 +1087,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[2].chan = 0; if (ctx->bc->chiprev == CHIPREV_R600) { - alu.src[1].value = (uint32_t *)&double_pi; - alu.src[2].value = (uint32_t *)&neg_pi; + alu.src[1].value = *(uint32_t *)&double_pi; + alu.src[2].value = *(uint32_t *)&neg_pi; } else { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[2].sel = V_SQ_ALU_SRC_0_5; @@ -1039,12 +1105,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; @@ -1070,9 +1135,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = ctx->temp_reg; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == lasti) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1085,7 +1148,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int r; @@ -1093,7 +1155,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) * X or Y components of the destination vector. */ if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { - r = tgsi_setup_trig(ctx, r600_src); + r = tgsi_setup_trig(ctx); if (r) return r; } @@ -1102,9 +1164,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1118,9 +1178,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -1136,9 +1194,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = 0; @@ -1156,9 +1212,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -1175,7 +1229,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) static int tgsi_kill(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int i, r; @@ -1191,10 +1244,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[1].neg = 1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); } if (i == 3) { alu.last = 1; @@ -1214,24 +1264,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1240,12 +1280,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); - alu.src[0] = r600_src[0]; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1256,9 +1294,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1273,11 +1309,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.z = log(src.y) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1289,13 +1322,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); alu.src[1].sel = sel; alu.src[1].chan = chan; - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[2], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; alu.dst.write = 1; @@ -1310,9 +1341,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; - r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) @@ -1336,10 +1365,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); alu.src[i].abs = 1; } alu.dst.sel = ctx->temp_reg; @@ -1363,9 +1389,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) alu.src[0].sel = ctx->temp_reg; alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.dst.chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; if (i == 3) alu.last = 1; @@ -1385,10 +1409,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); - if (r) - return r; - alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); + r600_bc_src(&alu.src[i], &ctx->src[i], 0); } alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1402,17 +1423,13 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) static int tgsi_pow(struct r600_shader_ctx *ctx) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; alu.last = 1; @@ -1422,10 +1439,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1450,16 +1464,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - struct r600_bc_alu_src r600_src[3]; int i, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -1469,13 +1475,10 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.src[1].sel = V_SQ_ALU_SRC_1; + r600_bc_src(&alu.src[2], &ctx->src[0], i); - alu.src[2] = r600_src[0]; - alu.src[2].chan = tgsi_chan(&inst->Src[0], i); if (i == 3) alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -1488,9 +1491,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); alu.is_op3 = 1; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -1523,9 +1524,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru alu.dst.chan = i; } else { alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; } @@ -1542,17 +1541,10 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru static int tgsi_op3(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -1560,14 +1552,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -1584,28 +1572,17 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) static int tgsi_dp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, j, r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = ctx->inst_info->r600_opcode; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - alu.src[j] = r600_src[j]; - alu.src[j].chan = tgsi_chan(&inst->Src[j], i); + r600_bc_src(&alu.src[j], &ctx->src[j], i); } - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; /* handle some special cases */ @@ -1661,11 +1638,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* Add perspective divide */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 3; alu.last = 1; @@ -1679,10 +1653,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 3; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; alu.dst.write = 1; @@ -1735,14 +1706,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src2_chan = 0; break; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); + r600_bc_src(&alu.src[0], &ctx->src[0], src_chan); + r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1782,7 +1747,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -1803,7 +1768,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = (u32*)&one_point_five; + alu.src[2].value = *(uint32_t *)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1822,10 +1787,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == 3) @@ -1846,7 +1808,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -1872,6 +1834,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.coord_type_w = 1; } + if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { + tex.coord_type_z = 0; + tex.src_sel_z = 1; + } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) + tex.coord_type_z = 0; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) tex.src_sel_w = 2; @@ -1886,36 +1854,23 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_lrp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); unsigned i; int r; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - /* optimize if it's just an equal balance */ - if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) { + if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); - alu.src[0] = r600_src[1]; - alu.src[0].chan = tgsi_chan(&inst->Src[1], i); - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[0], &ctx->src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -1936,8 +1891,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; - alu.src[1] = r600_src[0]; - alu.src[1].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.src[1].neg = 1; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1959,8 +1913,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; if (i == lasti) { @@ -1980,17 +1933,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - alu.src[1] = r600_src[1]; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); alu.src[2].sel = ctx->temp_reg; alu.src[2].chan = i; - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; if (i == lasti) { alu.last = 1; @@ -2005,37 +1953,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) static int tgsi_cmp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; int i, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < lasti + 1; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); - alu.src[0] = r600_src[0]; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); - - alu.src[1] = r600_src[2]; - alu.src[1].chan = tgsi_chan(&inst->Src[2], i); - - alu.src[2] = r600_src[1]; - alu.src[2].chan = tgsi_chan(&inst->Src[1], i); - - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - + r600_bc_src(&alu.src[0], &ctx->src[0], i); + r600_bc_src(&alu.src[1], &ctx->src[2], i); + r600_bc_src(&alu.src[2], &ctx->src[1], i); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2051,7 +1982,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) static int tgsi_xpd(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3]; struct r600_bc_alu alu; uint32_t use_temp = 0; int i, r; @@ -2059,43 +1989,34 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask != 0xf) use_temp = 1; - r = tgsi_split_constant(ctx, r600_src); - if (r) - return r; - r = tgsi_split_literal_constant(ctx, r600_src); - if (r) - return r; - for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2117,32 +2038,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); - alu.src[0] = r600_src[0]; switch (i) { case 0: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); break; case 1: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); + r600_bc_src(&alu.src[0], &ctx->src[0], 2); break; case 2: - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); break; case 3: alu.src[0].sel = V_SQ_ALU_SRC_0; alu.src[0].chan = i; } - alu.src[1] = r600_src[1]; switch (i) { case 0: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); + r600_bc_src(&alu.src[1], &ctx->src[1], 2); break; case 1: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); + r600_bc_src(&alu.src[1], &ctx->src[1], 0); break; case 2: - alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); + r600_bc_src(&alu.src[1], &ctx->src[1], 1); break; case 3: alu.src[1].sel = V_SQ_ALU_SRC_0; @@ -2155,11 +2074,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) if (use_temp) alu.dst.sel = ctx->temp_reg; - else { - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; - } + else + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.chan = i; alu.dst.write = 1; alu.is_op3 = 1; @@ -2177,7 +2093,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) static int tgsi_exp(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; - struct r600_bc_alu_src r600_src[3] = { { 0 } }; struct r600_bc_alu alu; int r; @@ -2186,11 +2101,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2218,11 +2129,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); - alu.src[0] = r600_src[0]; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; // r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2242,10 +2149,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2288,11 +2192,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -2321,11 +2221,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -2385,11 +2281,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = ctx->temp_reg; alu.src[1].chan = 1; @@ -2409,11 +2301,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2451,6 +2339,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { @@ -2465,23 +2354,23 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) return -1; } - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2495,26 +2384,48 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2534,26 +2445,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); - r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - if (r) - return r; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { alu.src[0].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], i); + r600_bc_src(&alu.src[0], &ctx->src[0], i); } - if (i == 0 || i == 2) { + if (i == 0 || i == 2) { alu.src[1].sel = V_SQ_ALU_SRC_1; } else { - r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); - if (r) - return r; - alu.src[1].chan = tgsi_chan(&inst->Src[1], i); + r600_bc_src(&alu.src[1], &ctx->src[1], i); } if (i == 3) alu.last = 1; @@ -2566,7 +2469,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) { - struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; @@ -2578,10 +2480,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.dst.write = 1; alu.dst.chan = 0; - r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); - if (r) - return r; - alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; alu.src[1].chan = 0; |