summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r600/r600_shader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r--src/gallium/drivers/r600/r600_shader.c817
1 files changed, 358 insertions, 459 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index c982471a04f..240c8f1ffd0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -28,6 +28,7 @@
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
+#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
#include <stdio.h>
@@ -175,6 +176,13 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
R_0288CC_SQ_PGM_CF_OFFSET_PS,
0x00000000, 0xFFFFFFFF, NULL);
+ if (rshader->fs_write_all) {
+ r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
+ S_028808_MULTIWRITE_ENABLE(1),
+ S_028808_MULTIWRITE_ENABLE(1),
+ NULL);
+ }
+
if (rshader->uses_kill) {
/* only set some bits here, the other bits are set in the dsa state */
r600_pipe_state_add_reg(rstate,
@@ -187,7 +195,7 @@ static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shade
0xFFFFFFFF, NULL);
}
-int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
@@ -225,12 +233,12 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
return 0;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- u32 *literals;
int r;
/* Would like some magic "get_bool_option_once" routine.
@@ -243,13 +251,12 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
tgsi_dump(tokens, 0);
}
shader->shader.family = r600_get_family(rctx->radeon);
- r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
+ r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
R600_ERR("translation from TGSI failed !\n");
return r;
}
r = r600_bc_build(&shader->shader.bc);
- free(literals);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
@@ -274,6 +281,15 @@ void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader
*/
struct r600_shader_tgsi_instruction;
+struct r600_shader_src {
+ unsigned sel;
+ unsigned swizzle[4];
+ unsigned neg;
+ unsigned abs;
+ unsigned rel;
+ uint32_t value[4];
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
@@ -281,9 +297,11 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
struct r600_shader *shader;
+ struct r600_shader_src src[3];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
@@ -492,9 +510,179 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
+static void tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ struct r600_shader_src *r600_src)
+{
+ memset(r600_src, 0, sizeof(*r600_src));
+ r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
+ r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
+ r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
+ r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
+ r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
+}
+
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{
+ struct r600_bc_vtx vtx;
+ unsigned int ar_reg;
+ int r;
+
+ if (offset) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+ alu.src[0].sel = ctx->ar_reg;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = offset;
+
+ alu.dst.sel = dst_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ ar_reg = dst_reg;
+ } else {
+ ar_reg = ctx->ar_reg;
+ }
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = ar_reg;
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nconst, r;
+
+ for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+ nconst++;
+ }
+ tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
+ }
+ for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+ continue;
+ }
+
+ if (ctx->src[i].rel) {
+ int treg = r600_get_temp(ctx);
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+ return r;
+
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
+ j--;
+ } else if (j > 0) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].rel = ctx->src[i].rel;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel =0;
+ j--;
+ }
+ }
+ return 0;
+}
+
+/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].value = ctx->src[i].value[k];
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ j--;
+ }
+ }
+ return 0;
+}
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
+ struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned noutput;
@@ -558,12 +746,13 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
- ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.temp_reg = ctx.ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
-
+ shader->fs_write_all = FALSE;
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
@@ -592,7 +781,12 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
ctx.max_driver_temp_used = 0;
/* reserve first tmp for everyone */
r600_get_temp(&ctx);
+
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
@@ -602,6 +796,11 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
goto out_err;
break;
case TGSI_TOKEN_TYPE_PROPERTY:
+ property = &ctx.parse.FullToken.FullProperty;
+ if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ if (property->u[0].Data == 1)
+ shader->fs_write_all = TRUE;
+ }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
@@ -619,6 +818,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
switch (ctx.type) {
@@ -680,6 +880,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
noutput++;
@@ -694,6 +895,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
noutput++;
@@ -704,7 +906,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
if (r)
goto out_err;
}
- *literals = ctx.literals;
+ free(ctx.literals);
tgsi_parse_free(&ctx.parse);
return 0;
out_err:
@@ -724,40 +926,22 @@ static int tgsi_end(struct r600_shader_ctx *ctx)
return 0;
}
-static int tgsi_src(struct r600_shader_ctx *ctx,
- const struct tgsi_full_src_register *tgsi_src,
- struct r600_bc_alu_src *r600_src)
+static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan)
{
- memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->neg = tgsi_src->Register.Negate;
- r600_src->abs = tgsi_src->Register.Absolute;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- int index;
- if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
- (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
- (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
-
- index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
- r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
- if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
- return 0;
- }
- index = tgsi_src->Register.Index;
- r600_src->sel = V_SQ_ALU_SRC_LITERAL;
- r600_src->value = ctx->literals + index * 4;
- } else {
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
- r600_src->sel = tgsi_src->Register.Index;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
- }
- return 0;
+ bc_src->sel = shader_src->sel;
+ bc_src->chan = shader_src->swizzle[chan];
+ bc_src->neg = shader_src->neg;
+ bc_src->abs = shader_src->abs;
+ bc_src->rel = shader_src->rel;
+ bc_src->value = shader_src->value[bc_src->chan];
}
-static int tgsi_dst(struct r600_shader_ctx *ctx,
- const struct tgsi_full_dst_register *tgsi_dst,
- unsigned swizzle,
- struct r600_bc_alu_dst *r600_dst)
+static void tgsi_dst(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_dst_register *tgsi_dst,
+ unsigned swizzle,
+ struct r600_bc_alu_dst *r600_dst)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -770,101 +954,6 @@ static int tgsi_dst(struct r600_shader_ctx *ctx,
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
- return 0;
-}
-
-static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
-{
- switch (swizzle) {
- case 0:
- return tgsi_src->Register.SwizzleX;
- case 1:
- return tgsi_src->Register.SwizzleY;
- case 2:
- return tgsi_src->Register.SwizzleZ;
- case 3:
- return tgsi_src->Register.SwizzleW;
- default:
- return 0;
- }
-}
-
-static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nconst, r;
-
- for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- nconst++;
- }
- r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
- if (r) {
- return r;
- }
- }
- for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].rel = r600_src[i].rel;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- r600_src[i].rel =0;
- j--;
- }
- }
- return 0;
-}
-
-/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
-static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nliteral, r;
-
- for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- nliteral++;
- }
- }
- for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].value = r600_src[i].value;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- j--;
- }
- }
- return 0;
}
static int tgsi_last_instruction(unsigned writemask)
@@ -882,38 +971,25 @@ static int tgsi_last_instruction(unsigned writemask)
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.inst = ctx->inst_info->r600_opcode;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
} else {
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
@@ -951,24 +1027,15 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
*/
-static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
- struct r600_bc_alu_src r600_src[3])
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
{
static float half_inv_pi = 1.0 /(3.1415926535 * 2);
static float double_pi = 3.1415926535 * 2;
static float neg_pi = -3.1415926535;
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r;
struct r600_bc_alu alu;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
@@ -977,12 +1044,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[1].value = (uint32_t *)&half_inv_pi;
+ alu.src[1].value = *(uint32_t *)&half_inv_pi;
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
alu.src[2].chan = 0;
alu.last = 1;
@@ -1021,8 +1087,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
alu.src[2].chan = 0;
if (ctx->bc->chiprev == CHIPREV_R600) {
- alu.src[1].value = (uint32_t *)&double_pi;
- alu.src[2].value = (uint32_t *)&neg_pi;
+ alu.src[1].value = *(uint32_t *)&double_pi;
+ alu.src[2].value = *(uint32_t *)&neg_pi;
} else {
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[2].sel = V_SQ_ALU_SRC_0_5;
@@ -1039,12 +1105,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
@@ -1070,9 +1135,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = ctx->temp_reg;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1085,7 +1148,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int r;
@@ -1093,7 +1155,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
* X or Y components of the destination vector.
*/
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
}
@@ -1102,9 +1164,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1118,9 +1178,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
@@ -1136,9 +1194,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = 0;
@@ -1156,9 +1212,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
@@ -1175,7 +1229,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int i, r;
@@ -1191,10 +1244,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
if (i == 3) {
alu.last = 1;
@@ -1214,24 +1264,14 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1240,12 +1280,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- alu.src[0] = r600_src[0];
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
alu.src[1].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1256,9 +1294,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1273,11 +1309,8 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1289,13 +1322,11 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[2], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
@@ -1310,9 +1341,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
@@ -1336,10 +1365,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
alu.src[i].abs = 1;
}
alu.dst.sel = ctx->temp_reg;
@@ -1363,9 +1389,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
alu.src[0].sel = ctx->temp_reg;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.dst.chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
if (i == 3)
alu.last = 1;
@@ -1385,10 +1409,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1402,17 +1423,13 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
/* LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
@@ -1422,10 +1439,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
alu.src[1].sel = ctx->temp_reg;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -1450,16 +1464,8 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int i, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1469,13 +1475,10 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.src[1].sel = V_SQ_ALU_SRC_1;
+ r600_bc_src(&alu.src[2], &ctx->src[0], i);
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -1488,9 +1491,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
alu.is_op3 = 1;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
@@ -1523,9 +1524,7 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
alu.dst.chan = i;
} else {
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
}
@@ -1542,17 +1541,10 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
@@ -1560,14 +1552,10 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -1584,28 +1572,17 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
@@ -1661,11 +1638,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
/* Add perspective divide */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3;
alu.last = 1;
@@ -1679,10 +1653,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 3;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
@@ -1735,14 +1706,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
src2_chan = 0;
break;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ r600_bc_src(&alu.src[0], &ctx->src[0], src_chan);
+ r600_bc_src(&alu.src[1], &ctx->src[0], src2_chan);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1782,7 +1747,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
- alu.src[2].value = (u32*)&one_point_five;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -1803,7 +1768,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
- alu.src[2].value = (u32*)&one_point_five;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -1822,10 +1787,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
@@ -1846,7 +1808,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.resource_id = tex.sampler_id;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
@@ -1872,6 +1834,12 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
tex.coord_type_w = 1;
}
+ if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
+ tex.coord_type_z = 0;
+ tex.src_sel_z = 1;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
+ tex.coord_type_z = 0;
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
tex.src_sel_w = 2;
@@ -1886,36 +1854,23 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* optimize if it's just an equal balance */
- if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.omod = 3;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
if (i == lasti) {
alu.last = 1;
@@ -1936,8 +1891,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
@@ -1959,8 +1913,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == lasti) {
@@ -1980,17 +1933,12 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[1] = r600_src[1];
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
if (i == lasti) {
alu.last = 1;
@@ -2005,37 +1953,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
-
- alu.src[2] = r600_src[1];
- alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
-
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ r600_bc_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2051,7 +1982,6 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
uint32_t use_temp = 0;
int i, r;
@@ -2059,43 +1989,34 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- alu.src[0] = r600_src[0];
switch (i) {
case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 2);
break;
case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
break;
case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
break;
case 3:
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
}
- alu.src[1] = r600_src[1];
switch (i) {
case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 1);
break;
case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 2);
break;
case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 0);
break;
case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
@@ -2117,32 +2038,30 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
- alu.src[0] = r600_src[0];
switch (i) {
case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
break;
case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 2);
break;
case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
break;
case 3:
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
}
- alu.src[1] = r600_src[1];
switch (i) {
case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 2);
break;
case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 0);
break;
case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
+ r600_bc_src(&alu.src[1], &ctx->src[1], 1);
break;
case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
@@ -2155,11 +2074,8 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
if (use_temp)
alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
@@ -2177,7 +2093,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
@@ -2186,11 +2101,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2218,11 +2129,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
- alu.src[0] = r600_src[0];
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
@@ -2242,10 +2149,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2288,11 +2192,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
@@ -2321,11 +2221,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
@@ -2385,11 +2281,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = ctx->temp_reg;
alu.src[1].chan = 1;
@@ -2409,11 +2301,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
@@ -2451,6 +2339,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
@@ -2465,23 +2354,23 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
return -1;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
- alu.dst.chan = 0;
- alu.dst.sel = ctx->temp_reg;
+ alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ /* TODO: Note that the MOVA can be avoided if we never use AR for
+ * indexing non-CB registers in the current ALU clause. Similarly, we
+ * need to load AR from ar_reg again if we started a new clause
+ * between ARL and AR usage. The easy way to do that is to remove
+ * the MOVA here, and load it for the first AR access after ar_reg
+ * has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2495,26 +2384,48 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ alu.src[0].sel = ctx->ar_reg;
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
case TGSI_OPCODE_ARR:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
-
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
-
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
@@ -2534,26 +2445,18 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == 0 || i == 3) {
alu.src[0].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
}
- if (i == 0 || i == 2) {
+ if (i == 0 || i == 2) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
}
if (i == 3)
alu.last = 1;
@@ -2566,7 +2469,6 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
@@ -2578,10 +2480,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
alu.dst.write = 1;
alu.dst.chan = 0;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;