diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_shader.c')
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 119 |
1 files changed, 107 insertions, 12 deletions
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 50f9ed6eda4..acb3ef2c4d6 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -28,6 +28,7 @@ #include "r600_pipe.h" #include "r600_asm.h" #include "r600_sq.h" +#include "r600_formats.h" #include "r600_opcodes.h" #include "r600d.h" #include <stdio.h> @@ -296,6 +297,7 @@ struct r600_shader_ctx { unsigned type; unsigned file_offset[TGSI_FILE_COUNT]; unsigned temp_reg; + unsigned ar_reg; struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; @@ -541,6 +543,55 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } } +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +{ + struct r600_bc_vtx vtx; + unsigned int ar_reg; + int r; + + if (offset) { + struct r600_bc_alu alu; + + memset(&alu, 0, sizeof(alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.src[0].sel = ctx->ar_reg; + + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = offset; + + alu.dst.sel = dst_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + ar_reg = dst_reg; + } else { + ar_reg = ctx->ar_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = ar_reg; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = dst_reg; + vtx.dst_sel_x = 0; /* SEL_X */ + vtx.dst_sel_y = 1; /* SEL_Y */ + vtx.dst_sel_z = 2; /* SEL_Z */ + vtx.dst_sel_w = 3; /* SEL_W */ + vtx.data_format = FMT_32_32_32_32_FLOAT; + vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ + vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) + return r; + + return 0; +} + static int tgsi_split_constant(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -554,7 +605,19 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); } for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { - if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { + if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { + continue; + } + + if (ctx->src[i].rel) { + int treg = r600_get_temp(ctx); + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + return r; + + ctx->src[i].sel = treg; + ctx->src[i].rel = 0; + j--; + } else if (j > 0) { int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -683,8 +746,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh ctx.file_offset[TGSI_FILE_CONSTANT] = 512; ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; - ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + + ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_count[TGSI_FILE_TEMPORARY]; + ctx.temp_reg = ctx.ar_reg + 1; ctx.nliterals = 0; ctx.literals = NULL; @@ -1760,7 +1824,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&tex, 0, sizeof(struct r600_bc_tex)); tex.inst = opcode; tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; - tex.resource_id = tex.sampler_id; + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; tex.src_gpr = src_gpr; tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; @@ -2302,15 +2366,21 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) r600_bc_src(&alu.src[0], &ctx->src[0], 0); alu.last = 1; - alu.dst.chan = 0; - alu.dst.sel = ctx->temp_reg; + alu.dst.sel = ctx->ar_reg; alu.dst.write = 1; r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; + + /* TODO: Note that the MOVA can be avoided if we never use AR for + * indexing non-CB registers in the current ALU clause. Similarly, we + * need to load AR from ar_reg again if we started a new clause + * between ARL and AR usage. The easy way to do that is to remove + * the MOVA here, and load it for the first AR access after ar_reg + * has been modified in each clause. */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; - alu.src[0].sel = ctx->temp_reg; + alu.src[0].sel = ctx->ar_reg; alu.src[0].chan = 0; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); @@ -2325,22 +2395,47 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) struct r600_bc_alu alu; int r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; + + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.src[0].sel = ctx->ar_reg; + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; case TGSI_OPCODE_ARR: - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->ar_reg; + alu.dst.write = 1; + alu.last = 1; + + if ((r = r600_bc_add_alu(ctx->bc, &alu))) + return r; break; default: assert(0); return -1; } - r600_bc_src(&alu.src[0], &ctx->src[0], 0); - + memset(&alu, 0, sizeof(alu)); + alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; + alu.src[0].sel = ctx->ar_reg; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); |