From 26127d6a2f2df8d8833825bbe96b28ed4fc028f0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 22 Dec 2010 17:45:51 +0100 Subject: r600g: rework literal handling --- src/gallium/drivers/r600/r600_asm.c | 189 ++++++++++++++++++------------ src/gallium/drivers/r600/r600_asm.h | 6 +- src/gallium/drivers/r600/r600_shader.c | 203 +++++---------------------------- src/gallium/drivers/r600/r600_shader.h | 2 +- src/gallium/drivers/r600/r700_asm.c | 10 -- 5 files changed, 148 insertions(+), 262 deletions(-) (limited to 'src/gallium/drivers/r600') diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index febf191a6c1..1cd0f4cdcfd 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -599,10 +599,90 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a return 0; } +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg) +{ + switch(value) { + case 0: + *sel = V_SQ_ALU_SRC_0; + break; + case 1: + *sel = V_SQ_ALU_SRC_1_INT; + break; + case -1: + *sel = V_SQ_ALU_SRC_M_1_INT; + break; + case 0x3F800000: // 1.0f + *sel = V_SQ_ALU_SRC_1; + break; + case 0x3F000000: // 0.5f + *sel = V_SQ_ALU_SRC_0_5; + break; + case 0xBF800000: // -1.0f + *sel = V_SQ_ALU_SRC_1; + *neg ^= 1; + break; + case 0xBF000000: // -0.5f + *sel = V_SQ_ALU_SRC_0_5; + *neg ^= 1; + break; + default: + *sel = V_SQ_ALU_SRC_LITERAL; + break; + } +} + +/* compute how many literal are needed */ +static int r600_bc_alu_nliterals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned *nliteral) +{ + unsigned num_src = r600_bc_get_num_operands(alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value[alu->src[i].chan]; + unsigned found = 0; + for (j = 0; j < *nliteral; ++j) { + if (literal[j] == value) { + found = 1; + break; + } + } + if (!found) { + if (*nliteral >= 4) + return -EINVAL; + literal[(*nliteral)++] = value; + } + } + } + return 0; +} + +static void r600_bc_alu_adjust_literals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned nliteral) +{ + unsigned num_src = r600_bc_get_num_operands(alu); + unsigned i, j; + + for (i = 0; i < num_src; ++i) { + if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) { + uint32_t value = alu->src[i].value[alu->src[i].chan]; + for (j = 0; j < nliteral; ++j) { + if (literal[j] == value) { + alu->src[i].chan = j; + break; + } + } + } + } +} + static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev) { struct r600_bc_alu *prev[5]; struct r600_bc_alu *result[5] = { NULL }; + + uint32_t literal[4]; + unsigned nliteral = 0; + int i, j, r, src, num_src; int num_once_inst = 0; @@ -611,13 +691,12 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s return r; for (i = 0; i < 5; ++i) { - // TODO: we have literals? forget it! - if (prev[i] && prev[i]->nliteral) + /* check number of literals */ + if (prev[i] && r600_bc_alu_nliterals(prev[i], literal, &nliteral)) return 0; - if (slots[i] && slots[i]->nliteral) + if (slots[i] && r600_bc_alu_nliterals(slots[i], literal, &nliteral)) return 0; - // let's check used slots if (prev[i] && !slots[i]) { result[i] = prev[i]; @@ -711,7 +790,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (nalu == NULL) return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bc_alu)); - nalu->nliteral = 0; if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) { /* check if we could add it anyway */ @@ -749,20 +827,10 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { bc->ngpr = alu->src[i].sel + 1; } - /* compute how many literal are needed - * either 2 or 4 literals - */ - if (alu->src[i].sel == 253) { - if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { - nalu->nliteral = (alu->src[i].chan + 2) & 0x6; - } - } - } - if (!LIST_IS_EMPTY(&bc->cf_last->alu)) { - lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!lalu->last && lalu->nliteral > nalu->nliteral) { - nalu->nliteral = lalu->nliteral; - } + if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) + r600_bc_special_constants( + nalu->src[i].value[nalu->src[i].chan], + &nalu->src[i].sel, &nalu->src[i].neg); } if (alu->dst.sel >= bc->ngpr) { bc->ngpr = alu->dst.sel + 1; @@ -809,46 +877,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) -{ - struct r600_bc_alu *alu; - - if (bc->cf_last == NULL) { - return 0; - } - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { - return 0; - } - /* all same on EG */ - if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END || - bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { - return 0; - } - /* same on EG */ - if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) && - (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || - LIST_IS_EMPTY(&bc->cf_last->alu)) { - R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); - return -EINVAL; - } - alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list); - if (!alu->last || !alu->nliteral || alu->literal_added) { - return 0; - } - memcpy(alu->value, value, 4 * 4); - bc->cf_last->ndw += alu->nliteral; - bc->ndw += alu->nliteral; - alu->literal_added = 1; - return 0; -} - int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -999,8 +1027,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign /* r600 only, r700/eg bits in r700_asm.c */ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - /* don't replace gpr by pv or ps for destination register */ bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | @@ -1037,14 +1063,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } @@ -1122,8 +1140,10 @@ int r600_bc_build(struct r600_bc *bc) struct r600_bc_alu *alu; struct r600_bc_vtx *vtx; struct r600_bc_tex *tex; + uint32_t literal[4]; + unsigned nliteral; unsigned addr; - int r; + int i, r; if (bc->callstack[0].max > 0) bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2; @@ -1140,6 +1160,16 @@ int r600_bc_build(struct r600_bc *bc) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; + LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(alu, literal, &nliteral); + if (r) + return r; + if (alu->last) { + cf->ndw += align(nliteral, 2); + nliteral = 0; + } + } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -1188,7 +1218,12 @@ int r600_bc_build(struct r600_bc *bc) case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): + nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r = r600_bc_alu_nliterals(alu, literal, &nliteral); + if (r) + return r; + r600_bc_alu_adjust_literals(alu, literal, nliteral); switch(bc->chiprev) { case CHIPREV_R600: r = r600_bc_alu_build(bc, alu, addr); @@ -1205,7 +1240,10 @@ int r600_bc_build(struct r600_bc *bc) return r; addr += 2; if (alu->last) { - addr += alu->nliteral; + for (i = 0; i < align(nliteral, 2); ++i) { + bc->bytecode[addr++] = literal[i]; + } + nliteral = 0; } } break; @@ -1292,6 +1330,8 @@ void r600_bc_dump(struct r600_bc *bc) struct r600_bc_tex *tex; unsigned i, id; + uint32_t literal[4]; + unsigned nliteral; char chip = '6'; switch (bc->chiprev) { @@ -1378,7 +1418,10 @@ void r600_bc_dump(struct r600_bc *bc) } id = cf->addr; + nliteral = 0; LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { + r600_bc_alu_nliterals(alu, literal, &nliteral); + fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel); fprintf(stderr, "REL:%d ", alu->src[0].rel); @@ -1413,10 +1456,12 @@ void r600_bc_dump(struct r600_bc *bc) id++; if (alu->last) { - for (i = 0; i < alu->nliteral; i++, id++) { + for (i = 0; i < nliteral; i++, id++) { float *f = (float*)(bc->bytecode + id); fprintf(stderr, "%04d %08X %f\n", id, bc->bytecode[id], *f); } + id += nliteral & 1; + nliteral = 0; } } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 013df54b32e..259b264e4d9 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -34,6 +34,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; + u32 *value; }; struct r600_bc_alu_dst { @@ -52,11 +53,8 @@ struct r600_bc_alu { unsigned last; unsigned is_op3; unsigned predicate; - unsigned nliteral; - unsigned literal_added; unsigned bank_swizzle; unsigned bank_swizzle_force; - u32 value[4]; unsigned omod; }; @@ -195,13 +193,13 @@ void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); int r600_bc_init(struct r600_bc *bc, enum radeon_family family); void r600_bc_clear(struct r600_bc *bc); int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu); -int r600_bc_add_literal(struct r600_bc *bc, const u32 *value); int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); int r600_bc_add_cfinst(struct r600_bc *bc, int inst); int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); +void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 80579b8d871..f2e74c9cee2 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -225,21 +225,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) return 0; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; + u32 *literals; int r; //fprintf(stderr, "--------------------------------------------------------------\n"); //tgsi_dump(tokens, 0); shader->shader.family = r600_get_family(rctx->radeon); - r = r600_shader_from_tgsi(tokens, &shader->shader); + r = r600_shader_from_tgsi(tokens, &shader->shader, &literals); if (r) { R600_ERR("translation from TGSI failed !\n"); return r; } r = r600_bc_build(&shader->shader.bc); + free(literals); if (r) { R600_ERR("building bytecode failed !\n"); return r; @@ -272,7 +274,6 @@ struct r600_shader_ctx { struct r600_shader_tgsi_instruction *inst_info; struct r600_bc *bc; struct r600_shader *shader; - u32 value[4]; u32 *literals; u32 nliterals; u32 max_driver_temp_used; @@ -481,7 +482,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) return ctx->num_interp_gpr; } -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals) { struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; @@ -583,9 +584,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; r = ctx.inst_info->process(&ctx); - if (r) - goto out_err; - r = r600_bc_add_literal(ctx.bc, ctx.value); if (r) goto out_err; break; @@ -706,7 +704,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s if (r) goto out_err; } - free(ctx.literals); + *literals = ctx.literals; tgsi_parse_free(&ctx.parse); return 0; out_err: @@ -740,38 +738,13 @@ static int tgsi_src(struct r600_shader_ctx *ctx, (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; - switch(ctx->literals[index]) { - case 0: - r600_src->sel = V_SQ_ALU_SRC_0; - return 0; - case 1: - r600_src->sel = V_SQ_ALU_SRC_1_INT; - return 0; - case -1: - r600_src->sel = V_SQ_ALU_SRC_M_1_INT; - return 0; - case 0x3F800000: // 1.0f - r600_src->sel = V_SQ_ALU_SRC_1; + r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); + if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) return 0; - case 0x3F000000: // 0.5f - r600_src->sel = V_SQ_ALU_SRC_0_5; - return 0; - case 0xBF800000: // -1.0f - r600_src->sel = V_SQ_ALU_SRC_1; - r600_src->neg ^= 1; - return 0; - case 0xBF000000: // -0.5f - r600_src->sel = V_SQ_ALU_SRC_0_5; - r600_src->neg ^= 1; - return 0; - } } index = tgsi_src->Register.Index; r600_src->sel = V_SQ_ALU_SRC_LITERAL; - ctx->value[0] = ctx->literals[index * 4 + 0]; - ctx->value[1] = ctx->literals[index * 4 + 1]; - ctx->value[2] = ctx->literals[index * 4 + 2]; - ctx->value[3] = ctx->literals[index * 4 + 3]; + r600_src->value = ctx->literals + index * 4; } else { if (tgsi_src->Register.Indirect) r600_src->rel = V_SQ_REL_RELATIVE; @@ -877,6 +850,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; + alu.src[0].value = r600_src[i].value; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -886,9 +860,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_ if (r) return r; } - r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); - if (r) - return r; r600_src[i].sel = treg; j--; } @@ -983,12 +954,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) static int tgsi_setup_trig(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) { + static float half_inv_pi = 1.0 /(3.1415926535 * 2); + static float double_pi = 3.1415926535 * 2; + static float neg_pi = -3.1415926535; + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; int r; - uint32_t lit_vals[4]; struct r600_bc_alu alu; - memset(lit_vals, 0, 4*4); r = tgsi_split_constant(ctx, r600_src); if (r) return r; @@ -996,9 +969,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); - lit_vals[1] = fui(0.5f); - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1012,13 +982,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = (uint32_t *)&half_inv_pi; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; @@ -1036,14 +1004,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, if (r) return r; - if (ctx->bc->chiprev == CHIPREV_R600) { - lit_vals[0] = fui(3.1415926535897f * 2.0f); - lit_vals[1] = fui(-3.1415926535897f); - } else { - lit_vals[0] = fui(1.0f); - lit_vals[1] = fui(-0.5f); - } - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); alu.is_op3 = 1; @@ -1059,11 +1019,18 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx, alu.src[1].chan = 0; alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 1; + + if (ctx->bc->chiprev == CHIPREV_R600) { + alu.src[1].value = (uint32_t *)&double_pi; + alu.src[2].value = (uint32_t *)&neg_pi; + } else { + alu.src[1].sel = V_SQ_ALU_SRC_1; + alu.src[2].sel = V_SQ_ALU_SRC_0_5; + alu.src[2].neg = 1; + } + alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc, lit_vals); if (r) return r; return 0; @@ -1181,10 +1148,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* dst.w = 1.0; */ @@ -1205,10 +1168,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return 0; @@ -1244,9 +1203,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* kill must be last in ALU */ ctx->bc->force_add_cf = 1; @@ -1309,10 +1265,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - if (inst->Dst[0].Register.WriteMask & (1 << 2)) { int chan; @@ -1331,10 +1283,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - chan = alu.dst.chan; sel = alu.dst.sel; @@ -1357,9 +1305,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -1401,9 +1346,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); if (r) return r; /* replicate result */ @@ -1452,9 +1394,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); if (r) return r; /* replicate result */ @@ -1478,9 +1417,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); if (r) return r; /* b * LOG2(a) */ @@ -1495,9 +1431,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); if (r) return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ @@ -1508,9 +1441,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) alu.dst.write = 1; alu.last = 1; r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; - r = r600_bc_add_literal(ctx->bc,ctx->value); if (r) return r; return tgsi_helper_tempx_replicate(ctx); @@ -1552,9 +1482,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { @@ -1589,9 +1516,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru struct r600_bc_alu alu; int i, r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { @@ -1720,6 +1644,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) static int tgsi_tex(struct r600_shader_ctx *ctx) { + static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_tex tex; struct r600_bc_alu alu; @@ -1729,7 +1654,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && inst->Src[0].Register.File != TGSI_FILE_INPUT; - uint32_t lit_vals[4]; src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; @@ -1878,6 +1802,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; + alu.src[2].value = (u32*)&one_point_five; alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -1888,11 +1813,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; - lit_vals[0] = fui(1.5f); - - r = r600_bc_add_literal(ctx->bc, lit_vals); - if (r) - return r; src_not_temp = FALSE; src_gpr = ctx->temp_reg; } @@ -2026,9 +1946,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* (1 - src0) * src2 */ for (i = 0; i < lasti + 1; i++) { @@ -2051,9 +1968,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) if (r) return r; } - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; /* src0 * src1 + (1 - src0) * src2 */ for (i = 0; i < lasti + 1; i++) { @@ -2194,10 +2108,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } for (i = 0; i < 4; i++) { @@ -2255,10 +2165,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } if (use_temp) return tgsi_helper_copy(ctx, inst); @@ -2291,10 +2197,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2306,10 +2208,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = tmp - floor(tmp); */ @@ -2335,9 +2233,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = RoughApprox2ToX(tmp);*/ @@ -2358,9 +2253,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0;*/ @@ -2378,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); } @@ -2410,10 +2299,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -2426,10 +2311,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.y = src.x / (2 ^ floor(log2(src.x))); */ @@ -2452,10 +2333,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); @@ -2471,10 +2348,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); @@ -2490,10 +2363,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -2509,10 +2378,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); @@ -2534,10 +2399,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.z = log2(src);*/ @@ -2559,10 +2420,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } /* result.w = 1.0; */ @@ -2581,10 +2438,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx) r = r600_bc_add_alu(ctx->bc, &alu); if (r) return r; - - r = r600_bc_add_literal(ctx->bc, ctx->value); - if (r) - return r; } return tgsi_helper_copy(ctx, inst); diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 35b0331525a..935dd6fe3ab 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -47,6 +47,6 @@ struct r600_shader { boolean uses_kill; }; -int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); +int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals); #endif diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c index 3eb6fb50ca7..a7f2f54736e 100644 --- a/src/gallium/drivers/r600/r700_asm.c +++ b/src/gallium/drivers/r600/r700_asm.c @@ -29,8 +29,6 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { - unsigned i; - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | @@ -67,13 +65,5 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } - if (alu->last) { - if (alu->nliteral && !alu->literal_added) { - R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); - } - for (i = 0; i < alu->nliteral; i++) { - bc->bytecode[id++] = alu->value[i]; - } - } return 0; } -- cgit v1.2.3