summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Christoph Bumiller <[email protected]> 2010-12-27 20:59:53 +0100
committer: Christoph Bumiller <[email protected]> 2010-12-27 20:59:53 +0100
commit: 4fa429c876806d5ad614de469dec76a54a55bb74 (patch)
tree: 0c1523d23e264666a8410b104f6727bea7042603
parent: a10b1c12045c514c98d3bc343c26cea5f2497b9f (diff)
nvc0: reenable some shader optimizations
CSE and constant folding.
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_pc_optimize.c 96
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_pc_print.c 18
2 files changed, 63 insertions, 51 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
index e9e387632b9..8aff875fc37 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_optimize.c
@@ -276,7 +276,7 @@ nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
for (s = 0; s < 3 && nvi->src[s]; ++s) {
ld = nvi->src[s]->value->insn;
- if (!ld || ld->opcode != NV_OP_LD)
+ if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
continue;
if (!nvc0_insn_can_load(nvi, s, ld))
continue;
@@ -383,9 +383,8 @@ nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
-/*
static void
-modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
+apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
if (mod & NV_MOD_ABS) {
if (type == NV_TYPE_F32)
@@ -400,10 +399,28 @@ modifiers_apply(uint32_t *val, ubyte type, ubyte mod)
else
*val = ~(*val) + 1;
}
+ if (mod & NV_MOD_SAT) {
+ union {
+ float f;
+ uint32_t u;
+ int32_t i;
+ } u;
+ u.u = *val;
+ if (type == NV_TYPE_F32) {
+ u.f = CLAMP(u.f, -1.0f, 1.0f);
+ } else
+ if (type == NV_TYPE_U16) {
+ u.u = MIN2(u.u, 0xffff);
+ } else
+ if (type == NV_TYPE_S16) {
+ u.i = CLAMP(u.i, -32768, 32767);
+ }
+ *val = u.u;
+ }
+ if (mod & NV_MOD_NOT)
+ *val = ~*val;
}
-*/
-#if 0
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
struct nv_value *src0, struct nv_value *src1)
@@ -424,8 +441,8 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
u0.u32 = src0->reg.imm.u32;
u1.u32 = src1->reg.imm.u32;
- modifiers_apply(&u0.u32, type, nvi->src[0]->mod);
- modifiers_apply(&u1.u32, type, nvi->src[1]->mod);
+ apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
+ apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
switch (nvi->opcode) {
case NV_OP_MAD:
@@ -468,14 +485,14 @@ constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
nvi->opcode = NV_OP_MOV;
- val = new_value(pc, NV_FILE_IMM, type);
-
+ val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
val->reg.imm.u32 = u.u32;
nv_reference(pc, nvi, 1, NULL);
nv_reference(pc, nvi, 0, val);
- if (nvi->src[2]) { /* from MAD */
+ if (nvi->src[2]) {
+ /* from MAD */
nvi->src[1] = nvi->src[0];
nvi->src[0] = nvi->src[2];
nvi->src[2] = NULL;
@@ -506,7 +523,7 @@ constant_operand(struct nv_pc *pc,
type = NV_OPTYPE(nvi->opcode);
u.u32 = val->reg.imm.u32;
- modifiers_apply(&u.u32, type, nvi->src[s]->mod);
+ apply_modifiers(&u.u32, type, nvi->src[s]->mod);
switch (NV_BASEOP(nvi->opcode)) {
case NV_OP_MUL:
@@ -576,23 +593,22 @@ constant_operand(struct nv_pc *pc,
break;
}
}
-#endif
static int
nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
{
-#if 0
struct nv_instruction *nvi, *next;
int j;
for (nvi = b->entry; nvi; nvi = next) {
struct nv_value *src0, *src1, *src;
- int mod;
+ int s;
+ uint8_t mod[4];
next = nvi->next;
- src0 = nvcg_find_immediate(nvi->src[0]);
- src1 = nvcg_find_immediate(nvi->src[1]);
+ src0 = nvc0_pc_find_immediate(nvi->src[0]);
+ src1 = nvc0_pc_find_immediate(nvi->src[1]);
if (src0 && src1)
constant_expression(ctx->pc, nvi, src0, src1);
@@ -604,7 +620,7 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
constant_operand(ctx->pc, nvi, src1, 1);
}
- /* try to combine MUL, ADD into MAD */
+ /* check if we can MUL + ADD -> MAD/FMA */
if (nvi->opcode != NV_OP_ADD)
continue;
@@ -622,20 +638,27 @@ nv_pass_lower_arith(struct nv_pass *ctx, struct nv_basic_block *b)
/* could have an immediate from above constant_* */
if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
continue;
+ s = (src == src0) ? 0 : 1;
+
+ mod[0] = nvi->src[0]->mod;
+ mod[1] = nvi->src[1]->mod;
+ mod[2] = src->insn->src[0]->mod;
+ mod[3] = src->insn->src[0]->mod;
+
+ if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
+ continue;
nvi->opcode = NV_OP_MAD;
- mod = nvi->src[(src == src0) ? 0 : 1]->mod;
- nv_reference(ctx->pc, &nvi->src[(src == src0) ? 0 : 1], NULL);
- nvi->src[2] = nvi->src[(src == src0) ? 1 : 0];
+ nv_reference(ctx->pc, nvi, s, NULL);
+ nvi->src[2] = nvi->src[!s];
- assert(!(mod & ~NV_MOD_NEG));
nvi->src[0] = new_ref(ctx->pc, src->insn->src[0]->value);
nvi->src[1] = new_ref(ctx->pc, src->insn->src[1]->value);
- nvi->src[0]->mod = src->insn->src[0]->mod ^ mod;
- nvi->src[1]->mod = src->insn->src[1]->mod;
+ nvi->src[0]->mod = mod[2] ^ mod[s];
+ nvi->src[1]->mod = mod[3];
}
DESCEND_ARBITRARY(j, nv_pass_lower_arith);
-#endif
+
return 0;
}
@@ -1016,7 +1039,6 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
-#if 0
struct nv_instruction *ir, *ik, *next;
struct nv_instruction *entry = b->phi ? b->phi : b->entry;
int s;
@@ -1030,23 +1052,13 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (ir->opcode != ik->opcode || ir->fixed)
continue;
- if (!ir->def[0] || !ik->def[0] ||
- ik->opcode == NV_OP_LDA ||
- ik->opcode == NV_OP_STA ||
- ik->opcode == NV_OP_MOV ||
- nv_is_vector_op(ik->opcode))
- continue; /* ignore loads, stores & moves */
-
- if (ik->src[4] || ir->src[4])
- continue; /* don't mess with address registers */
+ if (!ir->def[0] || !ik->def[0] || ir->def[1] || ik->def[1])
+ continue;
- if (ik->flags_src || ir->flags_src ||
- ik->flags_def || ir->flags_def)
- continue; /* and also not with flags, for now */
+ if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
+ continue;
- if (ik->def[0]->reg.file == NV_FILE_OUT ||
- ir->def[0]->reg.file == NV_FILE_OUT ||
- !values_equal(ik->def[0], ir->def[0]))
+ if (!values_equal(ik->def[0], ir->def[0]))
continue;
for (s = 0; s < 3; ++s) {
@@ -1071,7 +1083,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
if (s == 3) {
nvc0_insn_delete(ir);
++reps;
- nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+ nvc0_pc_replace_value(ctx->pc, ir->def[0], ik->def[0]);
break;
}
}
@@ -1079,7 +1091,7 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
} while(reps);
DESCEND_ARBITRARY(s, nv_pass_cse);
-#endif
+
return 0;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_print.c b/src/gallium/drivers/nvc0/nvc0_pc_print.c
index 6249f1fd1cd..b03826484e4 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_print.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_print.c
@@ -283,21 +283,21 @@ struct nv_op_info nvc0_op_info_table[NV_OP_COUNT + 1] =
{ NV_OP_LD, "ld", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
{ NV_OP_ST, "st", NV_TYPE_ANY, 0, 0, 0, 0, 0, 0, 0, 0 },
{ NV_OP_MOV, "mov", NV_TYPE_ANY, 0, 0, 0, 0, 1, 0, 1, 0 },
- { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
- { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
- { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 1, 0 },
+ { NV_OP_AND, "and", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_OR, "or", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
+ { NV_OP_XOR, "xor", NV_TYPE_U32, NV_MOD_NOT, 0, 1, 0, 1, 0, 6, 0 },
{ NV_OP_SHL, "shl", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_SHR, "shr", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 1, 0 },
{ NV_OP_NOT, "not", NV_TYPE_U32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_SET, "set", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
- { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 1, 2 },
- { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 1, 2 },
- { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+ { NV_OP_ADD, "add", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_SUB, "sub", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 2, 2 },
+ { NV_OP_MUL, "mul", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MAD, "mad", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_ABS, "abs", NV_TYPE_F32, 0, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_NEG, "neg", NV_TYPE_F32, NV_MOD_ABS, 0, 0, 0, 1, 0, 0, 0 },
- { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
- { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 0, 2 },
+ { NV_OP_MAX, "max", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
+ { NV_OP_MIN, "min", NV_TYPE_F32, NV_MOD_SGN, 0, 1, 0, 1, 0, 2, 2 },
{ NV_OP_CVT, "cvt", NV_TYPE_ANY, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },
{ NV_OP_CEIL, "ceil", NV_TYPE_F32, NV_MOD_SGN, 0, 0, 0, 1, 0, 0, 0 },