diff options
-rw-r--r-- | src/mesa/drivers/dri/i915/i915_fragprog.c | 82 |
1 files changed, 70 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 930c2b876bc..67eff7665d1 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -817,23 +817,52 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); + /* If both operands are uniforms or constants, we get 5 instructions + * like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SGE CONST[0].xxxx, U[1] + * U[1] = MOV CONST[1].-x-y-z-w + * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1] + * R[0].xyz = MUL R[0], U[0] + * + * This code is stupid. Instead of having the individual calls to + * i915_emit_arith generate the moves to utemps, do it in the caller. + * This results in code like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SGE CONST[0].xxxx, U[1] + * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w + * R[0].xyz = MUL R[0], U[0] + */ + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST + && GET_UREG_TYPE(src1) == REG_TYPE_CONST) { + unsigned tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + src1, 0, 0); + + src1 = tmp; + } + /* tmp = src1 >= src2 */ i915_emit_arith(p, A0_SGE, tmp, flags, 0, - src_vector(p, &inst->SrcReg[0], program), - src_vector(p, &inst->SrcReg[1], program), + src0, + src1, 0); /* dst = src1 <= src2 */ i915_emit_arith(p, A0_SGE, dst, flags, 0, - negate(src_vector(p, &inst->SrcReg[0], program), - 1, 1, 1, 1), - negate(src_vector(p, &inst->SrcReg[1], program), - 1, 1, 1, 1), + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); /* dst = tmp && dst */ i915_emit_arith(p, @@ -966,23 +995,52 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); + /* If both operands are uniforms or constants, we get 5 instructions + * like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SLT CONST[0].xxxx, U[1] + * U[1] = MOV CONST[1].-x-y-z-w + * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1] + * R[0].xyz = MUL R[0], U[0] + * + * This code is stupid. Instead of having the individual calls to + * i915_emit_arith generate the moves to utemps, do it in the caller. + * This results in code like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SLT CONST[0].xxxx, U[1] + * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1].-x-y-z-w + * R[0].xyz = MUL R[0], U[0] + */ + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST + && GET_UREG_TYPE(src1) == REG_TYPE_CONST) { + unsigned tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + src1, 0, 0); + + src1 = tmp; + } + /* tmp = src1 < src2 */ i915_emit_arith(p, A0_SLT, tmp, flags, 0, - src_vector(p, &inst->SrcReg[0], program), - src_vector(p, &inst->SrcReg[1], program), + src0, + src1, 0); /* dst = src1 > src2 */ i915_emit_arith(p, A0_SLT, dst, flags, 0, - negate(src_vector(p, &inst->SrcReg[0], program), - 1, 1, 1, 1), - negate(src_vector(p, &inst->SrcReg[1], program), - 1, 1, 1, 1), + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); /* dst = tmp || dst */ i915_emit_arith(p, |