diff options
author | Marek Olšák <[email protected]> | 2011-07-26 21:15:05 +0200 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2011-07-26 22:35:49 +0200 |
commit | 1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6 (patch) | |
tree | 125e04d4165cb86ed40f0ca45f8f8334a1bef42d /src/gallium/drivers/r300/compiler/radeon_optimize.c | |
parent | 860c51d82711936d343b55aafb46befc8c032fe6 (diff) |
r300g: copy the compiler from r300c
What a beast.
r300g doesn't depend on files from r300c anymore, so r300c is now left
to its own fate. BTW 'make test' can be invoked from the gallium/r300
directory to run some compiler unit tests.
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_optimize.c')
-rw-r--r-- | src/gallium/drivers/r300/compiler/radeon_optimize.c | 700 |
1 files changed, 700 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c new file mode 100644 index 00000000000..39dcb21d4f4 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c @@ -0,0 +1,700 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2010 Tom Stellard <[email protected]> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_compiler_util.h" +#include "radeon_swizzle.h" + +struct src_clobbered_reads_cb_data { + rc_register_file File; + unsigned int Index; + unsigned int Mask; + struct rc_reader_data * ReaderData; +}; + +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, + struct rc_instruction *, + unsigned int); + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, + struct rc_src_register * src) +{ + rc_register_file file = src->File; + struct rc_reader_data * reader_data = data; + + if(!rc_inst_can_use_presub(inst, + reader_data->Writer->U.I.PreSub.Opcode, + rc_swizzle_to_writemask(src->Swizzle), + src, + &reader_data->Writer->U.I.PreSub.SrcReg[0], + &reader_data->Writer->U.I.PreSub.SrcReg[1])) { + reader_data->Abort = 1; + return; + } + + /* XXX This could probably be handled better. */ + if (file == RC_FILE_ADDRESS) { + reader_data->Abort = 1; + return; + } + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_TXD || + inst->U.I.Opcode == RC_OPCODE_TXL || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + reader_data->Abort = 1; + return; + } +} + +static void src_clobbered_reads_cb( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct src_clobbered_reads_cb_data * sc_data = data; + + if (src->File == sc_data->File + && src->Index == sc_data->Index + && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { + + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } + + if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { + sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; + } +} + +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct src_clobbered_reads_cb_data sc_data; + struct rc_reader_data * reader_data = data; + sc_data.File = file; + sc_data.Index = index; + sc_data.Mask = mask; + sc_data.ReaderData = reader_data; + rc_for_all_reads_src(reader_data->Writer, + src_clobbered_reads_cb, &sc_data); +} + +static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct rc_reader_data reader_data; + unsigned int i; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst_mov->U.I.WriteALUResult || + inst_mov->U.I.SaturateMode) + return; + + /* Get a list of all the readers of this MOV instruction. */ + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_mov, &reader_data, + copy_propagate_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return; + + /* Propagate the MOV instruction. */ + for (i = 0; i < reader_data.ReaderCount; i++) { + struct rc_instruction * inst = reader_data.Readers[i].Inst; + *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); + + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate= 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz = 0; + unsigned int negate = 0; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_constant * constant; + struct rc_src_register newsrc; + int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + + /* Convert immediates to swizzles. */ + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + newsrc = inst->U.I.SrcReg[src]; + have_real_reference = 0; + for (chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + unsigned int newswz; + float imm; + float baseimm; + + if (swz >= 4) + continue; + + imm = constant->u.Immediate[swz]; + baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5 && c->has_half_swizzles) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); + + /* In case this instruction has been converted, make sure all of the + * registers that are no longer used are empty. */ + opcode = rc_get_opcode_info(inst->U.I.Opcode); + for(i = opcode->NumSrcRegs; i < 3; i++) { + memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); + } +} + +/** + * If src and dst use the same register, this function returns a writemask that + * indicates wich components are read by src. Otherwise zero is returned. + */ +static unsigned int src_reads_dst_mask(struct rc_src_register src, + struct rc_dst_register dst) +{ + if (dst.File != src.File || dst.Index != src.Index) { + return 0; + } + return rc_swizzle_to_writemask(src.Swizzle); +} + +/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) + * in any of its channels. Return 0 otherwise. */ +static int src_has_const_swz(struct rc_src_register src) { + int chan; + for(chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF + || swz == RC_SWIZZLE_ONE) { + return 1; + } + } + return 0; +} + +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + struct rc_reader_data * reader_data = data; + rc_presubtract_op * presub_opcode = reader_data->CbData; + + if (!rc_inst_can_use_presub(inst, *presub_opcode, + reader_data->Writer->U.I.DstReg.WriteMask, + src, + &reader_data->Writer->U.I.SrcReg[0], + &reader_data->Writer->U.I.SrcReg[1])) { + reader_data->Abort = 1; + return; + } +} + +static int presub_helper( + struct radeon_compiler * c, + struct rc_instruction * inst_add, + rc_presubtract_op presub_opcode, + rc_presub_replace_fn presub_replace) +{ + struct rc_reader_data reader_data; + unsigned int i; + rc_presubtract_op cb_op = presub_opcode; + + reader_data.CbData = &cb_op; + reader_data.ExitOnAbort = 1; + rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, + is_src_clobbered_scan_write); + + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) + presub_replace(inst_add, reader.Inst, src_index); + } + } + return 1; +} + +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + rc_presubtract_op presub_opcode; + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) + presub_opcode = RC_PRESUB_SUB; + else + presub_opcode = RC_PRESUB_ADD; + + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; + } else { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; + } + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; +} + +static int is_presub_candidate( + struct radeon_compiler * c, + struct rc_instruction * inst) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int i; + unsigned int is_constant[2] = {0, 0}; + + assert(inst->U.I.Opcode == RC_OPCODE_ADD); + + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE + || inst->U.I.SaturateMode + || inst->U.I.WriteALUResult) { + return 0; + } + + /* If both sources use a constant swizzle, then we can't convert it to + * a presubtract operation. In fact for the ADD and SUB presubtract + * operations neither source can contain a constant swizzle. This + * specific case is checked in peephole_add_presub_add() when + * we make sure the swizzles for both sources are equal, so we + * don't need to worry about it here. */ + for (i = 0; i < 2; i++) { + int chan; + for (chan = 0; chan < 4; chan++) { + rc_swizzle swz = + get_swz(inst->U.I.SrcReg[i].Swizzle, chan); + if (swz == RC_SWIZZLE_ONE + || swz == RC_SWIZZLE_ZERO + || swz == RC_SWIZZLE_HALF) { + is_constant[i] = 1; + } + } + } + if (is_constant[0] && is_constant[1]) + return 0; + + for(i = 0; i < info->NumSrcRegs; i++) { + struct rc_src_register src = inst->U.I.SrcReg[i]; + if (src_reads_dst_mask(src, inst->U.I.DstReg)) + return 0; + + src.File = RC_FILE_PRESUB; + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) + return 0; + } + return 1; +} + +static int peephole_add_presub_add( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned dstmask = inst_add->U.I.DstReg.WriteMask; + unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; + unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; + + if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) + return 0; + + /* src0 and src1 can't have absolute values */ + if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) + return 0; + + /* presub_replace_add() assumes only one is negative */ + if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) + return 0; + + /* if src0 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) + return 0; + + /* if src1 is negative, at least all bits of dstmask have to be set */ + if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) + return 0; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) +{ + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; +} + +/** + * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] + * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source + * of the add instruction must have the constatnt 1 swizzle. This function + * does not check const registers to see if their value is 1.0, so it should + * be called after the constant_folding optimization. + * @return + * 0 if the ADD instruction is still part of the program. + * 1 if the ADD instruction is no longer part of the program. + */ +static int peephole_add_presub_inv( + struct radeon_compiler * c, + struct rc_instruction * inst_add) +{ + unsigned int i, swz; + + if (!is_presub_candidate(c, inst_add)) + return 0; + + /* Check if src0 is 1. */ + /* XXX It would be nice to use is_src_uniform_constant here, but that + * function only works if the register's file is RC_FILE_NONE */ + for(i = 0; i < 4; i++ ) { + swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); + if(((1 << i) & inst_add->U.I.DstReg.WriteMask) + && swz != RC_SWIZZLE_ONE) { + return 0; + } + } + + /* Check src1. */ + if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != + inst_add->U.I.DstReg.WriteMask + || inst_add->U.I.SrcReg[1].Abs + || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY + && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) + || src_has_const_swz(inst_add->U.I.SrcReg[1])) { + + return 0; + } + + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { + rc_remove_instruction(inst_add); + return 1; + } + return 0; +} + +/** + * @return + * 0 if inst is still part of the program. + * 1 if inst is no longer part of the program. + */ +static int peephole(struct radeon_compiler * c, struct rc_instruction * inst) +{ + switch(inst->U.I.Opcode){ + case RC_OPCODE_ADD: + if (c->has_presub) { + if(peephole_add_presub_inv(c, inst)) + return 1; + if(peephole_add_presub_add(c, inst)) + return 1; + } + break; + default: + break; + } + return 0; +} + +void rc_optimize(struct radeon_compiler * c, void *user) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if(peephole(c, cur)) + continue; + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + copy_propagate(c, cur); + /* cur may no longer be part of the program */ + } + } +} |