summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/r300/compiler/radeon_optimize.c
diff options
context:
space:
mode:
authorMarek Olšák <[email protected]>2011-07-26 21:15:05 +0200
committerMarek Olšák <[email protected]>2011-07-26 22:35:49 +0200
commit1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6 (patch)
tree125e04d4165cb86ed40f0ca45f8f8334a1bef42d /src/gallium/drivers/r300/compiler/radeon_optimize.c
parent860c51d82711936d343b55aafb46befc8c032fe6 (diff)
r300g: copy the compiler from r300c
What a beast. r300g doesn't depend on files from r300c anymore, so r300c is now left to its own fate. BTW 'make test' can be invoked from the gallium/r300 directory to run some compiler unit tests.
Diffstat (limited to 'src/gallium/drivers/r300/compiler/radeon_optimize.c')
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_optimize.c700
1 files changed, 700 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/compiler/radeon_optimize.c b/src/gallium/drivers/r300/compiler/radeon_optimize.c
new file mode 100644
index 00000000000..39dcb21d4f4
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_optimize.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <[email protected]>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_swizzle.h"
+
+struct src_clobbered_reads_cb_data {
+ rc_register_file File;
+ unsigned int Index;
+ unsigned int Mask;
+ struct rc_reader_data * ReaderData;
+};
+
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
+ struct rc_instruction *,
+ unsigned int);
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+ struct rc_src_register combine;
+ combine.File = inner.File;
+ combine.Index = inner.Index;
+ combine.RelAddr = inner.RelAddr;
+ if (outer.Abs) {
+ combine.Abs = 1;
+ combine.Negate = outer.Negate;
+ } else {
+ combine.Abs = inner.Abs;
+ combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
+ combine.Negate ^= outer.Negate;
+ }
+ combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+ return combine;
+}
+
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ rc_register_file file = src->File;
+ struct rc_reader_data * reader_data = data;
+
+ if(!rc_inst_can_use_presub(inst,
+ reader_data->Writer->U.I.PreSub.Opcode,
+ rc_swizzle_to_writemask(src->Swizzle),
+ src,
+ &reader_data->Writer->U.I.PreSub.SrcReg[0],
+ &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* XXX This could probably be handled better. */
+ if (file == RC_FILE_ADDRESS) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* These instructions cannot read from the constants file.
+ * see radeonTransformTEX()
+ */
+ if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ (inst->U.I.Opcode == RC_OPCODE_TEX ||
+ inst->U.I.Opcode == RC_OPCODE_TXB ||
+ inst->U.I.Opcode == RC_OPCODE_TXP ||
+ inst->U.I.Opcode == RC_OPCODE_TXD ||
+ inst->U.I.Opcode == RC_OPCODE_TXL ||
+ inst->U.I.Opcode == RC_OPCODE_KIL)){
+ reader_data->Abort = 1;
+ return;
+ }
+}
+
+static void src_clobbered_reads_cb(
+ void * data,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct src_clobbered_reads_cb_data * sc_data = data;
+
+ if (src->File == sc_data->File
+ && src->Index == sc_data->Index
+ && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
+
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+ }
+
+ if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+ }
+}
+
+static void is_src_clobbered_scan_write(
+ void * data,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct src_clobbered_reads_cb_data sc_data;
+ struct rc_reader_data * reader_data = data;
+ sc_data.File = file;
+ sc_data.Index = index;
+ sc_data.Mask = mask;
+ sc_data.ReaderData = reader_data;
+ rc_for_all_reads_src(reader_data->Writer,
+ src_clobbered_reads_cb, &sc_data);
+}
+
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct rc_reader_data reader_data;
+ unsigned int i;
+
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+ inst_mov->U.I.WriteALUResult ||
+ inst_mov->U.I.SaturateMode)
+ return;
+
+ /* Get a list of all the readers of this MOV instruction. */
+ reader_data.ExitOnAbort = 1;
+ rc_get_readers(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, NULL,
+ is_src_clobbered_scan_write);
+
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ return;
+
+ /* Propagate the MOV instruction. */
+ for (i = 0; i < reader_data.ReaderCount; i++) {
+ struct rc_instruction * inst = reader_data.Readers[i].Inst;
+ *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
+
+ if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = inst_mov->U.I.PreSub;
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+ rc_swizzle * pswz, unsigned int * pnegate)
+{
+ int have_used = 0;
+
+ if (src.File != RC_FILE_NONE) {
+ *pswz = 0;
+ return 0;
+ }
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz < 4) {
+ *pswz = 0;
+ return 0;
+ }
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+
+ if (!have_used) {
+ *pswz = swz;
+ *pnegate = GET_BIT(src.Negate, chan);
+ have_used = 1;
+ } else {
+ if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+ *pswz = 0;
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+ rc_swizzle swz = 0;
+ unsigned int negate= 0;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MUL;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+ rc_swizzle swz = 0;
+ unsigned int negate = 0;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+ rc_swizzle swz = 0;
+ unsigned int negate = 0;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ return;
+ }
+ }
+}
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int i;
+
+ /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ struct rc_constant * constant;
+ struct rc_src_register newsrc;
+ int have_real_reference;
+ unsigned int chan;
+
+ /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
+ for (chan = 0; chan < 4; ++chan)
+ if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+ break;
+ if (chan == 4) {
+ inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+ continue;
+ }
+
+ /* Convert immediates to swizzles. */
+ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+ inst->U.I.SrcReg[src].RelAddr ||
+ inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+ continue;
+
+ constant =
+ &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+ if (constant->Type != RC_CONSTANT_IMMEDIATE)
+ continue;
+
+ newsrc = inst->U.I.SrcReg[src];
+ have_real_reference = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ unsigned int newswz;
+ float imm;
+ float baseimm;
+
+ if (swz >= 4)
+ continue;
+
+ imm = constant->u.Immediate[swz];
+ baseimm = imm;
+ if (imm < 0.0)
+ baseimm = -baseimm;
+
+ if (baseimm == 0.0) {
+ newswz = RC_SWIZZLE_ZERO;
+ } else if (baseimm == 1.0) {
+ newswz = RC_SWIZZLE_ONE;
+ } else if (baseimm == 0.5 && c->has_half_swizzles) {
+ newswz = RC_SWIZZLE_HALF;
+ } else {
+ have_real_reference = 1;
+ continue;
+ }
+
+ SET_SWZ(newsrc.Swizzle, chan, newswz);
+ if (imm < 0.0 && !newsrc.Abs)
+ newsrc.Negate ^= 1 << chan;
+ }
+
+ if (!have_real_reference) {
+ newsrc.File = RC_FILE_NONE;
+ newsrc.Index = 0;
+ }
+
+ /* don't make the swizzle worse */
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+ c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ continue;
+
+ inst->U.I.SrcReg[src] = newsrc;
+ }
+
+ /* Simplify instructions based on constants */
+ if (inst->U.I.Opcode == RC_OPCODE_MAD)
+ constant_folding_mad(inst);
+
+ /* note: MAD can simplify to MUL or ADD */
+ if (inst->U.I.Opcode == RC_OPCODE_MUL)
+ constant_folding_mul(inst);
+ else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+ constant_folding_add(inst);
+
+ /* In case this instruction has been converted, make sure all of the
+ * registers that are no longer used are empty. */
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ for(i = opcode->NumSrcRegs; i < 3; i++) {
+ memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+ }
+}
+
+/**
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates wich components are read by src. Otherwise zero is returned.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+ struct rc_dst_register dst)
+{
+ if (dst.File != src.File || dst.Index != src.Index) {
+ return 0;
+ }
+ return rc_swizzle_to_writemask(src.Swizzle);
+}
+
+/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
+ * in any of its channels. Return 0 otherwise. */
+static int src_has_const_swz(struct rc_src_register src) {
+ int chan;
+ for(chan = 0; chan < 4; chan++) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
+ || swz == RC_SWIZZLE_ONE) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static void presub_scan_read(
+ void * data,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct rc_reader_data * reader_data = data;
+ rc_presubtract_op * presub_opcode = reader_data->CbData;
+
+ if (!rc_inst_can_use_presub(inst, *presub_opcode,
+ reader_data->Writer->U.I.DstReg.WriteMask,
+ src,
+ &reader_data->Writer->U.I.SrcReg[0],
+ &reader_data->Writer->U.I.SrcReg[1])) {
+ reader_data->Abort = 1;
+ return;
+ }
+}
+
+static int presub_helper(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add,
+ rc_presubtract_op presub_opcode,
+ rc_presub_replace_fn presub_replace)
+{
+ struct rc_reader_data reader_data;
+ unsigned int i;
+ rc_presubtract_op cb_op = presub_opcode;
+
+ reader_data.CbData = &cb_op;
+ reader_data.ExitOnAbort = 1;
+ rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
+ is_src_clobbered_scan_write);
+
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ return 0;
+
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ unsigned int src_index;
+ struct rc_reader reader = reader_data.Readers[i];
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+ for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
+ presub_replace(inst_add, reader.Inst, src_index);
+ }
+ }
+ return 1;
+}
+
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
+{
+ rc_presubtract_op presub_opcode;
+ if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
+ presub_opcode = RC_PRESUB_SUB;
+ else
+ presub_opcode = RC_PRESUB_ADD;
+
+ if (inst_add->U.I.SrcReg[1].Negate) {
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
+ } else {
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
+ }
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = presub_opcode;
+ inst_reader->U.I.SrcReg[src_index] =
+ chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
+}
+
+static int is_presub_candidate(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int i;
+ unsigned int is_constant[2] = {0, 0};
+
+ assert(inst->U.I.Opcode == RC_OPCODE_ADD);
+
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+ || inst->U.I.SaturateMode
+ || inst->U.I.WriteALUResult) {
+ return 0;
+ }
+
+ /* If both sources use a constant swizzle, then we can't convert it to
+ * a presubtract operation. In fact for the ADD and SUB presubtract
+ * operations neither source can contain a constant swizzle. This
+ * specific case is checked in peephole_add_presub_add() when
+ * we make sure the swizzles for both sources are equal, so we
+ * don't need to worry about it here. */
+ for (i = 0; i < 2; i++) {
+ int chan;
+ for (chan = 0; chan < 4; chan++) {
+ rc_swizzle swz =
+ get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
+ if (swz == RC_SWIZZLE_ONE
+ || swz == RC_SWIZZLE_ZERO
+ || swz == RC_SWIZZLE_HALF) {
+ is_constant[i] = 1;
+ }
+ }
+ }
+ if (is_constant[0] && is_constant[1])
+ return 0;
+
+ for(i = 0; i < info->NumSrcRegs; i++) {
+ struct rc_src_register src = inst->U.I.SrcReg[i];
+ if (src_reads_dst_mask(src, inst->U.I.DstReg))
+ return 0;
+
+ src.File = RC_FILE_PRESUB;
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
+ return 0;
+ }
+ return 1;
+}
+
+static int peephole_add_presub_add(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add)
+{
+ unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
+ unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
+ unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
+
+ if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+ return 0;
+
+ /* src0 and src1 can't have absolute values */
+ if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
+ return 0;
+
+ /* presub_replace_add() assumes only one is negative */
+ if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
+ return 0;
+
+ /* if src0 is negative, at least all bits of dstmask have to be set */
+ if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
+ return 0;
+
+ /* if src1 is negative, at least all bits of dstmask have to be set */
+ if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
+ return 0;
+
+ if (!is_presub_candidate(c, inst_add))
+ return 0;
+
+ if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
+ rc_remove_instruction(inst_add);
+ return 1;
+ }
+ return 0;
+}
+
+static void presub_replace_inv(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
+{
+ /* We must be careful not to modify inst_add, since it
+ * is possible it will remain part of the program.*/
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+ inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
+ * of the add instruction must have the constatnt 1 swizzle. This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ * 0 if the ADD instruction is still part of the program.
+ * 1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst_add)
+{
+ unsigned int i, swz;
+
+ if (!is_presub_candidate(c, inst_add))
+ return 0;
+
+ /* Check if src0 is 1. */
+ /* XXX It would be nice to use is_src_uniform_constant here, but that
+ * function only works if the register's file is RC_FILE_NONE */
+ for(i = 0; i < 4; i++ ) {
+ swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+ if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+ && swz != RC_SWIZZLE_ONE) {
+ return 0;
+ }
+ }
+
+ /* Check src1. */
+ if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+ inst_add->U.I.DstReg.WriteMask
+ || inst_add->U.I.SrcReg[1].Abs
+ || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+ && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+ || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+ return 0;
+ }
+
+ if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
+ rc_remove_instruction(inst_add);
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * @return
+ * 0 if inst is still part of the program.
+ * 1 if inst is no longer part of the program.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_ADD:
+ if (c->has_presub) {
+ if(peephole_add_presub_inv(c, inst))
+ return 1;
+ if(peephole_add_presub_add(c, inst))
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+void rc_optimize(struct radeon_compiler * c, void *user)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+
+ constant_folding(c, cur);
+
+ if(peephole(c, cur))
+ continue;
+
+ if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+ copy_propagate(c, cur);
+ /* cur may no longer be part of the program */
+ }
+ }
+}