summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eric Anholt <[email protected]> 2012-08-27 14:35:01 -0700
committer Eric Anholt <[email protected]> 2012-10-08 08:50:27 -0700
commit 97615b2d8c7c3cea6fd3a43bcb1739a96e2046c4 (patch)
tree e8b5b5bedbf9ec120b2b2cbecce3fda3e49795e8
parent 014aaa97d3d7f78629e6e030953be0e9fb7f33dd (diff)
i965: Replace brw_wm_* with dumping code into the fs_visitor.
This makes a giant pile of code newly dead. It also fixes TXB on newer chipsets, which has been totally broken (I now have a piglit test for that). It passes the same set of Ian's ARB_fragment_program tests. It also improves high-settings ETQW performance by 3.2 +/- 1.9% (n=3), thanks to better optimization and having 8-wide along with 16-wide shaders.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=24355
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/Makefile.sources | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 36
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h | 30
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 22
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 784
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm.c | 58
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c | 19
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 8
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_wm_state.c | 8
10 files changed, 860 insertions, 109 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 3715b0f300f..edc2376815e 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_CXX_FILES = \
brw_fs_cse.cpp \
brw_fs_copy_propagation.cpp \
brw_fs_emit.cpp \
+ brw_fs_fp.cpp \
brw_fs_live_variables.cpp \
brw_fs_visitor.cpp \
brw_fs_channel_expressions.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index fea598025ef..27014133d4f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1999,11 +1999,15 @@ fs_visitor::run()
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
- foreach_list(node, &*shader->ir) {
- ir_instruction *ir = (ir_instruction *)node;
- base_ir = ir;
- this->result = reg_undef;
- ir->accept(this);
+ if (shader) {
+ foreach_list(node, &*shader->ir) {
+ ir_instruction *ir = (ir_instruction *)node;
+ base_ir = ir;
+ this->result = reg_undef;
+ ir->accept(this);
+ }
+ } else {
+ emit_fragment_program_code();
}
if (failed)
return false;
@@ -2084,24 +2088,26 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
bool start_busy = false;
float start_time = 0;
- if (!prog)
- return false;
-
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
start_busy = (intel->batch.last_bo &&
drm_intel_bo_busy(intel->batch.last_bo));
start_time = get_time();
}
- struct brw_shader *shader =
- (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
- if (!shader)
- return false;
+ struct brw_shader *shader = NULL;
+ if (prog)
+ shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("GLSL IR for native fragment shader %d:\n", prog->Name);
- _mesa_print_ir(shader->ir, NULL);
- printf("\n\n");
+ if (shader) {
+ printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n\n");
+ } else {
+ printf("ARB_fragment_program %d ir for native fragment shader\n",
+ c->fp->program.Base.Id);
+ _mesa_print_program(&c->fp->program.Base);
+ }
}
/* Now the main event: Visit the shader IR and generate our FS IR for it.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2209e416b6d..4db9e90ed5e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -177,7 +177,7 @@ public:
/** @{
* Annotation for the generated IR. One of the two can be set.
*/
- ir_instruction *ir;
+ const void *ir;
const char *annotation;
/** @} */
};
@@ -324,6 +324,29 @@ public:
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_fragment_program_code();
+ void setup_fp_regs();
+ fs_reg get_fp_src_reg(const prog_src_register *src);
+ fs_reg get_fp_dst_reg(const prog_dst_register *dst);
+ void emit_fp_alu1(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src);
+ void emit_fp_alu2(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1);
+ void emit_fp_scalar_write(const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src);
+ void emit_fp_scalar_math(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src);
+
+ void emit_fp_minmax(const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1);
+
+ void emit_fp_sop(uint32_t conditional_mod,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
+
void emit_color_write(int target, int index, int first_color_mrf);
void emit_fb_writes();
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
@@ -381,9 +404,12 @@ public:
int max_grf;
int urb_setup[FRAG_ATTRIB_MAX];
+ fs_reg *fp_temp_regs;
+ fs_reg *fp_input_regs;
+
/** @{ debug annotation info */
const char *current_annotation;
- ir_instruction *base_ir;
+ const void *base_ir;
/** @} */
bool failed;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index e477a6168a9..aa60ed571da 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -726,11 +726,16 @@ fs_visitor::generate_code()
{
int last_native_insn_offset = p->next_insn_offset;
const char *last_annotation_string = NULL;
- ir_instruction *last_annotation_ir = NULL;
+ const void *last_annotation_ir = NULL;
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- printf("Native code for fragment shader %d (%d-wide dispatch):\n",
- prog->Name, c->dispatch_width);
+ if (shader) {
+ printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+ prog->Name, c->dispatch_width);
+ } else {
+ printf("Native code for fragment program %d (%d-wide dispatch):\n",
+ c->fp->program.Base.Id, c->dispatch_width);
+ }
}
fs_cfg *cfg = NULL;
@@ -762,7 +767,16 @@ fs_visitor::generate_code()
last_annotation_ir = inst->ir;
if (last_annotation_ir) {
printf(" ");
- last_annotation_ir->print();
+ if (shader)
+ ((ir_instruction *)inst->ir)->print();
+ else {
+ const prog_instruction *fpi;
+ fpi = (const prog_instruction *)inst->ir;
+ printf("%d: ", (int)(fpi - fp->Base.Instructions));
+ _mesa_fprint_instruction_opt(stdout,
+ fpi,
+ 0, PROG_PRINT_DEBUG, NULL);
+ }
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
new file mode 100644
index 00000000000..be00f6ea8f5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -0,0 +1,784 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_fp.cpp
+ *
+ * Implementation of the compiler for GL_ARB_fragment_program shaders on top
+ * of the GLSL compiler backend.
+ */
+
+#include "brw_context.h"
+#include "brw_fs.h"
+
+static fs_reg
+regoffset(fs_reg reg, int i)
+{
+ reg.reg_offset += i;
+ return reg;
+}
+
+void
+fs_visitor::emit_fp_alu1(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src)
+{
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i))
+ emit(opcode, regoffset(dst, i), regoffset(src, i));
+ }
+}
+
+void
+fs_visitor::emit_fp_alu2(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1)
+{
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i))
+ emit(opcode, regoffset(dst, i),
+ regoffset(src0, i), regoffset(src1, i));
+ }
+}
+
+void
+fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1)
+{
+ uint32_t conditionalmod;
+ if (fpi->Opcode == OPCODE_MIN)
+ conditionalmod = BRW_CONDITIONAL_L;
+ else
+ conditionalmod = BRW_CONDITIONAL_GE;
+
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ emit_minmax(conditionalmod, regoffset(dst, i),
+ regoffset(src0, i), regoffset(src1, i));
+ }
+ }
+}
+
+void
+fs_visitor::emit_fp_sop(uint32_t conditional_mod,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src0, fs_reg src1,
+ fs_reg one)
+{
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ fs_inst *inst;
+
+ inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()),
+ regoffset(src0, i), regoffset(src1, i));
+ inst->conditional_mod = conditional_mod;
+
+ inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
+ inst->predicated = true;
+ }
+ }
+}
+
+void
+fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src)
+{
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i))
+ emit(BRW_OPCODE_MOV, regoffset(dst, i), src);
+ }
+}
+
+void
+fs_visitor::emit_fp_scalar_math(enum opcode opcode,
+ const struct prog_instruction *fpi,
+ fs_reg dst, fs_reg src)
+{
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ emit_math(opcode, temp, src);
+ emit_fp_scalar_write(fpi, dst, temp);
+}
+
+void
+fs_visitor::emit_fragment_program_code()
+{
+ setup_fp_regs();
+
+ fs_reg null = fs_reg(brw_null_reg());
+
+ /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
+ * be:
+ *
+ * sel.f0 dst 1.0 0.0
+ *
+ * instead of
+ *
+ * mov dst 0.0
+ * mov.f0 dst 1.0
+ */
+ fs_reg one = fs_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_MOV, one, fs_reg(1.0f));
+
+ for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
+ const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
+ base_ir = fpi;
+
+ //_mesa_print_instruction(fpi);
+
+ fs_reg dst;
+ fs_reg src[3];
+
+ /* We always emit into a temporary destination register to avoid
+ * aliasing issues.
+ */
+ dst = fs_reg(this, glsl_type::vec4_type);
+
+ for (int i = 0; i < 3; i++)
+ src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
+
+ switch (fpi->Opcode) {
+ case OPCODE_ABS:
+ src[0].abs = true;
+ src[0].negate = false;
+ emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_ADD:
+ emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
+ break;
+
+ case OPCODE_CMP:
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ fs_inst *inst;
+
+ inst = emit(BRW_OPCODE_CMP, null,
+ regoffset(src[0], i), fs_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
+ regoffset(src[1], i), regoffset(src[2], i));
+ inst->predicated = true;
+ }
+ }
+ break;
+
+ case OPCODE_COS:
+ emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ case OPCODE_DPH: {
+ fs_reg mul = fs_reg(this, glsl_type::float_type);
+ fs_reg acc = fs_reg(this, glsl_type::float_type);
+ int count;
+
+ switch (fpi->Opcode) {
+ case OPCODE_DP2: count = 2; break;
+ case OPCODE_DP3: count = 3; break;
+ case OPCODE_DP4: count = 4; break;
+ case OPCODE_DPH: count = 3; break;
+ default: assert(!"not reached"); count = 0; break;
+ }
+
+ emit(BRW_OPCODE_MUL, acc,
+ regoffset(src[0], 0), regoffset(src[1], 0));
+ for (int i = 1; i < count; i++) {
+ emit(BRW_OPCODE_MUL, mul,
+ regoffset(src[0], i), regoffset(src[1], i));
+ emit(BRW_OPCODE_ADD, acc, acc, mul);
+ }
+
+ if (fpi->Opcode == OPCODE_DPH)
+ emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3));
+
+ emit_fp_scalar_write(fpi, dst, acc);
+ break;
+ }
+
+ case OPCODE_DST:
+ if (fpi->DstReg.WriteMask & WRITEMASK_X)
+ emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
+ if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+ emit(BRW_OPCODE_MUL, regoffset(dst, 1),
+ regoffset(src[0], 1), regoffset(src[1], 1));
+ }
+ if (fpi->DstReg.WriteMask & WRITEMASK_Z)
+ emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2));
+ if (fpi->DstReg.WriteMask & WRITEMASK_W)
+ emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3));
+ break;
+
+ case OPCODE_EX2:
+ emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_FLR:
+ emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_FRC:
+ emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_KIL: {
+ for (int i = 0; i < 4; i++) {
+ /* In most cases the argument to a KIL will be something like
+ * TEMP[0].wwww, so there's no point in checking whether .w is < 0
+ * 4 times in a row.
+ */
+ if (i > 0 &&
+ GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
+ GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
+ ((fpi->SrcReg[0].Negate >> i) & 1) ==
+ ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
+ continue;
+ }
+
+ fs_inst *inst = emit(BRW_OPCODE_CMP, null,
+ regoffset(src[0], i), 0.0f);
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(BRW_OPCODE_IF);
+ inst->predicated = true;
+ emit(FS_OPCODE_DISCARD);
+ emit(BRW_OPCODE_ENDIF);
+ }
+ break;
+ }
+
+ case OPCODE_LG2:
+ emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_LIT:
+ /* From the ARB_fragment_program spec:
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * Note that we don't do the clamping to +/- 128. We didn't in
+ * brw_wm_emit.c either.
+ */
+ if (fpi->DstReg.WriteMask & WRITEMASK_X)
+ emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f));
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
+ fs_inst *inst;
+ inst = emit(BRW_OPCODE_CMP, null,
+ regoffset(src[0], 0), fs_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_LE;
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+ emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0));
+ inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f));
+ inst->predicated = true;
+ }
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
+ emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
+ regoffset(src[0], 1), regoffset(src[0], 3));
+
+ inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f));
+ inst->predicated = true;
+ }
+ }
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_W)
+ emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f));
+
+ break;
+
+ case OPCODE_LRP:
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ fs_reg neg_src0 = regoffset(src[0], i);
+ neg_src0.negate = !neg_src0.negate;
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ fs_reg temp2 = fs_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f));
+ emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i));
+ emit(BRW_OPCODE_MUL, temp2,
+ regoffset(src[0], i), regoffset(src[1], i));
+ emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2);
+ }
+ }
+ break;
+
+ case OPCODE_MAD:
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_MUL, temp,
+ regoffset(src[0], i), regoffset(src[1], i));
+ emit(BRW_OPCODE_ADD, regoffset(dst, i),
+ temp, regoffset(src[2], i));
+ }
+ }
+ break;
+
+ case OPCODE_MAX:
+ emit_fp_minmax(fpi, dst, src[0], src[1]);
+ break;
+
+ case OPCODE_MOV:
+ emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_MIN:
+ emit_fp_minmax(fpi, dst, src[0], src[1]);
+ break;
+
+ case OPCODE_MUL:
+ emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
+ break;
+
+ case OPCODE_POW: {
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
+ emit_fp_scalar_write(fpi, dst, temp);
+ break;
+ }
+
+ case OPCODE_RCP:
+ emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_RSQ:
+ emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_SCS:
+ if (fpi->DstReg.WriteMask & WRITEMASK_X) {
+ emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
+ regoffset(src[0], 0));
+ }
+
+ if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
+ emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
+ regoffset(src[0], 1));
+ }
+ break;
+
+ case OPCODE_SGE:
+ emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
+ break;
+
+ case OPCODE_SIN:
+ emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_SLT:
+ emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
+ break;
+
+ case OPCODE_SUB: {
+ fs_reg neg_src1 = src[1];
+ neg_src1.negate = !src[1].negate;
+
+ emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
+ break;
+ }
+
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ case OPCODE_TXP: {
+ /* We piggy-back on the GLSL IR support for texture setup. To do so,
+ * we have to cook up an ir_texture that has the coordinate field
+ * with appropriate type, and shadow_comparitor set or not. All the
+ * other properties of ir_texture are passed in as arguments to the
+ * emit_texture_gen* function.
+ */
+ ir_texture *ir = NULL;
+
+ fs_reg lod;
+ fs_reg dpdy;
+ fs_reg coordinate = src[0];
+ fs_reg shadow_c;
+
+ switch (fpi->Opcode) {
+ case OPCODE_TEX:
+ ir = new(mem_ctx) ir_texture(ir_tex);
+ break;
+ case OPCODE_TXP: {
+ ir = new(mem_ctx) ir_texture(ir_tex);
+
+ coordinate = fs_reg(this, glsl_type::vec3_type);
+ fs_reg invproj = fs_reg(this, glsl_type::float_type);
+ emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
+ for (int i = 0; i < 3; i++) {
+ emit(BRW_OPCODE_MUL, regoffset(coordinate, i),
+ regoffset(src[0], i), invproj);
+ }
+ break;
+ }
+ case OPCODE_TXB:
+ ir = new(mem_ctx) ir_texture(ir_txb);
+ lod = regoffset(src[0], 3);
+ break;
+ default:
+ assert(!"not reached");
+ break;
+ }
+
+ const glsl_type *coordinate_type;
+ switch (fpi->TexSrcTarget) {
+ case TEXTURE_1D_INDEX:
+ coordinate_type = glsl_type::float_type;
+ break;
+
+ case TEXTURE_2D_INDEX:
+ case TEXTURE_1D_ARRAY_INDEX:
+ case TEXTURE_RECT_INDEX:
+ case TEXTURE_EXTERNAL_INDEX:
+ coordinate_type = glsl_type::vec2_type;
+ break;
+
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_2D_ARRAY_INDEX:
+ coordinate_type = glsl_type::vec3_type;
+ break;
+
+ case TEXTURE_CUBE_INDEX: {
+ coordinate_type = glsl_type::vec3_type;
+
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
+ fs_reg abscoord = coordinate;
+ abscoord.negate = false;
+ abscoord.abs = true;
+ emit_minmax(BRW_CONDITIONAL_GE, temp,
+ regoffset(abscoord, 0), regoffset(abscoord, 1));
+ emit_minmax(BRW_CONDITIONAL_GE, temp,
+ temp, regoffset(abscoord, 2));
+ emit_math(SHADER_OPCODE_RCP, temp, temp);
+ for (int i = 0; i < 3; i++) {
+ emit(BRW_OPCODE_MUL, regoffset(cubecoord, i),
+ regoffset(coordinate, i), temp);
+ }
+
+ coordinate = cubecoord;
+ break;
+ }
+
+ default:
+ assert(!"not reached");
+ coordinate_type = glsl_type::vec2_type;
+ break;
+ }
+
+ ir_constant_data junk_data;
+ ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
+
+ coordinate = rescale_texcoord(ir, coordinate,
+ fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
+ fpi->TexSrcUnit, fpi->TexSrcUnit);
+
+ if (fpi->TexShadow) {
+ shadow_c = regoffset(coordinate, 2);
+ ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
+ }
+
+ fs_inst *inst;
+ if (intel->gen >= 7) {
+ inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
+ } else if (intel->gen >= 5) {
+ inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
+ } else {
+ inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
+ }
+
+ inst->sampler = fpi->TexSrcUnit;
+ inst->shadow_compare = fpi->TexShadow;
+
+ /* Reuse the GLSL swizzle_result() handler. */
+ swizzle_result(ir, dst, fpi->TexSrcUnit);
+ dst = this->result;
+
+ break;
+ }
+
+ case OPCODE_SWZ:
+ /* Note that SWZ's extended swizzles are handled in the general
+ * get_fp_src_reg() code.
+ */
+ emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
+ break;
+
+ case OPCODE_XPD:
+ for (int i = 0; i < 3; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ int i1 = (i + 1) % 3;
+ int i2 = (i + 2) % 3;
+
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ fs_reg neg_src1_1 = regoffset(src[1], i1);
+ neg_src1_1.negate = !neg_src1_1.negate;
+ emit(BRW_OPCODE_MUL, temp,
+ regoffset(src[0], i2), neg_src1_1);
+ emit(BRW_OPCODE_MUL, regoffset(dst, i),
+ regoffset(src[0], i1), regoffset(src[1], i2));
+ emit(BRW_OPCODE_ADD, regoffset(dst, i),
+ regoffset(dst, i), temp);
+ }
+ }
+ break;
+
+ case OPCODE_END:
+ break;
+
+ default:
+ _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
+ _mesa_opcode_string(fpi->Opcode));
+ }
+
+ /* To handle saturates, we emit a MOV with a saturate bit, which
+ * optimization should fold into the preceding instructions when safe.
+ */
+ if (fpi->Opcode != OPCODE_END) {
+ fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
+
+ for (int i = 0; i < 4; i++) {
+ if (fpi->DstReg.WriteMask & (1 << i)) {
+ fs_inst *inst = emit(BRW_OPCODE_MOV,
+ regoffset(real_dst, i),
+ regoffset(dst, i));
+ inst->saturate = fpi->SaturateMode;
+ }
+ }
+ }
+ }
+
+ /* Epilogue:
+ *
+ * Fragment depth has this strange convention of being the .z component of
+ * a vec4. emit_fb_write() wants to see a float value, instead.
+ */
+ this->current_annotation = "result.depth write";
+ if (frag_depth.file != BAD_FILE) {
+ fs_reg temp = fs_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2));
+ frag_depth = temp;
+ }
+}
+
+void
+fs_visitor::setup_fp_regs()
+{
+ /* PROGRAM_TEMPORARY */
+ int num_temp = fp->Base.NumTemporaries;
+ fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
+ for (int i = 0; i < num_temp; i++)
+ fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
+
+ /* PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, etc. */
+ if (c->dispatch_width == 8) {
+ for (unsigned p = 0;
+ p < c->fp->program.Base.Parameters->NumParameters; p++) {
+ for (unsigned int i = 0; i < 4; i++) {
+ this->param_index[c->prog_data.nr_params] = p;
+ this->param_offset[c->prog_data.nr_params] = i;
+ c->prog_data.nr_params++;
+ }
+ }
+ }
+
+ fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
+ for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
+ if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
+ /* Make up a dummy instruction to reuse code for emitting
+ * interpolation.
+ */
+ ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+ "fp_input",
+ ir_var_in);
+ ir->location = i;
+
+ this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
+ i);
+
+ switch (i) {
+ case FRAG_ATTRIB_WPOS:
+ ir->pixel_center_integer = fp->PixelCenterInteger;
+ ir->origin_upper_left = fp->OriginUpperLeft;
+ fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
+ break;
+ case FRAG_ATTRIB_FACE:
+ fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
+ break;
+ default:
+ fp_input_regs[i] = *emit_general_interpolation(ir);
+
+ if (i == FRAG_ATTRIB_FOGC) {
+ emit(BRW_OPCODE_MOV,
+ regoffset(fp_input_regs[i], 1), fs_reg(0.0f));
+ emit(BRW_OPCODE_MOV,
+ regoffset(fp_input_regs[i], 2), fs_reg(0.0f));
+ emit(BRW_OPCODE_MOV,
+ regoffset(fp_input_regs[i], 3), fs_reg(1.0f));
+ }
+
+ break;
+ }
+
+ this->current_annotation = NULL;
+ }
+ }
+}
+
+fs_reg
+fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
+{
+ switch (dst->File) {
+ case PROGRAM_TEMPORARY:
+ return fp_temp_regs[dst->Index];
+
+ case PROGRAM_OUTPUT:
+ if (dst->Index == FRAG_RESULT_DEPTH) {
+ if (frag_depth.file == BAD_FILE)
+ frag_depth = fs_reg(this, glsl_type::vec4_type);
+ return frag_depth;
+ } else if (dst->Index == FRAG_RESULT_COLOR) {
+ if (outputs[0].file == BAD_FILE) {
+ outputs[0] = fs_reg(this, glsl_type::vec4_type);
+ output_components[0] = 4;
+
+ /* Tell emit_fb_writes() to smear fragment.color across all the
+ * color attachments.
+ */
+ for (int i = 1; i < c->key.nr_color_regions; i++) {
+ outputs[i] = outputs[0];
+ output_components[i] = output_components[0];
+ }
+ }
+ return outputs[0];
+ } else {
+ int output_index = dst->Index - FRAG_RESULT_DATA0;
+ if (outputs[output_index].file == BAD_FILE) {
+ outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
+ }
+ output_components[output_index] = 4;
+ return outputs[output_index];
+ }
+
+ case PROGRAM_UNDEFINED:
+ return fs_reg();
+
+ default:
+ _mesa_problem(ctx, "bad dst register file: %s\n",
+ _mesa_register_file_name((gl_register_file)dst->File));
+ return fs_reg(this, glsl_type::vec4_type);
+ }
+}
+
+fs_reg
+fs_visitor::get_fp_src_reg(const prog_src_register *src)
+{
+ struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+
+ fs_reg result;
+
+ assert(!src->Abs);
+
+ switch (src->File) {
+ case PROGRAM_UNDEFINED:
+ return fs_reg();
+ case PROGRAM_TEMPORARY:
+ result = fp_temp_regs[src->Index];
+ break;
+
+ case PROGRAM_INPUT:
+ result = fp_input_regs[src->Index];
+ break;
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_NAMED_PARAM:
+ /* We actually want to look at the type in the Parameters list for this,
+ * because this lets us upload constant builtin uniforms, as actual
+ * constants.
+ */
+ switch (plist->Parameters[src->Index].Type) {
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT: {
+ result = fs_reg(this, glsl_type::vec4_type);
+
+ for (int i = 0; i < 4; i++) {
+ emit(BRW_OPCODE_MOV, regoffset(result, i),
+ fs_reg(plist->ParameterValues[src->Index][i].f));
+ }
+ break;
+ }
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
+ result = fs_reg(UNIFORM, src->Index * 4);
+ break;
+
+ default:
+ _mesa_problem(ctx, "bad uniform src register file: %s\n",
+ _mesa_register_file_name((gl_register_file)src->File));
+ return fs_reg(this, glsl_type::vec4_type);
+ }
+ break;
+
+ default:
+ _mesa_problem(ctx, "bad src register file: %s\n",
+ _mesa_register_file_name((gl_register_file)src->File));
+ return fs_reg(this, glsl_type::vec4_type);
+ }
+
+ if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
+ fs_reg unswizzled = result;
+ result = fs_reg(this, glsl_type::vec4_type);
+ for (int i = 0; i < 4; i++) {
+ bool negate = src->Negate & (1 << i);
+ /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
+ * but it costs us nothing to support them.
+ */
+ int src_swiz = GET_SWZ(src->Swizzle, i);
+ if (src_swiz == SWIZZLE_ZERO) {
+ emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f));
+ } else if (src_swiz == SWIZZLE_ONE) {
+ emit(BRW_OPCODE_MOV, regoffset(result, i),
+ negate ? fs_reg(-1.0f) : fs_reg(1.0f));
+ } else {
+ fs_reg src = regoffset(unswizzled, src_swiz);
+ if (negate)
+ src.negate = !src.negate;
+ emit(BRW_OPCODE_MOV, regoffset(result, i), src);
+ }
+ }
+ }
+
+ return result;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 134238d42a6..4603035792d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2228,8 +2228,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
this->c = c;
this->p = &c->func;
this->brw = p->brw;
- this->fp = (struct gl_fragment_program *)
- prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ this->fp = &c->fp->program;
this->prog = prog;
this->intel = &brw->intel;
this->ctx = &intel->ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index f8eb54fce1d..fa0f684a626 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -85,46 +85,6 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
}
}
-
-/**
- * Do GPU code generation for non-GLSL shader. non-GLSL shaders have
- * no flow control instructions so we can more readily do SSA-style
- * optimizations.
- */
-static void
-brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
-{
- /* Augment fragment program. Add instructions for pre- and
- * post-fragment-program tasks such as interpolation and fogging.
- */
- brw_wm_pass_fp(c);
-
- /* Translate to intermediate representation. Build register usage
- * chains.
- */
- brw_wm_pass0(c);
-
- /* Dead code removal.
- */
- brw_wm_pass1(c);
-
- /* Register allocation.
- * Divide by two because we operate on 16 pixels at a time and require
- * two GRF entries for each logical shader register.
- */
- c->grf_limit = BRW_WM_MAX_GRF / 2;
-
- brw_wm_pass2(c);
-
- /* how many general-purpose registers are used */
- c->prog_data.reg_blocks = brw_register_blocks(c->max_wm_grf);
-
- /* Emit GEN4 code.
- */
- brw_wm_emit(c);
-}
-
-
/**
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
@@ -356,23 +316,7 @@ bool do_wm_prog(struct brw_context *brw,
brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
&fp->program);
- if (prog && prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
- if (!brw_wm_fs_emit(brw, c, prog))
- return false;
- } else {
- if (!c->instruction) {
- c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
- c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
- c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
- c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
- }
-
- /* Fallback for fixed function and ARB_fp shaders. */
- c->dispatch_width = 16;
- brw_wm_payload_setup(brw, c);
- brw_wm_non_glsl_emit(brw, c);
- c->prog_data.dispatch_width = 16;
- }
+ brw_wm_fs_emit(brw, c, prog);
/* Scratch space is used for register spilling */
if (c->last_scratch) {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index dd67795e743..ea2dea92a70 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -163,23 +163,8 @@ brw_upload_wm_unit(struct brw_context *brw)
/* _NEW_COLOR */
wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
-
- /* BRW_NEW_FRAGMENT_PROGRAM
- *
- * If using the fragment shader backend, the program is always
- * 8-wide. If not, it's always 16.
- */
- if (ctx->Shader._CurrentFragmentProgram) {
- struct brw_shader *shader = (struct brw_shader *)
- ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
- if (shader != NULL && shader->ir != NULL) {
- wm->wm5.enable_8_pix = 1;
- if (brw->wm.prog_data->prog_offset_16)
- wm->wm5.enable_16_pix = 1;
- }
- }
- if (!wm->wm5.enable_8_pix)
+ wm->wm5.enable_8_pix = 1;
+ if (brw->wm.prog_data->prog_offset_16)
wm->wm5.enable_16_pix = 1;
wm->wm5.max_threads = brw->max_wm_threads - 1;
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index dd435286dee..bd28f97add4 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -151,13 +151,9 @@ upload_wm_state(struct brw_context *brw)
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
/* CACHE_NEW_WM_PROG */
- if (brw->wm.prog_data->dispatch_width == 8) {
- dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
- if (brw->wm.prog_data->prog_offset_16)
- dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
- } else {
+ dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+ if (brw->wm.prog_data->prog_offset_16)
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
- }
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
if (brw->wm.prog_data->dual_src_blend &&
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index dc49a7dfe2a..e0c69113ada 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -196,13 +196,9 @@ upload_ps_state(struct brw_context *brw)
if (brw->fragment_program->Base.InputsRead != 0)
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
- if (brw->wm.prog_data->dispatch_width == 8) {
- dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
- if (brw->wm.prog_data->prog_offset_16)
- dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
- } else {
+ dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
+ if (brw->wm.prog_data->prog_offset_16)
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
- }
dw5 |= (brw->wm.prog_data->first_curbe_grf <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);