diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 55 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 364 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_debug.c | 174 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 1927 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_fp.c | 1182 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_pass0.c | 445 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_pass1.c | 298 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_pass2.c | 359 |
9 files changed, 0 insertions, 4810 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index edc2376815e..125aac5a06f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -74,13 +74,7 @@ i965_C_FILES = \ brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ - brw_wm_debug.c \ - brw_wm_emit.c \ - brw_wm_fp.c \ brw_wm_iz.c \ - brw_wm_pass0.c \ - brw_wm_pass1.c \ - brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ brw_wm_surface_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index fa0f684a626..bfb36db3fcc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -39,52 +39,6 @@ #include "glsl/ralloc.h" -/** Return number of src args for given instruction */ -GLuint brw_wm_nr_args( GLuint opcode ) -{ - switch (opcode) { - case WM_FRONTFACING: - case WM_PIXELXY: - return 0; - case WM_CINTERP: - case WM_WPOSXY: - case WM_DELTAXY: - return 1; - case WM_LINTERP: - case WM_PIXELW: - return 2; - case WM_FB_WRITE: - case WM_PINTERP: - return 3; - default: - assert(opcode < MAX_OPCODE); - return _mesa_num_inst_src_regs(opcode); - } -} - - -GLuint brw_wm_is_scalar_result( GLuint opcode ) -{ - switch (opcode) { - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_POW: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_DP2: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: - case OPCODE_DST: - return 1; - - default: - return 0; - } -} - /** * Return a bitfield where bit n is set if barycentric interpolation mode n * (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader. @@ -273,15 +227,7 @@ bool do_wm_prog(struct brw_context *brw, return false; } } else { - void *instruction = c->instruction; - void *prog_instructions = c->prog_instructions; - void *vreg = c->vreg; - void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); - c->instruction = instruction; - c->prog_instructions = prog_instructions; - c->vreg = vreg; - c->refs = refs; } /* Allocate the references to the uniforms that will end up in the @@ -308,7 +254,6 @@ bool do_wm_prog(struct brw_context *brw, memcpy(&c->key, key, sizeof(*key)); c->fp = fp; - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; brw_init_compile(brw, &c->func, c); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index a4b20467c18..46d4416ba8f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -40,8 +40,6 @@ #include "brw_eu.h" #include "brw_program.h" -#define SATURATE (1<<5) - /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -78,112 +76,6 @@ struct brw_wm_prog_key { struct brw_sampler_prog_key_data tex; }; - -/* A bit of a glossary: - * - * brw_wm_value: A computed value or program input. Values are - * constant, they are created once and are never modified. When a - * fragment program register is written or overwritten, new values are - * created fresh, preserving the rule that values are constant. - * - * brw_wm_ref: A reference to a value. Wherever a value used is by an - * instruction or as a program output, that is tracked with an - * instance of this struct. All references to a value occur after it - * is created. After the last reference, a value is dead and can be - * discarded. - * - * brw_wm_grf: Represents a physical hardware register. May be either - * empty or hold a value. Register allocation is the process of - * assigning values to grf registers. This occurs in pass2 and the - * brw_wm_grf struct is not used before that. - * - * Fragment program registers: These are time-varying constructs that - * are hard to reason about and which we translate away in pass0. A - * single fragment program register element (eg. temp[0].x) will be - * translated to one or more brw_wm_value structs, one for each time - * that temp[0].x is written to during the program. - */ - - - -/* Used in pass2 to track register allocation. - */ -struct brw_wm_grf { - struct brw_wm_value *value; - GLuint nextuse; -}; - -struct brw_wm_value { - struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ - struct brw_wm_ref *lastuse; - struct brw_wm_grf *resident; - GLuint contributes_to_output:1; - GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */ -}; - -struct brw_wm_ref { - struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ - struct brw_wm_value *value; - struct brw_wm_ref *prevuse; - GLuint unspill_reg:7; /* unspill to reg */ - GLuint emitted:1; - GLuint insn:24; -}; - -struct brw_wm_constref { - const struct brw_wm_ref *ref; - GLfloat constval; -}; - - -struct brw_wm_instruction { - struct brw_wm_value *dst[4]; - struct brw_wm_ref *src[3][4]; - GLuint opcode:8; - GLuint saturate:1; - GLuint writemask:4; - GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ - GLuint tex_idx:4; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ - GLuint tex_shadow:1; /* do shadow comparison? */ - GLuint eot:1; /* End of thread indicator for FB_WRITE*/ - GLuint target:10; /* target binding table index for FB_WRITE*/ -}; - - -#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) -#define BRW_WM_MAX_GRF 128 /* hardware limit */ -#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) -#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) -#define BRW_WM_MAX_PARAM 256 -#define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_SUBROUTINE 16 - -/* used in masks next to WRITEMASK_*. */ -#define SATURATE (1<<5) - - -/* New opcodes to track internal operations required for WM unit. - * These are added early so that the registers used can be tracked, - * freed and reused like those of other instructions. - */ -#define WM_PIXELXY (MAX_OPCODE) -#define WM_DELTAXY (MAX_OPCODE + 1) -#define WM_PIXELW (MAX_OPCODE + 2) -#define WM_LINTERP (MAX_OPCODE + 3) -#define WM_PINTERP (MAX_OPCODE + 4) -#define WM_CINTERP (MAX_OPCODE + 5) -#define WM_WPOSXY (MAX_OPCODE + 6) -#define WM_FB_WRITE (MAX_OPCODE + 7) -#define WM_FRONTFACING (MAX_OPCODE + 8) -#define MAX_WM_OPCODE (MAX_OPCODE + 9) - -#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) -#define NUM_FILES (PROGRAM_PAYLOAD + 1) - -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) -#define PAYLOAD_W (FRAG_ATTRIB_MAX + 1) -#define PAYLOAD_FP_REG_MAX (FRAG_ATTRIB_MAX + 2) - struct brw_wm_compile { struct brw_compile func; struct brw_wm_prog_key key; @@ -191,13 +83,6 @@ struct brw_wm_compile { struct brw_fragment_program *fp; - GLfloat (*env_param)[4]; - - enum { - START, - PASS2_DONE - } state; - uint8_t source_depth_reg; uint8_t source_w_reg; uint8_t aa_dest_stencil_reg; @@ -208,266 +93,17 @@ struct brw_wm_compile { GLuint source_depth_to_render_target:1; GLuint runtime_check_aads_emit:1; - /* Initial pass - translate fp instructions to fp instructions, - * simplifying and adding instructions for interpolation and - * framebuffer writes. - */ - struct prog_instruction *prog_instructions; - GLuint nr_fp_insns; - GLuint fp_temp; - GLuint fp_interp_emitted; - - struct prog_src_register pixel_xy; - struct prog_src_register delta_xy; - struct prog_src_register pixel_w; - - - struct brw_wm_value *vreg; - GLuint nr_vreg; - - struct brw_wm_value creg[BRW_WM_MAX_PARAM]; - GLuint nr_creg; - - struct { - struct brw_wm_value depth[4]; /* includes r0/r1 */ - struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; - } payload; - - - const struct brw_wm_ref *pass0_fp_reg[NUM_FILES][256][4]; - - struct brw_wm_ref undef_ref; - struct brw_wm_value undef_value; - - struct brw_wm_ref *refs; - GLuint nr_refs; - - struct brw_wm_instruction *instruction; - GLuint nr_insns; - - struct brw_wm_constref constref[BRW_WM_MAX_CONST]; - GLuint nr_constrefs; - - struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; - - GLuint grf_limit; - GLuint max_wm_grf; GLuint last_scratch; - GLuint cur_inst; /**< index of current instruction */ - - bool out_of_regs; /**< ran out of GRF registers? */ - - /** Mapping from Mesa registers to hardware registers */ - struct { - bool inited; - struct brw_reg reg; - } wm_regs[NUM_FILES][256][4]; - - bool used_grf[BRW_WM_MAX_GRF]; - GLuint first_free_grf; - struct brw_reg stack; - struct brw_reg emit_mask_reg; - GLuint tmp_regs[BRW_WM_MAX_GRF]; - GLuint tmp_index; - GLuint tmp_max; - GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; GLuint dispatch_width; - - /** we may need up to 3 constants per instruction (if use_const_buffer) */ - struct { - GLint index; - struct brw_reg reg; - } current_const[3]; }; - -/** Bits for prog_instruction::Aux field */ -#define INST_AUX_EOT 0x1 -#define INST_AUX_TARGET(T) (T << 1) -#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1) - - -GLuint brw_wm_nr_args( GLuint opcode ); -GLuint brw_wm_is_scalar_result( GLuint opcode ); - -void brw_wm_pass_fp( struct brw_wm_compile *c ); -void brw_wm_pass0( struct brw_wm_compile *c ); -void brw_wm_pass1( struct brw_wm_compile *c ); -void brw_wm_pass2( struct brw_wm_compile *c ); -void brw_wm_emit( struct brw_wm_compile *c ); -bool brw_wm_arg_can_be_immediate(enum prog_opcode, int arg); -void brw_wm_print_value( struct brw_wm_compile *c, - struct brw_wm_value *value ); - -void brw_wm_print_ref( struct brw_wm_compile *c, - struct brw_wm_ref *ref ); - -void brw_wm_print_insn( struct brw_wm_compile *c, - struct brw_wm_instruction *inst ); - -void brw_wm_print_program( struct brw_wm_compile *c, - const char *stage ); - void brw_wm_lookup_iz(struct intel_context *intel, struct brw_wm_compile *c); bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, struct gl_shader_program *prog); -/* brw_wm_emit.c */ -void emit_alu1(struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_alu2(struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_cinterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_cmp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2); -void emit_ddxy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - bool is_ddx, - const struct brw_reg *arg0, - bool negate_value); -void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_dp2(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_dp3(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_dp4(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_dph(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_fb_write(struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot); -void emit_frontfacing(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask); -void emit_linterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas); -void emit_lrp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2); -void emit_mad(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2); -void emit_math1(struct brw_wm_compile *c, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_math2(struct brw_wm_compile *c, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_min(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_max(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_pinterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w); -void emit_pixel_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask); -void emit_pixel_w(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas); -void emit_sop(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1); -void emit_sign(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_tex(struct brw_wm_compile *c, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg, - struct brw_reg depth_payload, - GLuint tex_idx, - GLuint sampler, - bool shadow); -void emit_txb(struct brw_wm_compile *c, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg, - struct brw_reg depth_payload, - GLuint tex_idx, - GLuint sampler); -void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0); -void emit_xpd(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1); - GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog); struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type); struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name); diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c deleted file mode 100644 index 6a91251a80e..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "brw_context.h" -#include "brw_wm.h" - - -void brw_wm_print_value( struct brw_wm_compile *c, - struct brw_wm_value *value ) -{ - assert(value); - if (c->state >= PASS2_DONE) - brw_print_reg(value->hw_reg); - else if( value == &c->undef_value ) - printf("undef"); - else if( value - c->vreg >= 0 && - value - c->vreg < BRW_WM_MAX_VREG) - printf("r%ld", (long) (value - c->vreg)); - else if (value - c->creg >= 0 && - value - c->creg < BRW_WM_MAX_PARAM) - printf("c%ld", (long) (value - c->creg)); - else if (value - c->payload.input_interp >= 0 && - value - c->payload.input_interp < FRAG_ATTRIB_MAX) - printf("i%ld", (long) (value - c->payload.input_interp)); - else if (value - c->payload.depth >= 0 && - value - c->payload.depth < FRAG_ATTRIB_MAX) - printf("d%ld", (long) (value - c->payload.depth)); - else - printf("?"); -} - -void brw_wm_print_ref( struct brw_wm_compile *c, - struct brw_wm_ref *ref ) -{ - struct brw_reg hw_reg = ref->hw_reg; - - if (ref->unspill_reg) - printf("UNSPILL(%x)/", ref->value->spill_slot); - - if (c->state >= PASS2_DONE) - brw_print_reg(ref->hw_reg); - else { - printf("%s", hw_reg.negate ? "-" : ""); - printf("%s", hw_reg.abs ? "abs/" : ""); - brw_wm_print_value(c, ref->value); - if ((hw_reg.nr&1) || hw_reg.subnr) { - printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); - } - } -} - -void brw_wm_print_insn( struct brw_wm_compile *c, - struct brw_wm_instruction *inst ) -{ - GLuint i, arg; - GLuint nr_args = brw_wm_nr_args(inst->opcode); - - printf("["); - for (i = 0; i < 4; i++) { - if (inst->dst[i]) { - brw_wm_print_value(c, inst->dst[i]); - if (inst->dst[i]->spill_slot) - printf("/SPILL(%x)",inst->dst[i]->spill_slot); - } - else - printf("#"); - if (i < 3) - printf(","); - } - printf("]"); - - if (inst->writemask != WRITEMASK_XYZW) - printf(".%s%s%s%s", - GET_BIT(inst->writemask, 0) ? "x" : "", - GET_BIT(inst->writemask, 1) ? "y" : "", - GET_BIT(inst->writemask, 2) ? "z" : "", - GET_BIT(inst->writemask, 3) ? "w" : ""); - - switch (inst->opcode) { - case WM_PIXELXY: - printf(" = PIXELXY"); - break; - case WM_DELTAXY: - printf(" = DELTAXY"); - break; - case WM_PIXELW: - printf(" = PIXELW"); - break; - case WM_WPOSXY: - printf(" = WPOSXY"); - break; - case WM_PINTERP: - printf(" = PINTERP"); - break; - case WM_LINTERP: - printf(" = LINTERP"); - break; - case WM_CINTERP: - printf(" = CINTERP"); - break; - case WM_FB_WRITE: - printf(" = FB_WRITE"); - break; - case WM_FRONTFACING: - printf(" = FRONTFACING"); - break; - default: - printf(" = %s", _mesa_opcode_string(inst->opcode)); - break; - } - - if (inst->saturate) - printf("_SAT"); - - for (arg = 0; arg < nr_args; arg++) { - - printf(" ["); - - for (i = 0; i < 4; i++) { - if (inst->src[arg][i]) { - brw_wm_print_ref(c, inst->src[arg][i]); - } - else - printf("%%"); - - if (i < 3) - printf(","); - else - printf("]"); - } - } - printf("\n"); -} - -void brw_wm_print_program( struct brw_wm_compile *c, - const char *stage ) -{ - GLuint insn; - - printf("%s:\n", stage); - for (insn = 0; insn < c->nr_insns; insn++) - brw_wm_print_insn(c, &c->instruction[insn]); - printf("\n"); -} - diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c deleted file mode 100644 index 0e73ef89056..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ /dev/null @@ -1,1927 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "main/macros.h" -#include "brw_context.h" -#include "brw_wm.h" - -static bool -can_do_pln(struct intel_context *intel, const struct brw_reg *deltas) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - if (!brw->has_pln) - return false; - - if (deltas[1].nr != deltas[0].nr + 1) - return false; - - if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) - return false; - - return true; -} - -/* Return the SrcReg index of the channels that can be immediate float operands - * instead of usage of PROGRAM_CONSTANT values through push/pull. - */ -bool -brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg) -{ - int opcode_array[] = { - [OPCODE_ADD] = 2, - [OPCODE_CMP] = 3, - [OPCODE_DP3] = 2, - [OPCODE_DP4] = 2, - [OPCODE_DPH] = 2, - [OPCODE_MAX] = 2, - [OPCODE_MIN] = 2, - [OPCODE_MOV] = 1, - [OPCODE_MUL] = 2, - [OPCODE_SEQ] = 2, - [OPCODE_SGE] = 2, - [OPCODE_SGT] = 2, - [OPCODE_SLE] = 2, - [OPCODE_SLT] = 2, - [OPCODE_SNE] = 2, - [OPCODE_SWZ] = 1, - [OPCODE_XPD] = 2, - }; - - /* These opcodes get broken down in a way that allow two - * args to be immediates. - */ - if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) { - if (arg == 1 || arg == 2) - return true; - } - - if (opcode > ARRAY_SIZE(opcode_array)) - return false; - - return arg == opcode_array[opcode] - 1; -} - -/** - * Computes the screen-space x,y position of the pixels. - * - * This will be used by emit_delta_xy() or emit_wpos_xy() for - * interpolation of attributes.. - * - * Payload R0: - * - * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, - * corresponding to each of the 16 execution channels. - * R0.1..8 -- ? - * R1.0 -- triangle vertex 0.X - * R1.1 -- triangle vertex 0.Y - * R1.2 -- tile 0 x,y coords (2 packed uwords) - * R1.3 -- tile 1 x,y coords (2 packed uwords) - * R1.4 -- tile 2 x,y coords (2 packed uwords) - * R1.5 -- tile 3 x,y coords (2 packed uwords) - * R1.6 -- ? - * R1.7 -- ? - * R1.8 -- ? - */ -void emit_pixel_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask) -{ - struct brw_compile *p = &c->func; - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - struct brw_reg dst0_uw, dst1_uw; - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - if (c->dispatch_width == 16) { - dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); - dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); - } else { - dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); - dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); - } - - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0_uw, - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1_uw, - stride(suboffset(r1_uw,5), 2, 4, 0), - brw_imm_v(0x11001100)); - } - brw_pop_insn_state(p); -} - -/** - * Computes the screen-space x,y distance of the pixels from the start - * vertex. - * - * This will be used in linterp or pinterp with the start vertex value - * and the Cx, Cy, and C0 coefficients passed in from the setup engine - * to produce interpolated attribute values. - */ -void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_reg r1 = brw_vec1_grf(1, 0); - - if (mask == 0) - return; - - assert(mask == WRITEMASK_XY); - - if (intel->gen >= 6) { - /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1. - Just add them with 0.0 for dst reg.. */ - r1 = brw_imm_v(0x00000000); - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - r1); - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - r1); - return; - } - - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers produced by emit_pixel_xy(). - */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_UW), - negate(r1)); - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); -} - -/** - * Computes the pixel offset from the window origin for gl_FragCoord(). - */ -void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W); - struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W); - - if (mask & WRITEMASK_X) { - if (intel->gen >= 6) { - struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F); - brw_MOV(p, delta_x_f, delta_x); - delta_x = delta_x_f; - } - - if (c->fp->program.PixelCenterInteger) { - /* X' = X */ - brw_MOV(p, dst[0], delta_x); - } else { - /* X' = X + 0.5 */ - brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5)); - } - } - - if (mask & WRITEMASK_Y) { - if (intel->gen >= 6) { - struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F); - brw_MOV(p, delta_y_f, delta_y); - delta_y = delta_y_f; - } - - if (c->fp->program.OriginUpperLeft) { - if (c->fp->program.PixelCenterInteger) { - /* Y' = Y */ - brw_MOV(p, dst[1], delta_y); - } else { - brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5)); - } - } else { - float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; - - /* Y' = (height - 1) - Y + center */ - brw_ADD(p, dst[1], negate(delta_y), - brw_imm_f(c->key.drawable_height - 1 + center_offset)); - } - } -} - - -void emit_pixel_w(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - struct brw_reg src; - struct brw_reg temp_dst; - - if (intel->gen >= 6) - temp_dst = dst[3]; - else - temp_dst = brw_message_reg(2); - - assert(intel->gen < 6); - - /* Don't need this if all you are doing is interpolating color, for - * instance. - */ - if (mask & WRITEMASK_W) { - struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); - - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - if (can_do_pln(intel, deltas)) { - brw_PLN(p, temp_dst, interp3, deltas[0]); - } else { - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]); - } - - /* Calc w */ - if (intel->gen >= 6) - src = temp_dst; - else - src = brw_null_reg(); - - if (c->dispatch_width == 16) { - brw_math_16(p, dst[3], - BRW_MATH_FUNCTION_INV, - 2, src, - BRW_MATH_PRECISION_FULL); - } else { - brw_math(p, dst[3], - BRW_MATH_FUNCTION_INV, - 2, src, - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - } - } -} - -void emit_linterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - if (intel->gen >= 6) { - brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0)); - } else if (can_do_pln(intel, deltas)) { - brw_PLN(p, dst[i], interp[i], deltas[0]); - } else { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); - } - } - } -} - - -void emit_pinterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) -{ - struct intel_context *intel = &p->brw->intel; - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - if (intel->gen >= 6) { - emit_linterp(p, dst, mask, arg0, interp); - return; - } - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - if (can_do_pln(intel, deltas)) { - brw_PLN(p, dst[i], interp[i], deltas[0]); - } else { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); - } - } - } - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MUL(p, dst[i], dst[i], w[3]); - } - } -} - - -void emit_cinterp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - struct brw_reg interp[4]; - GLuint nr = arg0[0].nr; - GLuint i; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], suboffset(interp[i],3)); /* TODO: optimize away like other moves */ - } - } -} - -/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -void emit_frontfacing(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) -{ - struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); - GLuint i; - - if (!(mask & WRITEMASK_XYZW)) - return; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0.0)); - } - } - - /* bit 31 is "primitive is back face", so checking < (1 << 31) gives - * us front face - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(1.0)); - } - } - brw_set_predicate_control_flag_value(p, 0xff); -} - -/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input - * looking like: - * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br - * - * and we're trying to produce: - * - * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - ss1.bl) (ss1.tr - ss1.br) - * - * and add another set of two more subspans if in 16-pixel dispatch mode. - * - * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result - * for each pair, and vertstride = 2 jumps us 2 elements after processing a - * pair. But for DDY, it's harder, as we want to produce the pairs swizzled - * between each other. We could probably do it like ddx and swizzle the right - * order later, but bail for now and just produce - * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) - * - * The negate_value boolean is used to negate the d/dy computation for FBOs, - * since they place the origin at the upper left instead of the lower left. - */ -void emit_ddxy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - bool is_ddx, - const struct brw_reg *arg0, - bool negate_value) -{ - int i; - struct brw_reg src0, src1; - - if (mask & SATURATE) - brw_set_saturate(p, 1); - for (i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - if (is_ddx) { - src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_2, - BRW_WIDTH_2, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - } else { - src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, - BRW_REGISTER_TYPE_F, - BRW_VERTICAL_STRIDE_4, - BRW_WIDTH_4, - BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - } - if (negate_value) - brw_ADD(p, dst[i], src1, negate(src0)); - else - brw_ADD(p, dst[i], src0, negate(src1)); - } - } - if (mask & SATURATE) - brw_set_saturate(p, 0); -} - -void emit_alu1(struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - GLuint i; - - if (mask & SATURATE) - brw_set_saturate(p, 1); - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - func(p, dst[i], arg0[i]); - } - } - - if (mask & SATURATE) - brw_set_saturate(p, 0); -} - - -void emit_alu2(struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - GLuint i; - - if (mask & SATURATE) - brw_set_saturate(p, 1); - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - func(p, dst[i], arg0[i], arg1[i]); - } - } - - if (mask & SATURATE) - brw_set_saturate(p, 0); -} - - -void emit_mad(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MUL(p, dst[i], arg0[i], arg1[i]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_ADD(p, dst[i], dst[i], arg2[i]); - brw_set_saturate(p, 0); - } - } -} - -void emit_lrp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - GLuint i; - - /* Uses dst as a temporary: - */ - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - /* Can I use the LINE instruction for this? - */ - brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst[i], arg2[i]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[i], arg0[i], arg1[i]); - brw_set_saturate(p, 0); - } - } -} - -void emit_sop(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst[i], brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_pop_insn_state(p); - } - } -} - -static void emit_slt( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1); -} - -static void emit_sle( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1); -} - -static void emit_sgt( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1); -} - -static void emit_sge( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1); -} - -static void emit_seq( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1); -} - -static void emit_sne( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) -{ - emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); -} - -void emit_cmp(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_SEL(p, dst[i], arg1[i], arg2[i]); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } -} - -void emit_sign(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0.0)); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0)); - brw_MOV(p, dst[i], brw_imm_f(-1.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0)); - brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - } -} - -void emit_max(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_SEL(p, dst[i], arg0[i], arg1[i]); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } -} - -void emit_min(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_SEL(p, dst[i], arg0[i], arg1[i]); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } -} - - -void emit_dp2(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]); - brw_set_saturate(p, 0); -} - - -void emit_dp3(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); - brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); - brw_set_saturate(p, 0); -} - - -void emit_dp4(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); - brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); - brw_set_saturate(p, 0); -} - - -void emit_dph(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - const int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); - brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]); - brw_set_saturate(p, 0); -} - - -void emit_xpd(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - GLuint i; - - assert((mask & WRITEMASK_W) != WRITEMASK_W); - - for (i = 0 ; i < 3; i++) { - if (mask & (1<<i)) { - GLuint i2 = (i+2)%3; - GLuint i1 = (i+1)%3; - - brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]); - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[i], arg0[i1], arg1[i2]); - brw_set_saturate(p, 0); - } - } -} - - -void emit_math1(struct brw_wm_compile *c, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - struct brw_reg src; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE) || - arg0[0].negate || arg0[0].abs)) { - /* Gen6 math requires that source and dst horizontal stride be 1, - * and that the argument be in the GRF. - * - * The hardware ignores source modifiers (negate and abs) on math - * instructions, so we also move to a temp to set those up. - */ - src = dst[dst_chan]; - brw_MOV(p, src, arg0[0]); - } else { - src = arg0[0]; - } - - /* Send two messages to perform all 16 operations: - */ - brw_push_insn_state(p); - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, - dst[dst_chan], - function, - 2, - src, - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - 3, - sechalf(src), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - } - brw_pop_insn_state(p); -} - - -void emit_math2(struct brw_wm_compile *c, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; /* Do not emit dead code */ - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - brw_push_insn_state(p); - - /* math can only operate on up to a vec8 at a time, so in - * dispatch_width==16 we have to do the second half manually. - */ - if (intel->gen >= 6) { - struct brw_reg src0 = arg0[0]; - struct brw_reg src1 = arg1[0]; - struct brw_reg temp_dst = dst[dst_chan]; - - if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - } - - if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* This is a heinous hack to get a temporary register for use - * in case both arg0 and arg1 are constants. Why you're - * doing exponentiation on constant values in the shader, we - * don't know. - * - * max_wm_grf is almost surely less than the maximum GRF, and - * gen6 doesn't care about the number of GRFs used in a - * shader like pre-gen6 did. - */ - struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); - brw_MOV(p, temp, src1); - src1 = temp; - } - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math2(p, - temp_dst, - function, - src0, - src1); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math2(p, - sechalf(temp_dst), - function, - sechalf(src0), - sechalf(src1)); - } - } else { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(3), arg1[0]); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); - } - - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, - dst[dst_chan], - function, - 2, - arg0[0], - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - /* Send two messages to perform all 16 operations: - */ - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - 4, - sechalf(arg0[0]), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - } - } - brw_pop_insn_state(p); -} - - -void emit_tex(struct brw_wm_compile *c, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg, - struct brw_reg depth_payload, - GLuint tex_idx, - GLuint sampler, - bool shadow) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - struct brw_reg dst_retyped; - GLuint cur_mrf = 2, response_length; - GLuint i, nr_texcoords; - GLuint emit; - GLuint msg_type; - GLuint mrf_per_channel; - GLuint simd_mode; - - if (c->dispatch_width == 16) { - mrf_per_channel = 2; - response_length = 8; - dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - mrf_per_channel = 1; - response_length = 4; - dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - /* How many input regs are there? - */ - switch (tex_idx) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr_texcoords = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_1D_ARRAY_INDEX: - case TEXTURE_RECT_INDEX: - case TEXTURE_EXTERNAL_INDEX: - emit = WRITEMASK_XY; - nr_texcoords = 2; - break; - case TEXTURE_3D_INDEX: - case TEXTURE_2D_ARRAY_INDEX: - case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; - nr_texcoords = 3; - break; - default: - /* unexpected target */ - abort(); - } - - /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ - if (intel->gen < 5 && c->dispatch_width == 8) - nr_texcoords = 3; - - if (shadow) { - if (intel->gen < 7) { - /* For shadow comparisons, we have to supply u,v,r. */ - nr_texcoords = 3; - } else { - /* On Ivybridge, the shadow comparitor comes first. Just load it. */ - brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); - cur_mrf += mrf_per_channel; - } - } - - /* Emit the texcoords. */ - for (i = 0; i < nr_texcoords; i++) { - if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) - brw_set_saturate(p, true); - - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(cur_mrf), arg[i]); - else - brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); - cur_mrf += mrf_per_channel; - - brw_set_saturate(p, false); - } - - /* Fill in the shadow comparison reference value. */ - if (shadow && intel->gen < 7) { - if (intel->gen >= 5) { - /* Fill in the cube map array index value. */ - brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); - cur_mrf += mrf_per_channel; - } else if (c->dispatch_width == 8) { - /* Fill in the LOD bias value. */ - brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); - cur_mrf += mrf_per_channel; - } - brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); - cur_mrf += mrf_per_channel; - } - - if (intel->gen >= 5) { - if (shadow) - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; - else - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; - } else { - /* Note that G45 and older determines shadow compare and dispatch width - * from message length for most messages. - */ - if (c->dispatch_width == 16 && shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; - } - - brw_SAMPLE(p, - dst_retyped, - 1, - retype(depth_payload, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(sampler), - sampler, - dst_flags & WRITEMASK_XYZW, - msg_type, - response_length, - cur_mrf - 1, - 1, - simd_mode, - BRW_SAMPLER_RETURN_FORMAT_FLOAT32); -} - - -void emit_txb(struct brw_wm_compile *c, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg, - struct brw_reg depth_payload, - GLuint tex_idx, - GLuint sampler) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - GLuint msgLength; - GLuint msg_type; - GLuint mrf_per_channel; - GLuint response_length; - struct brw_reg dst_retyped; - - /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased - * samples, so we'll use the 16-wide instruction, leave the second halves - * undefined, and trust the execution mask to keep the undefined pixels - * from mattering. - */ - if (c->dispatch_width == 16 || intel->gen < 5) { - if (intel->gen >= 5) - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - mrf_per_channel = 2; - dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); - response_length = 8; - } else { - msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; - mrf_per_channel = 1; - dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); - response_length = 4; - } - - /* Shadow ignored for txb. */ - switch (tex_idx) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); - brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - case TEXTURE_EXTERNAL_INDEX: - brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); - brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); - brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); - brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); - brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); - break; - default: - /* unexpected target */ - abort(); - } - - brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); - msgLength = 2 + 4 * mrf_per_channel - 1; - - brw_SAMPLE(p, - dst_retyped, - 1, - retype(depth_payload, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(sampler), - sampler, - dst_flags & WRITEMASK_XYZW, - msg_type, - response_length, - msgLength, - 1, - BRW_SAMPLER_SIMD_MODE_SIMD16, - BRW_SAMPLER_RETURN_FORMAT_FLOAT32); -} - - -static void emit_lit(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - struct brw_compile *p = &c->func; - - assert((mask & WRITEMASK_XW) == 0); - - if (mask & WRITEMASK_Y) { - brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MOV(p, dst[1], arg0[0]); - brw_set_saturate(p, 0); - } - - if (mask & WRITEMASK_Z) { - emit_math2(c, BRW_MATH_FUNCTION_POW, - &dst[2], - WRITEMASK_X | (mask & SATURATE), - &arg0[1], - &arg0[3]); - } - - /* Ordinarily you'd use an iff statement to skip or shortcircuit - * some of the POW calculations above, but 16-wide iff statements - * seem to lock c1 hardware, so this is a nasty workaround: - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); - { - if (mask & WRITEMASK_Y) - brw_MOV(p, dst[1], brw_imm_f(0)); - - if (mask & WRITEMASK_Z) - brw_MOV(p, dst[2], brw_imm_f(0)); - } - brw_set_predicate_control(p, BRW_PREDICATE_NONE); -} - - -/* Kill pixel - set execution mask to zero for those pixels which - * fail. - */ -static void emit_kil( struct brw_wm_compile *c, - struct brw_reg *arg0) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - struct brw_reg pixelmask; - GLuint i, j; - - if (intel->gen >= 6) - pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - else - pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - for (i = 0; i < 4; i++) { - /* Check if we've already done the comparison for this reg - * -- common when someone does KIL TEMP.wwww. - */ - for (j = 0; j < i; j++) { - if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0) - break; - } - if (j != i) - continue; - - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); - brw_set_predicate_control_flag_value(p, 0xff); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); - brw_pop_insn_state(p); - } -} - -static void fire_fb_write( struct brw_wm_compile *c, - GLuint base_reg, - GLuint nr, - GLuint target, - GLuint eot ) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - uint32_t msg_control; - - /* Pass through control information: - * - * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case. - */ -/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - if (intel->gen < 6) - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - - if (c->dispatch_width == 16) - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; - else - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - - /* Send framebuffer write message: */ -/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ - brw_fb_WRITE(p, - c->dispatch_width, - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - msg_control, - target, - nr, - 0, - eot, - true); -} - - -static void emit_aa( struct brw_wm_compile *c, - struct brw_reg *arg1, - GLuint reg ) -{ - struct brw_compile *p = &c->func; - GLuint comp = c->aa_dest_stencil_reg / 2; - GLuint off = c->aa_dest_stencil_reg % 2; - struct brw_reg aa = offset(arg1[comp], off); - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */ - brw_MOV(p, brw_message_reg(reg), aa); - brw_pop_insn_state(p); -} - - -/* Post-fragment-program processing. Send the results to the - * framebuffer. - * \param arg0 the fragment color - * \param arg1 the pass-through depth value - * \param arg2 the shader-computed depth value - */ -void emit_fb_write(struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) -{ - struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - struct intel_context *intel = &brw->intel; - GLuint nr = 2; - GLuint channel; - - /* Reserve a space for AA - may not be needed: - */ - if (c->aa_dest_stencil_reg) - nr += 1; - - /* I don't really understand how this achieves the color interleave - * (ie RGBARGBA) in the result: [Do the saturation here] - */ - brw_push_insn_state(p); - - if (c->key.clamp_fragment_color) - brw_set_saturate(p, 1); - - for (channel = 0; channel < 4; channel++) { - if (intel->gen >= 6) { - /* gen6 SIMD16 single source DP write looks like: - * m + 0: r0 - * m + 1: r1 - * m + 2: g0 - * m + 3: g1 - * m + 4: b0 - * m + 5: b1 - * m + 6: a0 - * m + 7: a1 - */ - if (c->dispatch_width == 16) { - brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]); - } else { - brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); - } - } else if (c->dispatch_width == 16 && brw->has_compr4) { - /* pre-gen6 SIMD16 single source DP write looks like: - * m + 0: r0 - * m + 1: g0 - * m + 2: b0 - * m + 3: a0 - * m + 4: r1 - * m + 5: g1 - * m + 6: b1 - * m + 7: a1 - * - * By setting the high bit of the MRF register number, we indicate - * that we want COMPR4 mode - instead of doing the usual destination - * + 1 for the second half we get destination + 4. - */ - brw_MOV(p, - brw_message_reg(nr + channel + BRW_MRF_COMPR4), - arg0[channel]); - } else { - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, - brw_message_reg(nr + channel), - arg0[channel]); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, - brw_message_reg(nr + channel + 4), - sechalf(arg0[channel])); - } - } - } - - brw_set_saturate(p, 0); - - /* skip over the regs populated above: - */ - if (c->dispatch_width == 16) - nr += 8; - else - nr += 4; - - brw_pop_insn_state(p); - - if (c->source_depth_to_render_target) - { - if (c->computes_depth) - brw_MOV(p, brw_message_reg(nr), arg2[2]); - else - brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ - - nr += 2; - } - - if (c->dest_depth_reg) - { - GLuint comp = c->dest_depth_reg / 2; - GLuint off = c->dest_depth_reg % 2; - - if (off != 0) { - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); - brw_pop_insn_state(p); - } - else { - brw_MOV(p, brw_message_reg(nr), arg1[comp]); - } - nr += 2; - } - - if (intel->gen >= 6) { - /* Load the message header. There's no implied move from src0 - * to the base mrf on gen6. - */ - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - brw_pop_insn_state(p); - - if (target != 0) { - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - 0, - 2), BRW_REGISTER_TYPE_UD), - brw_imm_ud(target)); - } - } - - if (!c->runtime_check_aads_emit) { - if (c->aa_dest_stencil_reg) - emit_aa(c, arg1, 2); - - fire_fb_write(c, 0, nr, target, eot); - } - else { - struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - struct brw_reg ip = brw_ip_reg(); - int jmp; - - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); - brw_AND(p, - v1_null_ud, - get_element_ud(brw_vec8_grf(1,0), 6), - brw_imm_ud(1<<26)); - - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)) - p->store; - { - emit_aa(c, arg1, 2); - fire_fb_write(c, 0, nr, target, eot); - /* note - thread killed in subroutine */ - } - brw_land_fwd_jump(p, jmp); - - /* ELSE: Shuffle up one register to fill in the hole left for AA: - */ - fire_fb_write(c, 1, nr-1, target, eot); - } -} - -/** - * Move a GPR to scratch memory. - */ -static void emit_spill( struct brw_wm_compile *c, - struct brw_reg reg, - GLuint slot ) -{ - struct brw_compile *p = &c->func; - - /* - mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr } - */ - brw_MOV(p, brw_message_reg(2), reg); - - /* - mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } - send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } - */ - brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot); -} - - -/** - * Load a GPR from scratch memory. - */ -static void emit_unspill( struct brw_wm_compile *c, - struct brw_reg reg, - GLuint slot ) -{ - struct brw_compile *p = &c->func; - - /* Slot 0 is the undef value. - */ - if (slot == 0) { - brw_MOV(p, reg, brw_imm_f(0)); - return; - } - - /* - mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask } - send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } - */ - - brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot); -} - - -/** - * Retrieve up to 4 GEN4 register pairs for the given wm reg: - * Args with unspill_reg != 0 will be loaded from scratch memory. - */ -static void get_argument_regs( struct brw_wm_compile *c, - struct brw_wm_ref *arg[], - struct brw_reg *regs ) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (arg[i]) { - if (arg[i]->unspill_reg) - emit_unspill(c, - brw_vec8_grf(arg[i]->unspill_reg, 0), - arg[i]->value->spill_slot); - - regs[i] = arg[i]->hw_reg; - } - else { - regs[i] = brw_null_reg(); - } - } -} - - -/** - * For values that have a spill_slot!=0, write those regs to scratch memory. - */ -static void spill_values( struct brw_wm_compile *c, - struct brw_wm_value *values, - GLuint nr ) -{ - GLuint i; - - for (i = 0; i < nr; i++) - if (values[i].spill_slot) - emit_spill(c, values[i].hw_reg, values[i].spill_slot); -} - - -/* Emit the fragment program instructions here. - */ -void brw_wm_emit( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - GLuint insn; - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - if (intel->gen >= 6) - brw_set_acc_write_control(p, 1); - - /* Check if any of the payload regs need to be spilled: - */ - spill_values(c, c->payload.depth, 4); - spill_values(c, c->creg, c->nr_creg); - spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); - - - for (insn = 0; insn < c->nr_insns; insn++) { - - struct brw_wm_instruction *inst = &c->instruction[insn]; - struct brw_reg args[3][4], dst[4]; - GLuint i, dst_flags; - - /* Get argument regs: - */ - for (i = 0; i < 3; i++) - get_argument_regs(c, inst->src[i], args[i]); - - /* Get dest regs: - */ - for (i = 0; i < 4; i++) - if (inst->dst[i]) - dst[i] = inst->dst[i]->hw_reg; - else - dst[i] = brw_null_reg(); - - /* Flags - */ - dst_flags = inst->writemask; - if (inst->saturate) - dst_flags |= SATURATE; - - switch (inst->opcode) { - /* Generated instructions for calculating triangle interpolants: - */ - case WM_PIXELXY: - emit_pixel_xy(c, dst, dst_flags); - break; - - case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0]); - break; - - case WM_WPOSXY: - emit_wpos_xy(c, dst, dst_flags, args[0]); - break; - - case WM_PIXELW: - emit_pixel_w(c, dst, dst_flags, args[0], args[1]); - break; - - case WM_LINTERP: - emit_linterp(p, dst, dst_flags, args[0], args[1]); - break; - - case WM_PINTERP: - emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - - case WM_CINTERP: - emit_cinterp(p, dst, dst_flags, args[0]); - break; - - case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); - break; - - case WM_FRONTFACING: - emit_frontfacing(p, dst, dst_flags); - break; - - /* Straightforward arithmetic: - */ - case OPCODE_ADD: - emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_FRC: - emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); - break; - - case OPCODE_FLR: - emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); - break; - - case OPCODE_DDX: - emit_ddxy(p, dst, dst_flags, true, args[0], false); - break; - - case OPCODE_DDY: - /* Make sure fp->program.UsesDFdy flag got set (otherwise there's no - * guarantee that c->key.render_to_fbo is set). - */ - assert(c->fp->program.UsesDFdy); - emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo); - break; - - case OPCODE_DP2: - emit_dp2(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_DP3: - emit_dp3(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_DP4: - emit_dp4(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_DPH: - emit_dph(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_TRUNC: - for (i = 0; i < 4; i++) { - if (dst_flags & (1<<i)) { - brw_RNDZ(p, dst[i], args[0][i]); - } - } - break; - - case OPCODE_LRP: - emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - - case OPCODE_MAD: - emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); - break; - - case OPCODE_MOV: - case OPCODE_SWZ: - emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); - break; - - case OPCODE_MUL: - emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_XPD: - emit_xpd(p, dst, dst_flags, args[0], args[1]); - break; - - /* Higher math functions: - */ - case OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); - break; - - case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); - break; - - case OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); - break; - - case OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); - break; - - case OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); - break; - - case OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); - break; - - case OPCODE_SCS: - /* There is an scs math function, but it would need some - * fixup for 16-element execution. - */ - if (dst_flags & WRITEMASK_X) - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); - if (dst_flags & WRITEMASK_Y) - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); - break; - - case OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); - break; - - /* Comparisons: - */ - case OPCODE_CMP: - emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - - case OPCODE_MAX: - emit_max(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_MIN: - emit_min(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_SLT: - emit_slt(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_SLE: - emit_sle(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_SGT: - emit_sgt(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_SGE: - emit_sge(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_SEQ: - emit_seq(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_SNE: - emit_sne(p, dst, dst_flags, args[0], args[1]); - break; - - case OPCODE_SSG: - emit_sign(p, dst, dst_flags, args[0]); - break; - - case OPCODE_LIT: - emit_lit(c, dst, dst_flags, args[0]); - break; - - /* Texturing operations: - */ - case OPCODE_TEX: - emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, - inst->tex_idx, inst->tex_unit, - inst->tex_shadow); - break; - - case OPCODE_TXB: - emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, - inst->tex_idx, inst->tex_unit); - break; - - case OPCODE_KIL: - emit_kil(c, args[0]); - break; - - default: - printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); - } - - for (i = 0; i < 4; i++) - if (inst->dst[i] && inst->dst[i]->spill_slot) - emit_spill(c, - inst->dst[i]->hw_reg, - inst->dst[i]->spill_slot); - } - - /* Only properly tested on ILK */ - if (p->brw->intel.gen == 5) { - brw_remove_duplicate_mrf_moves(p); - if (c->dispatch_width == 16) - brw_remove_grf_to_mrf_moves(p); - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("wm-native:\n"); - brw_dump_compile(p, stdout, 0, p->next_insn_offset); - printf("\n"); - } -} - diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c deleted file mode 100644 index b40c501257b..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ /dev/null @@ -1,1182 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "brw_context.h" -#include "brw_wm.h" -#include "brw_util.h" - -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_statevars.h" - - -/** An invalid texture target */ -#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS - -/** An invalid texture unit */ -#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS - -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 - - -static const char *wm_opcode_strings[] = { - "PIXELXY", - "DELTAXY", - "PIXELW", - "LINTERP", - "PINTERP", - "CINTERP", - "WPOSXY", - "FB_WRITE", - "FRONTFACING", -}; - -#if 0 -static const char *wm_file_strings[] = { - "PAYLOAD" -}; -#endif - - -/*********************************************************************** - * Source regs - */ - -static struct prog_src_register src_reg(GLuint file, GLuint idx) -{ - struct prog_src_register reg; - reg.File = file; - reg.Index = idx; - reg.Swizzle = SWIZZLE_NOOP; - reg.RelAddr = 0; - reg.Negate = NEGATE_NONE; - reg.Abs = 0; - reg.HasIndex2 = 0; - reg.RelAddr2 = 0; - reg.Index2 = 0; - return reg; -} - -static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) -{ - return src_reg(dst.File, dst.Index); -} - -static struct prog_src_register src_undef( void ) -{ - return src_reg(PROGRAM_UNDEFINED, 0); -} - -static bool src_is_undef(struct prog_src_register src) -{ - return src.File == PROGRAM_UNDEFINED; -} - -static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) -{ - reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); - return reg; -} - -static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) -{ - return src_swizzle(reg, x, x, x, x); -} - -static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) -{ - reg.Swizzle = swizzle; - return reg; -} - - -/*********************************************************************** - * Dest regs - */ - -static struct prog_dst_register dst_reg(GLuint file, GLuint idx) -{ - struct prog_dst_register reg; - reg.File = file; - reg.Index = idx; - reg.WriteMask = WRITEMASK_XYZW; - reg.RelAddr = 0; - reg.CondMask = COND_TR; - reg.CondSwizzle = 0; - reg.CondSrc = 0; - return reg; -} - -static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) -{ - reg.WriteMask &= mask; - return reg; -} - -static struct prog_dst_register dst_undef( void ) -{ - return dst_reg(PROGRAM_UNDEFINED, 0); -} - - - -static struct prog_dst_register get_temp( struct brw_wm_compile *c ) -{ - int bit = ffs( ~c->fp_temp ); - - if (!bit) { - printf("%s: out of temporaries\n", __FILE__); - exit(1); - } - - c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); -} - - -static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) -{ - c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); -} - - -/*********************************************************************** - * Instructions - */ - -static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) -{ - assert(c->nr_fp_insns < BRW_WM_MAX_INSN); - memset(&c->prog_instructions[c->nr_fp_insns], 0, - sizeof(*c->prog_instructions)); - return &c->prog_instructions[c->nr_fp_insns++]; -} - -static struct prog_instruction *emit_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst = get_fp_inst(c); - *inst = *inst0; - return inst; -} - -static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - GLuint tex_src_unit, - GLuint tex_src_target, - GLuint tex_shadow, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - struct prog_instruction *inst = get_fp_inst(c); - - assert(tex_src_unit < BRW_MAX_TEX_UNIT || - tex_src_unit == TEX_UNIT_NONE); - assert(tex_src_target < NUM_TEXTURE_TARGETS || - tex_src_target == TEX_TARGET_NONE); - - memset(inst, 0, sizeof(*inst)); - - inst->Opcode = op; - inst->DstReg = dest; - inst->SaturateMode = saturate; - inst->TexSrcUnit = tex_src_unit; - inst->TexSrcTarget = tex_src_target; - inst->TexShadow = tex_shadow; - inst->SrcReg[0] = src0; - inst->SrcReg[1] = src1; - inst->SrcReg[2] = src2; - return inst; -} - - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - return emit_tex_op(c, op, dest, saturate, - TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ - src0, src1, src2); -} - - -/* Many Mesa opcodes produce the same value across all the result channels. - * We'd rather not have to support that splatting in the opcode implementations, - * and brw_wm_pass*.c wants to optimize them out by shuffling references around - * anyway. We can easily get both by emitting the opcode to one channel, and - * then MOVing it to the others, which brw_wm_pass*.c already understands. - */ -static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst; - unsigned int dst_chan; - unsigned int other_channel_mask; - - if (inst0->DstReg.WriteMask == 0) - return NULL; - - dst_chan = ffs(inst0->DstReg.WriteMask) - 1; - inst = get_fp_inst(c); - *inst = *inst0; - inst->DstReg.WriteMask = 1 << dst_chan; - - other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); - if (other_channel_mask != 0) { - inst = emit_op(c, - OPCODE_MOV, - dst_mask(inst0->DstReg, other_channel_mask), - 0, - src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), - src_undef(), - src_undef()); - } - return inst; -} - - -/*********************************************************************** - * Special instructions for interpolation and other tasks - */ - -static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) -{ - if (src_is_undef(c->pixel_xy)) { - struct prog_dst_register pixel_xy = get_temp(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - - - /* Emit the out calculations, and hold onto the results. Use - * two instructions as a temporary is required. - */ - /* pixel_xy.xy = PIXELXY payload[0]; - */ - emit_op(c, - WM_PIXELXY, - dst_mask(pixel_xy, WRITEMASK_XY), - 0, - payload_r0_depth, - src_undef(), - src_undef()); - - c->pixel_xy = src_reg_from_dst(pixel_xy); - } - - return c->pixel_xy; -} - -static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) -{ - if (src_is_undef(c->delta_xy)) { - struct prog_dst_register delta_xy = get_temp(c); - struct prog_src_register pixel_xy = get_pixel_xy(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - - /* deltas.xy = DELTAXY pixel_xy, payload[0] - */ - emit_op(c, - WM_DELTAXY, - dst_mask(delta_xy, WRITEMASK_XY), - 0, - pixel_xy, - payload_r0_depth, - src_undef()); - - c->delta_xy = src_reg_from_dst(delta_xy); - } - - return c->delta_xy; -} - -static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) -{ - /* This is called for producing 1/w in pre-gen6 interp. for gen6, - * the interp opcodes don't use this argument. But to keep the - * nr_args = 3 expectations of pinterp happy, just stuff delta_xy - * into the slot. - */ - if (c->func.brw->intel.gen >= 6) - return c->delta_xy; - - if (src_is_undef(c->pixel_w)) { - struct prog_dst_register pixel_w = get_temp(c); - struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x - */ - emit_op(c, - WM_PIXELW, - dst_mask(pixel_w, WRITEMASK_W), - 0, - interp_wpos, - deltas, - src_undef()); - - - c->pixel_w = src_reg_from_dst(pixel_w); - } - - return c->pixel_w; -} - -static void emit_interp( struct brw_wm_compile *c, - GLuint idx ) -{ - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct prog_src_register deltas; - - deltas = get_delta_xy(c); - - /* Need to use PINTERP on attributes which have been - * multiplied by 1/W in the SF program, and LINTERP on those - * which have not: - */ - switch (idx) { - case FRAG_ATTRIB_WPOS: - /* Have to treat wpos.xy specially: - */ - emit_op(c, - WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), - 0, - get_pixel_xy(c), - src_undef(), - src_undef()); - - dst = dst_mask(dst, WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw - */ - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); - break; - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - if (c->key.flat_shade) { - emit_op(c, - WM_CINTERP, - dst, - 0, - interp, - src_undef(), - src_undef()); - } - else { - /* perspective-corrected color interpolation */ - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - } - break; - case FRAG_ATTRIB_FOGC: - /* Interpolate the fog coordinate */ - emit_op(c, - WM_PINTERP, - dst_mask(dst, WRITEMASK_X), - 0, - interp, - deltas, - get_pixel_w(c)); - - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_YZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); - break; - - case FRAG_ATTRIB_FACE: - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, WRITEMASK_X), - 0, - src_undef(), - src_undef(), - src_undef()); - break; - - case FRAG_ATTRIB_PNTC: - /* XXX review/test this case */ - emit_op(c, - WM_PINTERP, - dst_mask(dst, WRITEMASK_XY), - 0, - interp, - deltas, - get_pixel_w(c)); - - emit_op(c, - OPCODE_MOV, - dst_mask(dst, WRITEMASK_ZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); - break; - - default: - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - break; - } - - c->fp_interp_emitted |= 1<<idx; -} - -/*********************************************************************** - * Hacks to extend the program parameter and constant lists. - */ - -/* Add the fog parameters to the parameter list of the original - * program, rather than creating a new list. Doesn't really do any - * harm and it's not as if the parameter handling isn't a big hack - * anyway. - */ -static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, - GLint s0, - GLint s1, - GLint s2, - GLint s3, - GLint s4) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - gl_state_index tokens[STATE_LENGTH]; - GLuint idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - - idx = _mesa_add_state_reference( paramList, tokens ); - - return src_reg(PROGRAM_STATE_VAR, idx); -} - - -static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, - GLfloat s0, - GLfloat s1, - GLfloat s2, - GLfloat s3) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - gl_constant_value values[4]; - GLuint idx; - GLuint swizzle; - struct prog_src_register reg; - - values[0].f = s0; - values[1].f = s1; - values[2].f = s2; - values[3].f = s3; - - idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - reg = src_reg(PROGRAM_STATE_VAR, idx); - reg.Swizzle = swizzle; - - return reg; -} - - - -/*********************************************************************** - * Expand various instructions here to simpler forms. - */ -static void precalc_dst( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_src_register src1 = inst->SrcReg[1]; - struct prog_dst_register dst = inst->DstReg; - struct prog_dst_register temp = get_temp(c); - - if (dst.WriteMask & WRITEMASK_Y) { - /* dst.y = mul src0.y, src1.y - */ - emit_op(c, - OPCODE_MUL, - dst_mask(temp, WRITEMASK_Y), - inst->SaturateMode, - src0, - src1, - src_undef()); - } - - if (dst.WriteMask & WRITEMASK_XZ) { - struct prog_instruction *swz; - GLuint z = GET_SWZ(src0.Swizzle, Z); - - /* dst.xz = swz src0.1zzz - */ - swz = emit_op(c, - OPCODE_SWZ, - dst_mask(temp, WRITEMASK_XZ), - inst->SaturateMode, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); - /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].Negate &= ~NEGATE_X; - } - if (dst.WriteMask & WRITEMASK_W) { - /* dst.w = mov src1.w - */ - emit_op(c, - OPCODE_MOV, - dst_mask(temp, WRITEMASK_W), - inst->SaturateMode, - src1, - src_undef(), - src_undef()); - } - - /* This will get optimized out in general, but it ensures that we - * don't overwrite src operands in our channel-wise splitting - * above. See piglit fp-dst-aliasing-[12]. - */ - emit_op(c, - OPCODE_MOV, - dst, - 0, - src_reg_from_dst(temp), - src_undef(), - src_undef()); - - release_temp(c, temp); -} - - -static void precalc_lit( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_dst_register dst = inst->DstReg; - - if (dst.WriteMask & WRITEMASK_YZ) { - emit_op(c, - OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); - } - - if (dst.WriteMask & WRITEMASK_XW) { - struct prog_instruction *swz; - - /* dst.xw = swz src0.1111 - */ - swz = emit_op(c, - OPCODE_SWZ, - dst_mask(dst, WRITEMASK_XW), - 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); - /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].Negate = NEGATE_NONE; - } -} - - -/** - * Some TEX instructions require extra code, cube map coordinate - * normalization, or coordinate scaling for RECT textures, etc. - * This function emits those extra instructions and the TEX - * instruction itself. - */ -static void precalc_tex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; - struct prog_src_register coord; - struct prog_dst_register tmpcoord = { 0 }; - const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - struct prog_dst_register unswizzled_tmp; - - /* If we are doing EXT_texture_swizzle, we need to write our result into a - * temporary, otherwise writemasking of the real dst could lose some of our - * channels. - */ - if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) { - unswizzled_tmp = get_temp(c); - } else { - unswizzled_tmp = inst->DstReg; - } - - assert(unit < BRW_MAX_TEX_UNIT); - - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct prog_instruction *out; - struct prog_dst_register tmp0 = get_temp(c); - struct prog_src_register tmp0src = src_reg_from_dst(tmp0); - struct prog_dst_register tmp1 = get_temp(c); - struct prog_src_register tmp1src = src_reg_from_dst(tmp1); - struct prog_src_register src0 = inst->SrcReg[0]; - - /* find longest component of coord vector and normalize it */ - tmpcoord = get_temp(c); - coord = src_reg_from_dst(tmpcoord); - - /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, OPCODE_MOV, - tmpcoord, - 0, - src0, - src_undef(), - src_undef()); - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - - /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, OPCODE_MAX, - tmp0, - 0, - src_swizzle1(coord, X), - src_swizzle1(coord, Y), - src_undef()); - - /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, OPCODE_MAX, - tmp1, - 0, - tmp0src, - src_swizzle1(coord, Z), - src_undef()); - - /* tmp0 = 1 / tmp1 */ - emit_op(c, OPCODE_RCP, - dst_mask(tmp0, WRITEMASK_X), - 0, - tmp1src, - src_undef(), - src_undef()); - - /* tmpCoord = src0 * tmp0 */ - emit_op(c, OPCODE_MUL, - tmpcoord, - 0, - src0, - src_swizzle1(tmp0src, SWIZZLE_X), - src_undef()); - - release_temp(c, tmp0); - release_temp(c, tmp1); - } - else if (intel->gen < 6 && inst->TexSrcTarget == TEXTURE_RECT_INDEX) { - struct prog_src_register scale = - search_or_add_param5( c, - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - unit, - 0,0 ); - - tmpcoord = get_temp(c); - - /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } - */ - emit_op(c, - OPCODE_MUL, - tmpcoord, - 0, - inst->SrcReg[0], - src_swizzle(scale, - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_ONE, - SWIZZLE_ONE), - src_undef()); - - coord = src_reg_from_dst(tmpcoord); - } - else { - coord = inst->SrcReg[0]; - } - - /* Need to emit YUV texture conversions by hand. Probably need to - * do this here - the alternative is in brw_wm_emit.c, but the - * conversion requires allocating a temporary variable which we - * don't have the facility to do that late in the compilation. - */ - if (c->key.tex.yuvtex_mask & (1 << unit)) { - /* convert ycbcr to RGBA */ - bool swap_uv = c->key.tex.yuvtex_swap_mask & (1 << unit); - - /* - CONST C0 = { -.5, -.0625, -.5, 1.164 } - CONST C1 = { 1.596, -0.813, 2.018, -.391 } - UYV = TEX ... - UYV.xyz = ADD UYV, C0 - UYV.y = MUL UYV.y, C0.w - if (UV swaped) - RGB.xyz = MAD UYV.zzx, C1, UYV.y - else - RGB.xyz = MAD UYV.xxz, C1, UYV.y - RGB.y = MAD UYV.z, C1.w, RGB.y - */ - struct prog_dst_register tmp = get_temp(c); - struct prog_src_register tmpsrc = src_reg_from_dst(tmp); - struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); - struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); - - /* tmp = TEX ... - */ - emit_tex_op(c, - OPCODE_TEX, - tmp, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - inst->TexShadow, - coord, - src_undef(), - src_undef()); - - /* tmp.xyz = ADD TMP, C0 - */ - emit_op(c, - OPCODE_ADD, - dst_mask(tmp, WRITEMASK_XYZ), - 0, - tmpsrc, - C0, - src_undef()); - - /* YUV.y = MUL YUV.y, C0.w - */ - - emit_op(c, - OPCODE_MUL, - dst_mask(tmp, WRITEMASK_Y), - 0, - tmpsrc, - src_swizzle1(C0, W), - src_undef()); - - /* - * if (UV swaped) - * RGB.xyz = MAD YUV.zzx, C1, YUV.y - * else - * RGB.xyz = MAD YUV.xxz, C1, YUV.y - */ - - emit_op(c, - OPCODE_MAD, - dst_mask(unswizzled_tmp, WRITEMASK_XYZ), - 0, - swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), - C1, - src_swizzle1(tmpsrc, Y)); - - /* RGB.y = MAD YUV.z, C1.w, RGB.y - */ - emit_op(c, - OPCODE_MAD, - dst_mask(unswizzled_tmp, WRITEMASK_Y), - 0, - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(unswizzled_tmp), Y)); - - release_temp(c, tmp); - } - else { - /* ordinary RGBA tex instruction */ - emit_tex_op(c, - OPCODE_TEX, - unswizzled_tmp, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - inst->TexShadow, - coord, - src_undef(), - src_undef()); - } - - /* For GL_EXT_texture_swizzle: */ - if (c->key.tex.swizzles[unit] != SWIZZLE_NOOP) { - /* swizzle the result of the TEX instruction */ - struct prog_src_register tmpsrc = src_reg_from_dst(unswizzled_tmp); - emit_op(c, OPCODE_SWZ, - inst->DstReg, - SATURATE_OFF, /* saturate already done above */ - src_swizzle4(tmpsrc, c->key.tex.swizzles[unit]), - src_undef(), - src_undef()); - } - - if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || - (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) - release_temp(c, tmpcoord); -} - - -/** - * Check if the given TXP instruction really needs the divide-by-W step. - */ -static bool -projtex(struct brw_wm_compile *c, const struct prog_instruction *inst) -{ - const struct prog_src_register src = inst->SrcReg[0]; - bool retVal; - - assert(inst->Opcode == OPCODE_TXP); - - /* Only try to detect the simplest cases. Could detect (later) - * cases where we are trying to emit code like RCP {1.0}, MUL x, - * {1.0}, and so on. - * - * More complex cases than this typically only arise from - * user-provided fragment programs anyway: - */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - retVal = false; /* ut2004 gun rendering !?! */ - else if (src.File == PROGRAM_INPUT && - GET_SWZ(src.Swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.Index)) == 0) - retVal = false; - else - retVal = true; - - return retVal; -} - - -/** - * Emit code for TXP. - */ -static void precalc_txp( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct prog_src_register src0 = inst->SrcReg[0]; - - if (projtex(c, inst)) { - struct prog_dst_register tmp = get_temp(c); - struct prog_instruction tmp_inst; - - /* tmp0.w = RCP inst.arg[0][3] - */ - emit_op(c, - OPCODE_RCP, - dst_mask(tmp, WRITEMASK_W), - 0, - src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), - src_undef(), - src_undef()); - - /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww - */ - emit_op(c, - OPCODE_MUL, - dst_mask(tmp, WRITEMASK_XYZ), - 0, - src0, - src_swizzle1(src_reg_from_dst(tmp), W), - src_undef()); - - /* dst = precalc(TEX tmp0) - */ - tmp_inst = *inst; - tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); - precalc_tex(c, &tmp_inst); - - release_temp(c, tmp); - } - else - { - /* dst = precalc(TEX src0) - */ - precalc_tex(c, inst); - } -} - - - -static void emit_render_target_writes( struct brw_wm_compile *c ) -{ - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); - struct prog_src_register outcolor; - GLuint i; - - struct prog_instruction *inst = NULL; - - /* The inst->Aux field is used for FB write target and the EOT marker */ - - for (i = 0; i < c->key.nr_color_regions; i++) { - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - } else { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - } - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = INST_AUX_TARGET(i); - } - - /* Mark the last FB write as final, or emit a dummy write if we had - * no render targets bound. - */ - if (c->key.nr_color_regions != 0) { - inst->Aux |= INST_AUX_EOT; - } else { - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), - 0, src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR), - payload_r0_depth, outdepth); - inst->Aux = INST_AUX_TARGET(0) | INST_AUX_EOT; - } -} - - - - -/*********************************************************************** - * Emit INTERP instructions ahead of first use of each attrib. - */ - -static void validate_src_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint nr_args = brw_wm_nr_args( inst->Opcode ); - GLuint i; - - for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) { - GLuint idx = inst->SrcReg[i].Index; - if (!(c->fp_interp_emitted & (1<<idx))) { - emit_interp(c, idx); - } - } - } -} - -static void print_insns( const struct prog_instruction *insn, - GLuint nr ) -{ - GLuint i; - for (i = 0; i < nr; i++, insn++) { - printf("%3d: ", i); - if (insn->Opcode < MAX_OPCODE) - _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL); - else if (insn->Opcode < MAX_WM_OPCODE) { - GLuint idx = insn->Opcode - MAX_OPCODE; - - _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx], - 3, PROG_PRINT_DEBUG, NULL); - } - else - printf("965 Opcode %d\n", insn->Opcode); - } -} - - -/** - * Initial pass for fragment program code generation. - * This function is used by both the GLSL and non-GLSL paths. - */ -void brw_wm_pass_fp( struct brw_wm_compile *c ) -{ - struct intel_context *intel = &c->func.brw->intel; - struct brw_fragment_program *fp = c->fp; - GLuint insn; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("pre-fp:\n"); - _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG, - true); - printf("\n"); - } - - c->pixel_xy = src_undef(); - if (intel->gen >= 6) { - /* The interpolation deltas come in as the perspective pixel - * location barycentric params. - */ - c->delta_xy = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - } else { - c->delta_xy = src_undef(); - } - c->pixel_w = src_undef(); - c->nr_fp_insns = 0; - - /* Emit preamble instructions. This is where special instructions such as - * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to - * compute shader inputs from varying vars. - */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - validate_src_regs(c, inst); - } - - /* Loop over all instructions doing assorted simplifications and - * transformations. - */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - struct prog_instruction *out; - - /* Check for INPUT values, emit INTERP instructions where - * necessary: - */ - - switch (inst->Opcode) { - case OPCODE_SWZ: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - break; - - case OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - break; - - case OPCODE_SUB: - out = emit_insn(c, inst); - out->Opcode = OPCODE_ADD; - out->SrcReg[1].Negate ^= NEGATE_XYZW; - break; - - case OPCODE_SCS: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask &= WRITEMASK_XY; - break; - - case OPCODE_DST: - precalc_dst(c, inst); - break; - - case OPCODE_LIT: - precalc_lit(c, inst); - break; - - case OPCODE_RSQ: - out = emit_scalar_insn(c, inst); - out->SrcReg[0].Abs = true; - break; - - case OPCODE_TEX: - precalc_tex(c, inst); - break; - - case OPCODE_TXP: - precalc_txp(c, inst); - break; - - case OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); - break; - - case OPCODE_XPD: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask &= WRITEMASK_XYZ; - break; - - case OPCODE_KIL: - out = emit_insn(c, inst); - /* This should probably be done in the parser. - */ - out->DstReg.WriteMask = 0; - break; - case OPCODE_END: - emit_render_target_writes(c); - break; - case OPCODE_PRINT: - break; - default: - if (brw_wm_is_scalar_result(inst->Opcode)) - emit_scalar_insn(c, inst); - else - emit_insn(c, inst); - break; - } - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - printf("\n"); - } -} - diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c deleted file mode 100644 index 423e6ed7b2e..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "brw_context.h" -#include "brw_wm.h" -#include "program/prog_parameter.h" - - - -/*********************************************************************** - */ - -static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) -{ - assert(c->nr_refs < BRW_WM_MAX_REF); - memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); - return &c->refs[c->nr_refs++]; -} - -static struct brw_wm_value *get_value( struct brw_wm_compile *c) -{ - assert(c->nr_refs < BRW_WM_MAX_VREG); - memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); - return &c->vreg[c->nr_vreg++]; -} - -/** return pointer to a newly allocated instruction */ -static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) -{ - assert(c->nr_insns < BRW_WM_MAX_INSN); - memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); - return &c->instruction[c->nr_insns++]; -} - -/*********************************************************************** - */ - -/** Init the "undef" register */ -static void pass0_init_undef( struct brw_wm_compile *c) -{ - struct brw_wm_ref *ref = &c->undef_ref; - ref->value = &c->undef_value; - ref->hw_reg = brw_vec8_grf(0, 0); - ref->insn = 0; - ref->prevuse = NULL; -} - -/** Set a FP register to a value */ -static void pass0_set_fpreg_value( struct brw_wm_compile *c, - GLuint file, - GLuint idx, - GLuint component, - struct brw_wm_value *value ) -{ - struct brw_wm_ref *ref = get_ref(c); - ref->value = value; - ref->hw_reg = brw_vec8_grf(0, 0); - ref->insn = 0; - ref->prevuse = NULL; - c->pass0_fp_reg[file][idx][component] = ref; -} - -/** Set a FP register to a ref */ -static void pass0_set_fpreg_ref( struct brw_wm_compile *c, - GLuint file, - GLuint idx, - GLuint component, - const struct brw_wm_ref *src_ref ) -{ - c->pass0_fp_reg[file][idx][component] = src_ref; -} - -static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, - const GLfloat *param_ptr ) -{ - GLuint i = c->prog_data.nr_params++; - - if (i >= BRW_WM_MAX_PARAM) { - printf("%s: out of params\n", __FUNCTION__); - c->prog_data.error = 1; - return NULL; - } - else { - struct brw_wm_ref *ref = get_ref(c); - - c->prog_data.param[i] = param_ptr; - c->nr_creg = (i+16)/16; - - /* Push the offsets into hw_reg. These will be added to the - * real register numbers once one is allocated in pass2. - */ - ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); - ref->value = &c->creg[i/16]; - ref->insn = 0; - ref->prevuse = NULL; - - return ref; - } -} - - -/** Return a ref to a constant/literal value */ -static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, - const GLfloat *constval ) -{ - GLuint i; - - /* Search for an existing const value matching the request: - */ - for (i = 0; i < c->nr_constrefs; i++) { - if (c->constref[i].constval == *constval) - return c->constref[i].ref; - } - - /* Else try to add a new one: - */ - if (c->nr_constrefs < BRW_WM_MAX_CONST) { - GLuint i = c->nr_constrefs++; - - /* A constant is a special type of parameter: - */ - c->constref[i].constval = *constval; - c->constref[i].ref = get_param_ref(c, constval); - - return c->constref[i].ref; - } - else { - printf("%s: out of constrefs\n", __FUNCTION__); - c->prog_data.error = 1; - return NULL; - } -} - - -/* Lookup our internal registers - */ -static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, - GLuint file, - GLuint idx, - GLuint component ) -{ - const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component]; - - if (!ref) { - switch (file) { - case PROGRAM_INPUT: - case PROGRAM_PAYLOAD: - case PROGRAM_TEMPORARY: - case PROGRAM_OUTPUT: - case PROGRAM_VARYING: - break; - - case PROGRAM_LOCAL_PARAM: - ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]); - break; - - case PROGRAM_ENV_PARAM: - ref = get_param_ref(c, &c->env_param[idx][component]); - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - case PROGRAM_CONSTANT: - case PROGRAM_NAMED_PARAM: { - struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; - - /* There's something really hokey about parameters parsed in - * arb programs - they all end up in here, whether they be - * state values, parameters or constants. This duplicates the - * structure above & also seems to subvert the limits set for - * each type of constant/param. - */ - switch (plist->Parameters[idx].Type) { - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - /* These are invariant: - */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component].f); - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - /* These may change from run to run: - */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component].f ); - break; - - default: - assert(0); - break; - } - break; - } - - default: - assert(0); - break; - } - - c->pass0_fp_reg[file][idx][component] = ref; - } - - if (!ref) - ref = &c->undef_ref; - - return ref; -} - - - -/*********************************************************************** - * Straight translation to internal instruction format - */ - -static void pass0_set_dst( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, - GLuint writemask ) -{ - const struct prog_dst_register *dst = &inst->DstReg; - GLuint i; - - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - out->dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); - } - } - - out->writemask = writemask; -} - - -static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, - struct prog_src_register src, - GLuint i ) -{ - GLuint component = GET_SWZ(src.Swizzle,i); - const struct brw_wm_ref *src_ref; - static const GLfloat const_zero = 0.0; - static const GLfloat const_one = 1.0; - - if (component == SWIZZLE_ZERO) - src_ref = get_const_ref(c, &const_zero); - else if (component == SWIZZLE_ONE) - src_ref = get_const_ref(c, &const_one); - else - src_ref = pass0_get_reg(c, src.File, src.Index, component); - - return src_ref; -} - - -static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, - struct prog_src_register src, - GLuint i, - struct brw_wm_instruction *insn) -{ - const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); - struct brw_wm_ref *newref = get_ref(c); - - newref->value = ref->value; - newref->hw_reg = ref->hw_reg; - - if (insn) { - newref->insn = insn - c->instruction; - newref->prevuse = newref->value->lastuse; - newref->value->lastuse = newref; - } - - if (src.Negate & (1 << i)) - newref->hw_reg.negate ^= 1; - - if (src.Abs) { - newref->hw_reg.negate = 0; - newref->hw_reg.abs = 1; - } - - return newref; -} - - -static void -translate_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_wm_instruction *out = get_instruction(c); - GLuint writemask = inst->DstReg.WriteMask; - GLuint nr_args = brw_wm_nr_args(inst->Opcode); - GLuint i, j; - - /* Copy some data out of the instruction - */ - out->opcode = inst->Opcode; - out->saturate = (inst->SaturateMode != SATURATE_OFF); - out->tex_unit = inst->TexSrcUnit; - out->tex_idx = inst->TexSrcTarget; - out->tex_shadow = inst->TexShadow; - out->eot = inst->Aux & INST_AUX_EOT; - out->target = INST_AUX_GET_TARGET(inst->Aux); - - /* Args: - */ - for (i = 0; i < nr_args; i++) { - for (j = 0; j < 4; j++) { - out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out); - } - } - - /* Dst: - */ - pass0_set_dst(c, out, inst, writemask); -} - - - -/*********************************************************************** - * Optimize moves and swizzles away: - */ -static void pass0_precalc_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - const struct prog_dst_register *dst = &inst->DstReg; - GLuint writemask = inst->DstReg.WriteMask; - struct brw_wm_ref *refs[4]; - GLuint i; - - /* Get the effect of a MOV by manipulating our register table: - * First get all refs, then assign refs. This ensures that "in-place" - * swizzles such as: - * MOV t, t.xxyx - * are handled correctly. Previously, these two steps were done in - * one loop and the above case was incorrectly handled. - */ - for (i = 0; i < 4; i++) { - refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL); - } - for (i = 0; i < 4; i++) { - if (writemask & (1 << i)) { - pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]); - } - } -} - - -/* Initialize payload "registers". - */ -static void pass0_init_payload( struct brw_wm_compile *c ) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, - &c->payload.depth[j] ); - } - -#if 0 - /* This seems to be an alternative to the INTERP_WPOS stuff I do - * elsewhere: - */ - if (c->key.source_depth_reg) - pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2, - &c->payload.depth[c->key.source_depth_reg/2]); -#endif - - for (i = 0; i < FRAG_ATTRIB_MAX; i++) - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, - &c->payload.input_interp[i] ); -} - - -/*********************************************************************** - * PASS 0 - * - * Work forwards to give each calculated value a unique number. Where - * an instruction produces duplicate values (eg DP3), all are given - * the same number. - * - * Translate away swizzling and eliminate non-saturating moves. - */ -void brw_wm_pass0( struct brw_wm_compile *c ) -{ - GLuint insn; - - c->nr_vreg = 0; - c->nr_insns = 0; - - pass0_init_undef(c); - pass0_init_payload(c); - - for (insn = 0; insn < c->nr_fp_insns; insn++) { - const struct prog_instruction *inst = &c->prog_instructions[insn]; - - /* Optimize away moves, otherwise emit translated instruction: - */ - switch (inst->Opcode) { - case OPCODE_MOV: - case OPCODE_SWZ: - if (!inst->SaturateMode) { - pass0_precalc_mov(c, inst); - } - else { - translate_insn(c, inst); - } - break; - default: - translate_insn(c, inst); - break; - } - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "pass0"); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c deleted file mode 100644 index e96e9ed3e8b..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "brw_context.h" -#include "brw_wm.h" - - -static GLuint get_tracked_mask(struct brw_wm_compile *c, - struct brw_wm_instruction *inst) -{ - GLuint i; - for (i = 0; i < 4; i++) { - if (inst->writemask & (1<<i)) { - if (!inst->dst[i]->contributes_to_output) { - inst->writemask &= ~(1<<i); - inst->dst[i] = 0; - } - } - } - - return inst->writemask; -} - -/* Remove a reference from a value's usage chain. - */ -static void unlink_ref(struct brw_wm_ref *ref) -{ - struct brw_wm_value *value = ref->value; - - if (ref == value->lastuse) { - value->lastuse = ref->prevuse; - } - else { - struct brw_wm_ref *i = value->lastuse; - while (i->prevuse != ref) i = i->prevuse; - i->prevuse = ref->prevuse; - } -} - -static void track_arg(struct brw_wm_compile *c, - struct brw_wm_instruction *inst, - GLuint arg, - GLuint readmask) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - struct brw_wm_ref *ref = inst->src[arg][i]; - if (ref) { - if (readmask & (1<<i)) { - ref->value->contributes_to_output = 1; - } - else { - unlink_ref(ref); - inst->src[arg][i] = NULL; - } - } - } -} - -static GLuint get_texcoord_mask( GLuint tex_idx ) -{ - switch (tex_idx) { - case TEXTURE_1D_INDEX: - return WRITEMASK_X; - case TEXTURE_2D_INDEX: - case TEXTURE_1D_ARRAY_INDEX: - case TEXTURE_EXTERNAL_INDEX: - return WRITEMASK_XY; - case TEXTURE_3D_INDEX: - case TEXTURE_2D_ARRAY_INDEX: - return WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: - return WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: - return WRITEMASK_XY; - default: return 0; - } -} - - -/* Step two: Basically this is dead code elimination. - * - * Iterate backwards over instructions, noting which values - * contribute to the final result. Adjust writemasks to only - * calculate these values. - */ -void brw_wm_pass1( struct brw_wm_compile *c ) -{ - GLint insn; - - for (insn = c->nr_insns-1; insn >= 0; insn--) { - struct brw_wm_instruction *inst = &c->instruction[insn]; - GLuint writemask; - GLuint read0, read1, read2; - - if (inst->opcode == OPCODE_KIL) { - track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ - continue; - } - - if (inst->opcode == WM_FB_WRITE) { - track_arg(c, inst, 0, WRITEMASK_XYZW); - track_arg(c, inst, 1, WRITEMASK_XYZW); - if (c->source_depth_to_render_target && c->computes_depth) - track_arg(c, inst, 2, WRITEMASK_Z); - else - track_arg(c, inst, 2, 0); - continue; - } - - /* Lookup all the registers which were written by this - * instruction and get a mask of those that contribute to the output: - */ - writemask = get_tracked_mask(c, inst); - if (!writemask) { - GLuint arg; - for (arg = 0; arg < 3; arg++) - track_arg(c, inst, arg, 0); - continue; - } - - read0 = 0; - read1 = 0; - read2 = 0; - - /* Mark all inputs which contribute to the marked outputs: - */ - switch (inst->opcode) { - case OPCODE_ABS: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_MOV: - case OPCODE_SSG: - case OPCODE_SWZ: - case OPCODE_TRUNC: - read0 = writemask; - break; - - case OPCODE_SUB: - case OPCODE_SLT: - case OPCODE_SLE: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SEQ: - case OPCODE_SNE: - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - read0 = writemask; - read1 = writemask; - break; - - case OPCODE_DDX: - case OPCODE_DDY: - read0 = writemask; - break; - - case OPCODE_MAD: - case OPCODE_CMP: - case OPCODE_LRP: - read0 = writemask; - read1 = writemask; - read2 = writemask; - break; - - case OPCODE_XPD: - if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; - if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; - if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; - read1 = read0; - break; - - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_SCS: - case WM_CINTERP: - case WM_PIXELXY: - read0 = WRITEMASK_X; - break; - - case OPCODE_POW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_X; - break; - - case OPCODE_TEX: - case OPCODE_TXP: - read0 = get_texcoord_mask(inst->tex_idx); - - if (inst->tex_shadow) - read0 |= WRITEMASK_Z; - break; - - case OPCODE_TXB: - /* Shadow ignored for txb. - */ - read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; - break; - - case WM_WPOSXY: - read0 = writemask & WRITEMASK_XY; - break; - - case WM_DELTAXY: - read0 = writemask & WRITEMASK_XY; - read1 = WRITEMASK_X; - break; - - case WM_PIXELW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; - break; - - case WM_LINTERP: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; - break; - - case WM_PINTERP: - read0 = WRITEMASK_X; /* interpolant */ - read1 = WRITEMASK_XY; /* deltas */ - read2 = WRITEMASK_W; /* pixel w */ - break; - - case OPCODE_DP2: - read0 = WRITEMASK_XY; - read1 = WRITEMASK_XY; - break; - - case OPCODE_DP3: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZ; - break; - - case OPCODE_DPH: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZW; - break; - - case OPCODE_DP4: - read0 = WRITEMASK_XYZW; - read1 = WRITEMASK_XYZW; - break; - - case OPCODE_LIT: - read0 = WRITEMASK_XYW; - break; - - case OPCODE_DST: - case WM_FRONTFACING: - default: - break; - } - - track_arg(c, inst, 0, read0); - track_arg(c, inst, 1, read1); - track_arg(c, inst, 2, read2); - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "pass1"); - } -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c deleted file mode 100644 index f20e0b700e7..00000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ /dev/null @@ -1,359 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <[email protected]> - */ - - -#include "brw_context.h" -#include "brw_wm.h" - - -/* Use these to force spilling so that that functionality can be - * tested with known-good examples rather than having to construct new - * tests. - */ -#define TEST_PAYLOAD_SPILLS 0 -#define TEST_DST_SPILLS 0 - -static void spill_value(struct brw_wm_compile *c, - struct brw_wm_value *value); - -static void prealloc_reg(struct brw_wm_compile *c, - struct brw_wm_value *value, - GLuint reg) -{ - if (value->lastuse) { - /* Set nextuse to zero, it will be corrected by - * update_register_usage(). - */ - c->pass2_grf[reg].value = value; - c->pass2_grf[reg].nextuse = 0; - - value->resident = &c->pass2_grf[reg]; - value->hw_reg = brw_vec8_grf(reg*2, 0); - - if (TEST_PAYLOAD_SPILLS) - spill_value(c, value); - } -} - - -/* Initialize all the register values. Do the initial setup - * calculations for interpolants. - */ -static void init_registers( struct brw_wm_compile *c ) -{ - struct brw_context *brw = c->func.brw; - struct intel_context *intel = &brw->intel; - GLuint nr_interp_regs = 0; - GLuint i = 0; - GLuint j; - - for (j = 0; j < c->grf_limit; j++) - c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - - for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) - prealloc_reg(c, &c->payload.depth[j], i++); - - for (j = 0; j < c->nr_creg; j++) - prealloc_reg(c, &c->creg[j], i++); - - if (intel->gen >= 6) { - for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->fp->program.Base.InputsRead & BITFIELD64_BIT(j)) { - nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[j], i++); - } - } - } else { - for (j = 0; j < VERT_RESULT_MAX; j++) { - /* Point size is packed into the header, not as a general attribute */ - if (j == VERT_RESULT_PSIZ) - continue; - - if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { - int fp_index = _mesa_vert_result_to_frag_attrib(j); - - nr_interp_regs++; - - /* The back color slot is skipped when the front color is - * also written to. In addition, some slots can be - * written in the vertex shader and not read in the - * fragment shader. So the register number must always be - * incremented, mapped or not. - */ - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i); - i++; - } - } - assert(nr_interp_regs >= 1); - } - - - c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); - c->prog_data.urb_read_length = nr_interp_regs * 2; - c->prog_data.curb_read_length = c->nr_creg * 2; - - c->max_wm_grf = i * 2; -} - - -/* Update the nextuse value for each register in our file. - */ -static void update_register_usage(struct brw_wm_compile *c, - GLuint thisinsn) -{ - GLuint i; - - for (i = 1; i < c->grf_limit; i++) { - struct brw_wm_grf *grf = &c->pass2_grf[i]; - - /* Only search those which can change: - */ - if (grf->nextuse < thisinsn) { - const struct brw_wm_ref *ref = grf->value->lastuse; - - /* Has last use of value been passed? - */ - if (ref->insn < thisinsn) { - grf->value->resident = 0; - grf->value = 0; - grf->nextuse = BRW_WM_MAX_INSN; - } - else { - /* Else loop through chain to update: - */ - while (ref->prevuse && ref->prevuse->insn >= thisinsn) - ref = ref->prevuse; - - grf->nextuse = ref->insn; - } - } - } -} - - -static void spill_value(struct brw_wm_compile *c, - struct brw_wm_value *value) -{ - /* Allocate a spill slot. Note that allocations start from 0x40 - - * the first slot is reserved to mean "undef" in brw_wm_emit.c - */ - if (!value->spill_slot) { - c->last_scratch += 0x40; - value->spill_slot = c->last_scratch; - } - - /* The spill will be done in brw_wm_emit.c immediately after the - * value is calculated, so we can just take this reg without any - * further work. - */ - value->resident->value = NULL; - value->resident->nextuse = BRW_WM_MAX_INSN; - value->resident = NULL; -} - - - -/* Search for contiguous region with the most distant nearest - * member. Free regs count as very distant. - * - * TODO: implement spill-to-reg so that we can rearrange discontigous - * free regs and then spill the oldest non-free regs in sequence. - * This would mean inserting instructions in this pass. - */ -static GLuint search_contiguous_regs(struct brw_wm_compile *c, - GLuint nr, - GLuint thisinsn) -{ - struct brw_wm_grf *grf = c->pass2_grf; - GLuint furthest = 0; - GLuint reg = 0; - GLuint i, j; - - /* Start search at 1: r0 is special and can't be used or spilled. - */ - for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) { - GLuint group_nextuse = BRW_WM_MAX_INSN; - - for (j = 0; j < nr; j++) { - if (grf[i+j].nextuse < group_nextuse) - group_nextuse = grf[i+j].nextuse; - } - - if (group_nextuse > furthest) { - furthest = group_nextuse; - reg = i; - } - } - - assert(furthest != thisinsn); - - /* Any non-empty regs will need to be spilled: - */ - for (j = 0; j < nr; j++) - if (grf[reg+j].value) - spill_value(c, grf[reg+j].value); - - return reg; -} - - -static void alloc_contiguous_dest(struct brw_wm_compile *c, - struct brw_wm_value *dst[], - GLuint nr, - GLuint thisinsn) -{ - GLuint reg = search_contiguous_regs(c, nr, thisinsn); - GLuint i; - - for (i = 0; i < nr; i++) { - if (!dst[i]) { - /* Need to grab a dummy value in TEX case. Don't introduce - * it into the tracking scheme. - */ - dst[i] = &c->vreg[c->nr_vreg++]; - } - else { - assert(!dst[i]->resident); - assert(c->pass2_grf[reg+i].nextuse != thisinsn); - - c->pass2_grf[reg+i].value = dst[i]; - c->pass2_grf[reg+i].nextuse = thisinsn; - - dst[i]->resident = &c->pass2_grf[reg+i]; - } - - dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0); - } - - if ((reg+nr)*2 > c->max_wm_grf) - c->max_wm_grf = (reg+nr) * 2; -} - - -static void load_args(struct brw_wm_compile *c, - struct brw_wm_instruction *inst) -{ - GLuint thisinsn = inst - c->instruction; - GLuint i,j; - - for (i = 0; i < 3; i++) { - for (j = 0; j < 4; j++) { - struct brw_wm_ref *ref = inst->src[i][j]; - - if (ref) { - if (!ref->value->resident) { - /* Need to bring the value in from scratch space. The code for - * this will be done in brw_wm_emit.c, here we just do the - * register allocation and mark the ref as requiring a fill. - */ - GLuint reg = search_contiguous_regs(c, 1, thisinsn); - - c->pass2_grf[reg].value = ref->value; - c->pass2_grf[reg].nextuse = thisinsn; - - ref->value->resident = &c->pass2_grf[reg]; - - /* Note that a fill is required: - */ - ref->unspill_reg = reg*2; - } - - /* Adjust the hw_reg to point at the value's current location: - */ - assert(ref->value == ref->value->resident->value); - ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2; - } - } - } -} - - - -/* Step 3: Work forwards once again. Perform register allocations, - * taking into account instructions like TEX which require contiguous - * result registers. Where necessary spill registers to scratch space - * and reload later. - */ -void brw_wm_pass2( struct brw_wm_compile *c ) -{ - GLuint insn; - GLuint i; - - init_registers(c); - - for (insn = 0; insn < c->nr_insns; insn++) { - struct brw_wm_instruction *inst = &c->instruction[insn]; - - /* Update registers' nextuse values: - */ - update_register_usage(c, insn); - - /* May need to unspill some args. - */ - load_args(c, inst); - - /* Allocate registers to hold results: - */ - switch (inst->opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - alloc_contiguous_dest(c, inst->dst, 4, insn); - break; - - default: - for (i = 0; i < 4; i++) { - if (inst->writemask & (1<<i)) { - assert(inst->dst[i]); - alloc_contiguous_dest(c, &inst->dst[i], 1, insn); - } - } - break; - } - - if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { - for (i = 0; i < 4; i++) - if (inst->dst[i]) - spill_value(c, inst->dst[i]); - } - } - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "pass2"); - } - - c->state = PASS2_DONE; - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "pass2/done"); - } -} |