diff options
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_clip.h | 163 | ||||
-rw-r--r-- | src/intel/compiler/brw_clip_line.c | 306 | ||||
-rw-r--r-- | src/intel/compiler/brw_clip_point.c | 49 | ||||
-rw-r--r-- | src/intel/compiler/brw_clip_tri.c | 662 | ||||
-rw-r--r-- | src/intel/compiler/brw_clip_unfilled.c | 531 | ||||
-rw-r--r-- | src/intel/compiler/brw_clip_util.c | 469 | ||||
-rw-r--r-- | src/intel/compiler/brw_compile_clip.c | 96 | ||||
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 64 |
8 files changed, 2340 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_clip.h b/src/intel/compiler/brw_clip.h new file mode 100644 index 00000000000..bdf7ee25848 --- /dev/null +++ b/src/intel/compiler/brw_clip.h @@ -0,0 +1,163 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + +#include "brw_compiler.h" +#include "brw_eu.h" + +/* Initial 3 verts, plus at most 6 additional verts from intersections + * with fixed planes, plus at most 8 additional verts from intersections + * with user clip planes + */ +#define MAX_VERTS (3+6+8) + +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_codegen func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + + struct brw_reg ff_sync; + + /* Bitmask indicating which coordinate attribute should be used for + * comparison to each clipping plane. A 0 indicates that VARYING_SLOT_POS + * should be used, because it's one of the fixed +/- x/y/z planes that + * constitute the bounds of the view volume. A 1 indicates that + * VARYING_SLOT_CLIP_VERTEX should be used (if available) since it's a user- + * defined clipping plane. + */ + struct brw_reg vertex_src_mask; + + /* Offset into the vertex of the current plane's clipdistance value */ + struct brw_reg clipdistance_offset; + } reg; + + /* Number of registers storing VUE data */ + GLuint nr_regs; + + GLuint first_tmp; + GLuint last_tmp; + + bool need_direction; + + struct brw_vue_map vue_map; +}; + +/** + * True if the given varying is one of the outputs of the vertex shader. + */ +static inline bool brw_clip_have_varying(struct brw_clip_compile *c, + GLuint varying) +{ + return (c->key.attrs & BITFIELD64_BIT(varying)) ? 1 : 0; +} + +/* Points are only culled, so no need for a clip routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + bool force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + enum brw_urb_write_flags flags, + GLuint header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c, + GLuint to, GLuint from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +struct brw_reg get_tmp( struct brw_clip_compile *c ); + +void brw_clip_project_position(struct brw_clip_compile *c, + struct brw_reg pos ); +void brw_clip_ff_sync(struct brw_clip_compile *c); +void brw_clip_init_ff_sync(struct brw_clip_compile *c); + +#endif diff --git a/src/intel/compiler/brw_clip_line.c b/src/intel/compiler/brw_clip_line.c new file mode 100644 index 00000000000..37f226657a9 --- /dev/null +++ b/src/intel/compiler/brw_clip_line.c @@ -0,0 +1,306 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + const struct gen_device_info *devinfo = c->func.devinfo; + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + c->reg.vertex_src_mask = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W); + i++; + + if (devinfo->gen == 5) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * GLfloat dp0 = DOTPROD( vtx0, plane[p] ); + * GLfloat dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (dp1 < 0.0f) { + * GLfloat t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * GLfloat t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + GLint clipdist0_offset = c->key.nr_userclip + ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) + : 0; + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + if (p->devinfo->has_negative_rhw_bug) { + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + } + + /* Set the initial vertex source mask: The first 6 planes are the bounds + * of the view volume; the next 8 planes are the user clipping planes. + */ + brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); + + /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. + * We'll increment 6 times before we start hitting actual user clipping. */ + brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); + + brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + + brw_IF(p, BRW_EXECUTE_1); + { + brw_AND(p, v1_null_ud, c->reg.vertex_src_mask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + { + /* user clip distance: just fetch the correct float from each vertex */ + struct brw_indirect temp_ptr = brw_indirect(7, 0); + brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx0), c->reg.clipdistance_offset); + brw_MOV(p, c->reg.dp0, deref_1f(temp_ptr, 0)); + brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx1), c->reg.clipdistance_offset); + brw_MOV(p, c->reg.dp1, deref_1f(temp_ptr, 0)); + } + brw_ELSE(p); + { + /* fixed plane: fetch the hpos, dp4 against the plane. */ + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation); + } + brw_ENDIF(p); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, vec1(c->reg.dp1), brw_imm_f(0.0f)); + + brw_IF(p, BRW_EXECUTE_1); + { + /* + * Both can be negative on GM965/G965 due to RHW workaround + * if so, this object should be rejected. + */ + if (p->devinfo->has_negative_rhw_bug) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p); + } + + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, + BRW_PREDICATE_NORMAL); + } + brw_ELSE(p); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + /* Only on GM965/G965 */ + if (p->devinfo->has_negative_rhw_bug) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + brw_IF(p, BRW_EXECUTE_1); + } + + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, + BRW_PREDICATE_NORMAL); + } + + if (p->devinfo->has_negative_rhw_bug) { + brw_ENDIF(p); + } + } + brw_ENDIF(p); + } + brw_ENDIF(p); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while (planemask>>=1) != 0 + */ + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false); + + brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE, + (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START); + brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE, + (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END); + } + brw_ENDIF(p); + brw_clip_kill_thread(c); +} + + + +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + brw_clip_init_ff_sync(c); + + if (c->key.contains_flat_varying) { + if (c->key.pv_first) + brw_clip_copy_flatshaded_attributes(c, 1, 0); + else + brw_clip_copy_flatshaded_attributes(c, 0, 1); + } + + clip_and_emit_line(c); +} diff --git a/src/intel/compiler/brw_clip_point.c b/src/intel/compiler/brw_clip_point.c new file mode 100644 index 00000000000..ac8f3153d56 --- /dev/null +++ b/src/intel/compiler/brw_clip_point.c @@ -0,0 +1,49 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + + +/* Point clipping, nothing to do? + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_init_ff_sync(c); + + brw_clip_kill_thread(c); +} diff --git a/src/intel/compiler/brw_clip_tri.c b/src/intel/compiler/brw_clip_tri.c new file mode 100644 index 00000000000..8ccf9e49b20 --- /dev/null +++ b/src/intel/compiler/brw_clip_tri.c @@ -0,0 +1,662 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ) +{ + const struct gen_device_info *devinfo = c->func.devinfo; + GLuint i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + if (c->vue_map.num_slots % 2) { + /* The VUE has an odd number of slots so the last register is only half + * used. Fill the second half with zero. + */ + for (j = 0; j < 3; j++) { + GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots); + + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->reg.vertex_src_mask = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + c->reg.clipdistance_offset = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_W); + i++; + + if (devinfo->gen == 5) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes? + */ + brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + brw_ELSE(p); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_flatshaded_attributes(c, 1, 0); + brw_clip_copy_flatshaded_attributes(c, 2, 0); + } + brw_ELSE(p); + { + if (c->key.pv_first) { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRIFAN)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_flatshaded_attributes(c, 0, 1); + brw_clip_copy_flatshaded_attributes(c, 2, 1); + } + brw_ELSE(p); + { + brw_clip_copy_flatshaded_attributes(c, 1, 0); + brw_clip_copy_flatshaded_attributes(c, 2, 0); + } + brw_ENDIF(p); + } + else { + brw_clip_copy_flatshaded_attributes(c, 0, 2); + brw_clip_copy_flatshaded_attributes(c, 1, 2); + } + } + brw_ENDIF(p); +} + + +/** + * Loads the clip distance for a vertex into `dst`, and ends with + * a comparison of it to zero with the condition `cond`. + * + * - If using a fixed plane, the distance is dot(hpos, plane). + * - If using a user clip plane, the distance is directly available in the vertex. + */ +static inline void +load_clip_distance(struct brw_clip_compile *c, struct brw_indirect vtx, + struct brw_reg dst, GLuint hpos_offset, int cond) +{ + struct brw_codegen *p = &c->func; + + dst = vec4(dst); + brw_AND(p, vec1(brw_null_reg()), c->reg.vertex_src_mask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect temp_ptr = brw_indirect(7, 0); + brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx), c->reg.clipdistance_offset); + brw_MOV(p, vec1(dst), deref_1f(temp_ptr, 0)); + } + brw_ELSE(p); + { + brw_MOV(p, dst, deref_4f(vtx, hpos_offset)); + brw_DP4(p, dst, dst, c->reg.plane_equation); + } + brw_ENDIF(p); + + brw_CMP(p, brw_null_reg(), cond, vec1(dst), brw_imm_f(0.0f)); +} + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly. + */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + GLint clipdist0_offset = c->key.nr_userclip + ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) + : 0; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + /* Set the initial vertex source mask: The first 6 planes are the bounds + * of the view volume; the next 8 planes are the user clipping planes. + */ + brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); + + /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. + * We'll increment 6 times before we start hitting actual user clipping. */ + brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); + + brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + + brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L); + /* (prev < 0.0f) */ + brw_IF(p, BRW_EXECUTE_1); + { + load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE); + /* IS_POSITIVE(next) + */ + brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, + BRW_PREDICATE_NORMAL); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p); + + } + brw_ELSE(p); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L); + /* (next < 0.0f) + */ + brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. + */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, + BRW_PREDICATE_NORMAL); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p); + } + brw_ENDIF(p); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + c->reg.nr_verts, + brw_imm_ud(3)); + brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); + + /* && (planemask>>=1) != 0 + */ + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); + brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); + } + brw_WHILE(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_G); + + brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, + ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, + (_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_EOT_COMPLETE, + ((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END)); + } + brw_ENDIF(p); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p); +} + +static void brw_clip_test( struct brw_clip_compile *c ) +{ + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_codegen *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_POS); + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, hpos_offset)); + brw_MOV(p, v1, deref_4f(vt1, hpos_offset)); + brw_MOV(p, v2, deref_4f(vt2, hpos_offset)); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); + + /* test nearz, xmin, ymin plane */ + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + /* test farz, xmax, ymax plane */ + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + release_tmps(c); +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + if (p->devinfo->has_negative_rhw_bug) { + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p); + } + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.contains_flat_varying) + brw_clip_tri_flat_shade(c); + + if ((c->key.clip_mode == BRW_CLIP_MODE_NORMAL) || + (c->key.clip_mode == BRW_CLIP_MODE_KERNEL_CLIP)) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/intel/compiler/brw_clip_unfilled.c b/src/intel/compiler/brw_clip_unfilled.c new file mode 100644 index 00000000000..83f9447cbd4 --- /dev/null +++ b/src/intel/compiler/brw_clip_unfilled.c @@ -0,0 +1,531 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset); + + + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. + * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); + + /* Take their crossproduct: + */ + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, BRW_SWIZZLE_YZXW), + brw_swizzle(f, BRW_SWIZZLE_ZXYW)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, BRW_SWIZZLE_ZXYW)), + brw_swizzle(f, BRW_SWIZZLE_YZXW)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint conditional; + + assert (!(c->key.fill_ccw == BRW_CLIP_FILL_MODE_CULL && + c->key.fill_cw == BRW_CLIP_FILL_MODE_CULL)); + + if (c->key.fill_ccw == BRW_CLIP_FILL_MODE_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + GLuint conditional; + + /* Do we have any colors to copy? + */ + if (!(brw_clip_have_varying(c, VARYING_SLOT_COL0) && + brw_clip_have_varying(c, VARYING_SLOT_BFC0)) && + !(brw_clip_have_varying(c, VARYING_SLOT_COL1) && + brw_clip_have_varying(c, VARYING_SLOT_BFC1))) + return; + + /* In some weird degenerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting weird GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + brw_IF(p, BRW_EXECUTE_1); + { + GLuint i; + + for (i = 0; i < 3; i++) { + if (brw_clip_have_varying(c, VARYING_SLOT_COL0) && + brw_clip_have_varying(c, VARYING_SLOT_BFC0)) + brw_MOV(p, + byte_offset(c->reg.vertex[i], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_COL0)), + byte_offset(c->reg.vertex[i], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_BFC0))); + + if (brw_clip_have_varying(c, VARYING_SLOT_COL1) && + brw_clip_have_varying(c, VARYING_SLOT_BFC1)) + brw_MOV(p, + byte_offset(c->reg.vertex[i], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_COL1)), + byte_offset(c->reg.vertex[i], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_BFC1))); + } + } + brw_ENDIF(p); +} + + + + +/* + GLfloat iz = 1.0 / dir.z; + GLfloat ac = dir.x * iz; + GLfloat bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + if (ctx->Polygon.OffsetClamp && isfinite(ctx->Polygon.OffsetClamp)) { + if (ctx->Polygon.OffsetClamp < 0) + offset = MAX2( offset, ctx->Polygon.OffsetClamp ); + else + offset = MIN2( offset, ctx->Polygon.OffsetClamp ); + } + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), vec2(dir), get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), + brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + brw_MUL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_units)); + if (c->key.offset_clamp && isfinite(c->key.offset_clamp)) { + brw_CMP(p, + vec1(brw_null_reg()), + c->key.offset_clamp < 0 ? BRW_CONDITIONAL_GE : BRW_CONDITIONAL_L, + vec1(off), + brw_imm_f(c->key.offset_clamp)); + brw_SEL(p, vec1(off), vec1(off), brw_imm_f(c->key.offset_clamp)); + } +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + brw_IF(p, BRW_EXECUTE_1); + { + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_EQ); + brw_MOV(p, byte_offset(c->reg.vertex[0], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_EDGE)), + brw_imm_f(0)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_EQ); + brw_MOV(p, byte_offset(c->reg.vertex[2], + brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_EDGE)), + brw_imm_f(0)); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + } + brw_ENDIF(p); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_codegen *p = &c->func; + GLuint ndc_offset = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + struct brw_reg z = deref_1f(vert, ndc_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + bool do_offset) +{ + struct brw_codegen *p = &c->func; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a separate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_G); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_EDGE)), + brw_imm_f(0)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, + (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START); + brw_clip_emit_vue(c, v1, BRW_URB_WRITE_ALLOCATE_COMPLETE, + (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_END); + } + brw_ENDIF(p); + + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); +} + + + +static void emit_points(struct brw_clip_compile *c, + bool do_offset ) +{ + struct brw_codegen *p = &c->func; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, brw_varying_to_offset(&c->vue_map, + VARYING_SLOT_EDGE)), + brw_imm_f(0)); + brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, BRW_URB_WRITE_ALLOCATE_COMPLETE, + (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) + | URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); + } + brw_ENDIF(p); + + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); + } + brw_WHILE(p); + brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + GLuint mode, + bool do_offset ) +{ + switch (mode) { + case BRW_CLIP_FILL_MODE_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case BRW_CLIP_FILL_MODE_LINE: + emit_lines(c, do_offset); + break; + + case BRW_CLIP_FILL_MODE_POINT: + emit_points(c, do_offset); + break; + + case BRW_CLIP_FILL_MODE_CULL: + unreachable("not reached"); + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + /* Direction culling has already been done. + */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != BRW_CLIP_FILL_MODE_CULL && + c->key.fill_cw != BRW_CLIP_FILL_MODE_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + brw_ELSE(p); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p); + } + else if (c->key.fill_cw != BRW_CLIP_FILL_MODE_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != BRW_CLIP_FILL_MODE_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == BRW_CLIP_FILL_MODE_CULL || + c->key.fill_cw == BRW_CLIP_FILL_MODE_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); + + assert(brw_clip_have_varying(c, VARYING_SLOT_EDGE)); + + if (c->key.fill_ccw == BRW_CLIP_FILL_MODE_CULL && + c->key.fill_cw == BRW_CLIP_FILL_MODE_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == BRW_CLIP_FILL_MODE_CULL || + c->key.fill_cw == BRW_CLIP_FILL_MODE_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + */ + if (c->key.contains_flat_varying) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} diff --git a/src/intel/compiler/brw_clip_util.c b/src/intel/compiler/brw_clip_util.c new file mode 100644 index 00000000000..e01fbc6a12f --- /dev/null +++ b/src/intel/compiler/brw_clip_util.c @@ -0,0 +1,469 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + + +struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_codegen *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, + brw_swizzle(pos, BRW_SWIZZLE_WWWW)); + brw_set_default_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + GLuint ndc_offset = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset)); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + * + * Beware that dest_ptr can be equal to v0_ptr! + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + bool force_edgeflag) +{ + struct brw_codegen *p = &c->func; + struct brw_reg t_nopersp, v0_ndc_copy; + GLuint slot; + + /* Just copy the vertex header: + */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on Ironlake, so needn't change it + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + + /* First handle the 3D and NDC interpolation, in case we + * need noperspective interpolation. Doing it early has no + * performance impact in any case. + */ + + /* Take a copy of the v0 NDC coordinates, in case dest == v0. */ + if (c->key.contains_noperspective_varying) { + GLuint offset = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + v0_ndc_copy = get_tmp(c); + brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset)); + } + + /* Compute the new 3D position + * + * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0 + */ + { + GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + struct brw_reg tmp = get_tmp(c); + brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0); + brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0); + brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); + release_tmp(c, tmp); + } + + /* Recreate the projected (NDC) coordinate in the new vertex header */ + brw_clip_project_vertex(c, dest_ptr); + + /* If we have noperspective attributes, + * we need to compute the screen-space t + */ + if (c->key.contains_noperspective_varying) { + GLuint delta = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + struct brw_reg tmp = get_tmp(c); + t_nopersp = get_tmp(c); + + /* t_nopersp = vec4(v1.xy, dest.xy) */ + brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta)); + brw_MOV(p, tmp, deref_4f(dest_ptr, delta)); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, + brw_writemask(t_nopersp, WRITEMASK_ZW), + brw_swizzle(tmp, BRW_SWIZZLE_XYXY)); + + /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */ + brw_ADD(p, t_nopersp, t_nopersp, + negate(brw_swizzle(v0_ndc_copy, BRW_SWIZZLE_XYXY))); + + /* Add the absolute values of the X and Y deltas so that if + * the points aren't in the same place on the screen we get + * nonzero values to divide. + * + * After that, we have vert1 - vert0 in t_nopersp.x and + * vertnew - vert0 in t_nopersp.y + * + * t_nopersp = vec2(|v1.x -v0.x| + |v1.y -v0.y|, + * |dest.x-v0.x| + |dest.y-v0.y|) + */ + brw_ADD(p, + brw_writemask(t_nopersp, WRITEMASK_XY), + brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_XZXZ)), + brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_YWYW))); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + /* If the points are in the same place, just substitute a + * value to avoid divide-by-zero + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, + vec1(t_nopersp), + brw_imm_f(0)); + brw_IF(p, BRW_EXECUTE_1); + brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0))); + brw_ENDIF(p); + + /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */ + brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0)); + brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp), + vec1(suboffset(t_nopersp, 1))); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, BRW_SWIZZLE_XXXX)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + release_tmp(c, tmp); + release_tmp(c, v0_ndc_copy); + } + + /* Now we can iterate over each attribute + * (could be done in pairs?) + */ + for (slot = 0; slot < c->vue_map.num_slots; slot++) { + int varying = c->vue_map.slot_to_varying[slot]; + GLuint delta = brw_vue_slot_to_offset(slot); + + /* HPOS, NDC already handled above */ + if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC) + continue; + + + if (varying == VARYING_SLOT_EDGE) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } else if (varying == VARYING_SLOT_PSIZ) { + /* PSIZ doesn't need interpolation because it isn't used by the + * fragment shader. + */ + } else if (varying < VARYING_SLOT_MAX) { + /* This is a true vertex result (and not a special value for the VUE + * header), so interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + * + * Unless the attribute is flat shaded -- in which case just copy + * from one of the sources (doesn't matter which; already copied from pv) + */ + GLuint interp = c->key.interp_mode[slot]; + + if (interp != INTERP_MODE_FLAT) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg t = + interp == INTERP_MODE_NOPERSPECTIVE ? t_nopersp : t0; + + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + + release_tmp(c, tmp); + } + else { + brw_MOV(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta)); + } + } + } + + if (c->vue_map.num_slots % 2) { + GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots); + + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + if (c->key.contains_noperspective_varying) + release_tmp(c, t_nopersp); +} + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + enum brw_urb_write_flags flags, + GLuint header) +{ + struct brw_codegen *p = &c->func; + bool allocate = flags & BRW_URB_WRITE_ALLOCATE; + + brw_clip_ff_sync(c); + + /* Any URB entry that is allocated must subsequently be used or discarded, + * so it doesn't make sense to mark EOT and ALLOCATE at the same time. + */ + assert(!(allocate && (flags & BRW_URB_WRITE_EOT))); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a separate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a separate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + flags, + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + brw_clip_ff_sync(c); + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE, + 1, /* msg len */ + 0, /* response len */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* Distribute flatshaded attributes from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_codegen *p = &c->func; + + for (int i = 0; i < c->vue_map.num_slots; i++) { + if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { + brw_MOV(p, + byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)), + byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i))); + } + } +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + if (p->devinfo->gen == 5 || p->devinfo->is_g4x) + brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14)); + else + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + if (p->devinfo->gen == 5) { + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + } + brw_ENDIF(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + if (p->devinfo->gen == 5) { + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/src/intel/compiler/brw_compile_clip.c b/src/intel/compiler/brw_compile_clip.c new file mode 100644 index 00000000000..83788e4b648 --- /dev/null +++ b/src/intel/compiler/brw_compile_clip.c @@ -0,0 +1,96 @@ +/* + * Copyright © 2006 - 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_clip.h" + +#include "common/gen_debug.h" + +const unsigned * +brw_compile_clip(const struct brw_compiler *compiler, + void *mem_ctx, + const struct brw_clip_prog_key *key, + struct brw_clip_prog_data *prog_data, + struct brw_vue_map *vue_map, + unsigned *final_assembly_size) +{ + struct brw_clip_compile c; + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_codegen(compiler->devinfo, &c.func, mem_ctx); + + c.func.single_program_flow = 1; + + c.key = *key; + c.vue_map = *vue_map; + + /* nr_regs is the number of registers filled by reading data from the VUE. + * This program accesses the entire VUE, so nr_regs needs to be the size of + * the VUE (measured in pairs, since two slots are stored in each + * register). + */ + c.nr_regs = (c.vue_map.num_slots + 1)/2; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE); + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case GL_TRIANGLES: + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else + brw_emit_tri_clip( &c ); + break; + case GL_LINES: + brw_emit_line_clip( &c ); + break; + case GL_POINTS: + brw_emit_point_clip( &c ); + break; + default: + unreachable("not reached"); + } + + brw_compact_instructions(&c.func, 0, 0, NULL); + + *prog_data = c.prog_data; + + const unsigned *program = brw_get_program(&c.func, final_assembly_size); + + if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) { + fprintf(stderr, "clip:\n"); + brw_disassemble(compiler->devinfo, + program, 0, *final_assembly_size, stderr); + fprintf(stderr, "\n"); + } + + return program; +} diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 1f7afacfdae..95cbfb7c33e 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -281,6 +281,47 @@ struct brw_sf_prog_key { bool userclip_active:1; }; +enum brw_clip_mode { + BRW_CLIP_MODE_NORMAL = 0, + BRW_CLIP_MODE_CLIP_ALL = 1, + BRW_CLIP_MODE_CLIP_NON_REJECTED = 2, + BRW_CLIP_MODE_REJECT_ALL = 3, + BRW_CLIP_MODE_ACCEPT_ALL = 4, + BRW_CLIP_MODE_KERNEL_CLIP = 5, +}; + +enum brw_clip_fill_mode { + BRW_CLIP_FILL_MODE_LINE = 0, + BRW_CLIP_FILL_MODE_POINT = 1, + BRW_CLIP_FILL_MODE_FILL = 2, + BRW_CLIP_FILL_MODE_CULL = 3, +}; + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + uint64_t attrs; + bool contains_flat_varying; + bool contains_noperspective_varying; + unsigned char interp_mode[65]; /* BRW_VARYING_SLOT_COUNT */ + unsigned primitive:4; + unsigned nr_userclip:4; + bool pv_first:1; + bool do_unfilled:1; + enum brw_clip_fill_mode fill_cw:2; /* includes cull information */ + enum brw_clip_fill_mode fill_ccw:2; /* includes cull information */ + bool offset_cw:1; + bool offset_ccw:1; + bool copy_bfc_cw:1; + bool copy_bfc_ccw:1; + enum brw_clip_mode clip_mode:3; + + float offset_factor; + float offset_units; + float offset_clamp; +}; + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -905,6 +946,13 @@ struct brw_sf_prog_data { unsigned urb_entry_size; }; +struct brw_clip_prog_data { + uint32_t curb_read_length; /* user planes? */ + uint32_t clip_mode; + uint32_t urb_read_length; + uint32_t total_grf; +}; + #define DEFINE_PROG_DATA_DOWNCAST(stage) \ static inline struct brw_##stage##_prog_data * \ brw_##stage##_prog_data(struct brw_stage_prog_data *prog_data) \ @@ -1011,6 +1059,22 @@ brw_compile_sf(const struct brw_compiler *compiler, unsigned *final_assembly_size); /** + * Compile a clipper shader. + * + * This is a fixed-function shader determined entirely by the shader key and + * a VUE map. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_clip(const struct brw_compiler *compiler, + void *mem_ctx, + const struct brw_clip_prog_key *key, + struct brw_clip_prog_data *prog_data, + struct brw_vue_map *vue_map, + unsigned *final_assembly_size); + +/** * Compile a fragment shader. * * Returns the final assembly and the program's size. |