diff options
author | Jason Ekstrand <[email protected]> | 2017-03-18 12:02:45 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-05-26 07:58:01 -0700 |
commit | 18e18a1863583ea35445d8add8c726d22e126b68 (patch) | |
tree | 055a442df4e002a36362dca17c2594c8a314a470 /src/intel/compiler/brw_clip_util.c | |
parent | 9fb8a8775bf2b122811fdbe2389435dfeafa1fa0 (diff) |
i965: Move clip program compilation to the compiler
Reviewed-by: Topi Pohjolainen <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_clip_util.c')
-rw-r--r-- | src/intel/compiler/brw_clip_util.c | 469 |
1 files changed, 469 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_clip_util.c b/src/intel/compiler/brw_clip_util.c new file mode 100644 index 00000000000..e01fbc6a12f --- /dev/null +++ b/src/intel/compiler/brw_clip_util.c @@ -0,0 +1,469 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <[email protected]> + */ + + +#include "main/macros.h" +#include "main/enums.h" +#include "program/program.h" + +#include "brw_clip.h" + + +struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_codegen *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, + brw_swizzle(pos, BRW_SWIZZLE_WWWW)); + brw_set_default_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + GLuint ndc_offset = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset)); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + * + * Beware that dest_ptr can be equal to v0_ptr! + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + bool force_edgeflag) +{ + struct brw_codegen *p = &c->func; + struct brw_reg t_nopersp, v0_ndc_copy; + GLuint slot; + + /* Just copy the vertex header: + */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on Ironlake, so needn't change it + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + + /* First handle the 3D and NDC interpolation, in case we + * need noperspective interpolation. Doing it early has no + * performance impact in any case. + */ + + /* Take a copy of the v0 NDC coordinates, in case dest == v0. */ + if (c->key.contains_noperspective_varying) { + GLuint offset = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + v0_ndc_copy = get_tmp(c); + brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset)); + } + + /* Compute the new 3D position + * + * dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0 + */ + { + GLuint delta = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); + struct brw_reg tmp = get_tmp(c); + brw_MUL(p, vec4(brw_null_reg()), deref_4f(v1_ptr, delta), t0); + brw_MAC(p, tmp, negate(deref_4f(v0_ptr, delta)), t0); + brw_ADD(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta), tmp); + release_tmp(c, tmp); + } + + /* Recreate the projected (NDC) coordinate in the new vertex header */ + brw_clip_project_vertex(c, dest_ptr); + + /* If we have noperspective attributes, + * we need to compute the screen-space t + */ + if (c->key.contains_noperspective_varying) { + GLuint delta = brw_varying_to_offset(&c->vue_map, + BRW_VARYING_SLOT_NDC); + struct brw_reg tmp = get_tmp(c); + t_nopersp = get_tmp(c); + + /* t_nopersp = vec4(v1.xy, dest.xy) */ + brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta)); + brw_MOV(p, tmp, deref_4f(dest_ptr, delta)); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, + brw_writemask(t_nopersp, WRITEMASK_ZW), + brw_swizzle(tmp, BRW_SWIZZLE_XYXY)); + + /* t_nopersp = vec4(v1.xy, dest.xy) - v0.xyxy */ + brw_ADD(p, t_nopersp, t_nopersp, + negate(brw_swizzle(v0_ndc_copy, BRW_SWIZZLE_XYXY))); + + /* Add the absolute values of the X and Y deltas so that if + * the points aren't in the same place on the screen we get + * nonzero values to divide. + * + * After that, we have vert1 - vert0 in t_nopersp.x and + * vertnew - vert0 in t_nopersp.y + * + * t_nopersp = vec2(|v1.x -v0.x| + |v1.y -v0.y|, + * |dest.x-v0.x| + |dest.y-v0.y|) + */ + brw_ADD(p, + brw_writemask(t_nopersp, WRITEMASK_XY), + brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_XZXZ)), + brw_abs(brw_swizzle(t_nopersp, BRW_SWIZZLE_YWYW))); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + /* If the points are in the same place, just substitute a + * value to avoid divide-by-zero + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, + vec1(t_nopersp), + brw_imm_f(0)); + brw_IF(p, BRW_EXECUTE_1); + brw_MOV(p, t_nopersp, brw_imm_vf4(brw_float_to_vf(1.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0), + brw_float_to_vf(0.0))); + brw_ENDIF(p); + + /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */ + brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0)); + brw_MUL(p, vec1(t_nopersp), vec1(t_nopersp), + vec1(suboffset(t_nopersp, 1))); + brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, BRW_SWIZZLE_XXXX)); + brw_set_default_access_mode(p, BRW_ALIGN_1); + + release_tmp(c, tmp); + release_tmp(c, v0_ndc_copy); + } + + /* Now we can iterate over each attribute + * (could be done in pairs?) + */ + for (slot = 0; slot < c->vue_map.num_slots; slot++) { + int varying = c->vue_map.slot_to_varying[slot]; + GLuint delta = brw_vue_slot_to_offset(slot); + + /* HPOS, NDC already handled above */ + if (varying == VARYING_SLOT_POS || varying == BRW_VARYING_SLOT_NDC) + continue; + + + if (varying == VARYING_SLOT_EDGE) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } else if (varying == VARYING_SLOT_PSIZ) { + /* PSIZ doesn't need interpolation because it isn't used by the + * fragment shader. + */ + } else if (varying < VARYING_SLOT_MAX) { + /* This is a true vertex result (and not a special value for the VUE + * header), so interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + * + * Unless the attribute is flat shaded -- in which case just copy + * from one of the sources (doesn't matter which; already copied from pv) + */ + GLuint interp = c->key.interp_mode[slot]; + + if (interp != INTERP_MODE_FLAT) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg t = + interp == INTERP_MODE_NOPERSPECTIVE ? t_nopersp : t0; + + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + + release_tmp(c, tmp); + } + else { + brw_MOV(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta)); + } + } + } + + if (c->vue_map.num_slots % 2) { + GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots); + + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + if (c->key.contains_noperspective_varying) + release_tmp(c, t_nopersp); +} + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + enum brw_urb_write_flags flags, + GLuint header) +{ + struct brw_codegen *p = &c->func; + bool allocate = flags & BRW_URB_WRITE_ALLOCATE; + + brw_clip_ff_sync(c); + + /* Any URB entry that is allocated must subsequently be used or discarded, + * so it doesn't make sense to mark EOT and ALLOCATE at the same time. + */ + assert(!(allocate && (flags & BRW_URB_WRITE_EOT))); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a separate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a separate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + flags, + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + brw_clip_ff_sync(c); + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + BRW_URB_WRITE_UNUSED | BRW_URB_WRITE_EOT_COMPLETE, + 1, /* msg len */ + 0, /* response len */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* Distribute flatshaded attributes from provoking vertex prior to + * clipping. + */ +void brw_clip_copy_flatshaded_attributes( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_codegen *p = &c->func; + + for (int i = 0; i < c->vue_map.num_slots; i++) { + if (c->key.interp_mode[i] == INTERP_MODE_FLAT) { + brw_MOV(p, + byte_offset(c->reg.vertex[to], brw_vue_slot_to_offset(i)), + byte_offset(c->reg.vertex[from], brw_vue_slot_to_offset(i))); + } + } +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_codegen *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + if (p->devinfo->gen == 5 || p->devinfo->is_g4x) + brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14)); + else + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + if (p->devinfo->gen == 5) { + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); + brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + } + brw_ENDIF(p); + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + struct brw_codegen *p = &c->func; + + if (p->devinfo->gen == 5) { + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} |