author    | Jason Ekstrand <[email protected]> | 2017-02-28 09:10:43 -0800
committer | Emil Velikov <[email protected]> | 2017-03-13 11:16:34 +0000
commit    | 700bebb958e93f4d472c383de62ced9db8e64bec (patch)
tree      | 0075c098c56c338f38ba0db80b9dba3e7e268a17 /src/intel/compiler/brw_vec4_tes.cpp
parent    | d0d4a5f43b4dd79bd7bfff7c7deaade10bfebf7c (diff)
i965: Move the back-end compiler to src/intel/compiler
Mostly a dummy git mv with a couple of noticeable parts:
- With the earlier header cleanups, nothing in src/intel depends on
  files from src/mesa/drivers/dri/i965/
- Both Autoconf and Android builds are addressed. Thanks to Mauro and
Tapani for the fixups in the latter
- brw_util.[ch] is not really compiler specific, so it's moved to i965.
v2:
- move brw_eu_defines.h instead of brw_defines.h
- remove no-longer applicable includes
- add missing vulkan/ prefix in the Android build (thanks Tapani)
v3:
- don't list brw_defines.h in src/intel/Makefile.sources (Jason)
- rebase on top of the oa patches
[Emil Velikov: commit message, various small fixes throughout]
Signed-off-by: Emil Velikov <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_vec4_tes.cpp')
-rw-r--r-- | src/intel/compiler/brw_vec4_tes.cpp | 296
1 file changed, 296 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp
new file mode 100644
index 00000000000..bcf9a87eb01
--- /dev/null
+++ b/src/intel/compiler/brw_vec4_tes.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tes.cpp
+ *
+ * Tessellation evaluation shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_vec4_tes.h"
+#include "brw_cfg.h"
+#include "common/gen_debug.h"
+
+namespace brw {
+
+vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
+                                   void *log_data,
+                                   const struct brw_tes_prog_key *key,
+                                   struct brw_tes_prog_data *prog_data,
+                                   const nir_shader *shader,
+                                   void *mem_ctx,
+                                   int shader_time_index)
+   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
+                  shader, mem_ctx, false, shader_time_index)
+{
+}
+
+
+dst_reg *
+vec4_tes_visitor::make_reg_for_system_value(int location)
+{
+   return NULL;
+}
+
+void
+vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_tess_level_inner:
+      break;
+   default:
+      vec4_visitor::nir_setup_system_value_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::setup_payload()
+{
+   int reg = 0;
+
+   /* The payload always contains important data in r0 and r1, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.
+    */
+   reg += 2;
+
+   reg = setup_uniforms(reg);
+
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         bool is_64bit = type_sz(inst->src[i].type) == 8;
+
+         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
+         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
+         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
+         grf.swizzle = inst->src[i].swizzle;
+         grf.type = inst->src[i].type;
+         grf.abs = inst->src[i].abs;
+         grf.negate = inst->src[i].negate;
+
+         /* For 64-bit attributes we can end up with components XY in the
+          * second half of a register and components ZW in the first half
+          * of the next. Fix it up here.
+          */
+         if (is_64bit && grf.subnr > 0) {
+            /* We can't do swizzles that mix XY and ZW channels in this case.
+             * Such cases should have been handled by the scalarization pass.
+             */
+            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
+                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
+            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
+               grf.subnr = 0;
+               grf.nr++;
+               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
+            }
+         }
+
+         inst->src[i] = grf;
+      }
+   }
+
+   reg += 8 * prog_data->urb_read_length;
+
+   this->first_non_payload_grf = reg;
+}
+
+
+void
+vec4_tes_visitor::emit_prolog()
+{
+   input_read_header = src_reg(this, glsl_type::uvec4_type);
+   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
+
+   this->current_annotation = NULL;
+}
+
+
+void
+vec4_tes_visitor::emit_urb_write_header(int mrf)
+{
+   /* No need to do anything for DS; an implied write to this MRF will be
+    * performed by VS_OPCODE_URB_WRITE.
+    */
+   (void) mrf;
+}
+
+
+vec4_instruction *
+vec4_tes_visitor::emit_urb_write_opcode(bool complete)
+{
+   /* For DS, the URB writes end the thread. */
+   if (complete) {
+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->urb_write_flags = complete ?
+      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
+
+   return inst;
+}
+
+void
+vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   const struct brw_tes_prog_data *tes_prog_data =
+      (const struct brw_tes_prog_data *) prog_data;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_coord:
+      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
+      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+               src_reg(brw_vec8_grf(1, 0))));
+      break;
+   case nir_intrinsic_load_tess_level_outer:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_ZWZW)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      }
+      break;
+   case nir_intrinsic_load_tess_level_inner:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  src_reg(ATTR, 1, glsl_type::float_type)));
+      }
+      break;
+   case nir_intrinsic_load_primitive_id:
+      emit(TES_OPCODE_GET_PRIMITIVE_ID,
+           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+      break;
+
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input: {
+      src_reg indirect_offset = get_indirect_offset(instr);
+      unsigned imm_offset = instr->const_index[0];
+      src_reg header = input_read_header;
+      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
+      unsigned first_component = nir_intrinsic_component(instr);
+      if (is_64bit)
+         first_component /= 2;
+
+      if (indirect_offset.file != BAD_FILE) {
+         header = src_reg(this, glsl_type::uvec4_type);
+         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
+              input_read_header, indirect_offset);
+      } else {
+         /* Arbitrarily only push up to 24 vec4 slots worth of data,
+          * which is 12 registers (since each holds 2 vec4 slots).
+          */
+         const unsigned max_push_slots = 24;
+         if (imm_offset < max_push_slots) {
+            const glsl_type *src_glsl_type =
+               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
+            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
+            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+            const brw_reg_type dst_reg_type =
+               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
+            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));
+
+            prog_data->urb_read_length =
+               MAX2(prog_data->urb_read_length,
+                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
+            break;
+         }
+      }
+
+      if (!is_64bit) {
+         dst_reg temp(this, glsl_type::ivec4_type);
+         vec4_instruction *read =
+            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+         read->offset = imm_offset;
+         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+         src_reg src = src_reg(temp);
+         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+         /* Copy to target. We might end up with some funky writemasks landing
+          * in here, but we really don't want them in the above pseudo-ops.
+          */
+         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+         dst.writemask = brw_writemask_for_size(instr->num_components);
+         emit(MOV(dst, src));
+      } else {
+         /* For 64-bit we need to load twice as many 32-bit components, and for
+          * dvec3/4 we need to emit 2 URB Read messages
+          */
+         dst_reg temp(this, glsl_type::dvec4_type);
+         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
+
+         vec4_instruction *read =
+            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
+         read->offset = imm_offset;
+         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+         if (instr->num_components > 2) {
+            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
+                        src_reg(header));
+            read->offset = imm_offset + 1;
+            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+         }
+
+         src_reg temp_as_src = src_reg(temp);
+         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+         dst_reg shuffled(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(shuffled, temp_as_src, false);
+
+         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
+         dst.writemask = brw_writemask_for_size(instr->num_components);
+         emit(MOV(dst, src_reg(shuffled)));
+      }
+      break;
+   }
+   default:
+      vec4_visitor::nir_emit_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::emit_thread_end()
+{
+   /* For DS, we always end the thread by emitting a single vertex.
+    * emit_urb_write_opcode() will take care of setting the eot flag on the
+    * SEND instruction.
+    */
+   emit_vertex();
+}
+
+} /* namespace brw */