author    | Jason Ekstrand <[email protected]> | 2017-02-28 09:10:43 -0800
committer | Emil Velikov <[email protected]> | 2017-03-13 11:16:34 +0000
commit    | 700bebb958e93f4d472c383de62ced9db8e64bec (patch)
tree      | 0075c098c56c338f38ba0db80b9dba3e7e268a17 /src/intel/compiler/brw_vec4_tes.cpp
parent    | d0d4a5f43b4dd79bd7bfff7c7deaade10bfebf7c (diff)
i965: Move the back-end compiler to src/intel/compiler
Mostly a dummy git mv with a couple of noticeable parts:
- With the earlier header cleanups, nothing in src/intel depends on
  files from src/mesa/drivers/dri/i965/
- Both Autoconf and Android builds are addressed. Thanks to Mauro and
Tapani for the fixups in the latter
- brw_util.[ch] is not really compiler specific, so it's moved to i965.
v2:
- move brw_eu_defines.h instead of brw_defines.h
- remove no-longer applicable includes
- add missing vulkan/ prefix in the Android build (thanks Tapani)
v3:
- don't list brw_defines.h in src/intel/Makefile.sources (Jason)
- rebase on top of the oa patches
[Emil Velikov: commit message, various small fixes throughout]
Signed-off-by: Emil Velikov <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_vec4_tes.cpp')
-rw-r--r-- | src/intel/compiler/brw_vec4_tes.cpp | 296
1 file changed, 296 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp
new file mode 100644
index 00000000000..bcf9a87eb01
--- /dev/null
+++ b/src/intel/compiler/brw_vec4_tes.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tes.cpp
+ *
+ * Tessellation evaluation shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_vec4_tes.h"
+#include "brw_cfg.h"
+#include "common/gen_debug.h"
+
+namespace brw {
+
+vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler,
+                                   void *log_data,
+                                   const struct brw_tes_prog_key *key,
+                                   struct brw_tes_prog_data *prog_data,
+                                   const nir_shader *shader,
+                                   void *mem_ctx,
+                                   int shader_time_index)
+   : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
+                  shader, mem_ctx, false, shader_time_index)
+{
+}
+
+
+dst_reg *
+vec4_tes_visitor::make_reg_for_system_value(int location)
+{
+   return NULL;
+}
+
+void
+vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_tess_level_inner:
+      break;
+   default:
+      vec4_visitor::nir_setup_system_value_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::setup_payload()
+{
+   int reg = 0;
+
+   /* The payload always contains important data in r0 and r1, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.
+    */
+   reg += 2;
+
+   reg = setup_uniforms(reg);
+
+   foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         bool is_64bit = type_sz(inst->src[i].type) == 8;
+
+         unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
+         struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
+         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
+         grf.swizzle = inst->src[i].swizzle;
+         grf.type = inst->src[i].type;
+         grf.abs = inst->src[i].abs;
+         grf.negate = inst->src[i].negate;
+
+         /* For 64-bit attributes we can end up with components XY in the
+          * second half of a register and components ZW in the first half
+          * of the next. Fix it up here.
+          */
+         if (is_64bit && grf.subnr > 0) {
+            /* We can't do swizzles that mix XY and ZW channels in this case.
+             * Such cases should have been handled by the scalarization pass.
+             */
+            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
+                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
+            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
+               grf.subnr = 0;
+               grf.nr++;
+               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
+            }
+         }
+
+         inst->src[i] = grf;
+      }
+   }
+
+   reg += 8 * prog_data->urb_read_length;
+
+   this->first_non_payload_grf = reg;
+}
+
+
+void
+vec4_tes_visitor::emit_prolog()
+{
+   input_read_header = src_reg(this, glsl_type::uvec4_type);
+   emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header));
+
+   this->current_annotation = NULL;
+}
+
+
+void
+vec4_tes_visitor::emit_urb_write_header(int mrf)
+{
+   /* No need to do anything for DS; an implied write to this MRF will be
+    * performed by VS_OPCODE_URB_WRITE.
+    */
+   (void) mrf;
+}
+
+
+vec4_instruction *
+vec4_tes_visitor::emit_urb_write_opcode(bool complete)
+{
+   /* For DS, the URB writes end the thread. */
+   if (complete) {
+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+   }
+
+   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   inst->urb_write_flags = complete ?
+      BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS;
+
+   return inst;
+}
+
+void
+vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+   const struct brw_tes_prog_data *tes_prog_data =
+      (const struct brw_tes_prog_data *) prog_data;
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_tess_coord:
+      /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */
+      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+               src_reg(brw_vec8_grf(1, 0))));
+      break;
+   case nir_intrinsic_load_tess_level_outer:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_ZWZW)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 1, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      }
+      break;
+   case nir_intrinsic_load_tess_level_inner:
+      if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  swizzle(src_reg(ATTR, 0, glsl_type::vec4_type),
+                          BRW_SWIZZLE_WZYX)));
+      } else {
+         emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F),
+                  src_reg(ATTR, 1, glsl_type::float_type)));
+      }
+      break;
+   case nir_intrinsic_load_primitive_id:
+      emit(TES_OPCODE_GET_PRIMITIVE_ID,
+           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+      break;
+
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_vertex_input: {
+      src_reg indirect_offset = get_indirect_offset(instr);
+      unsigned imm_offset = instr->const_index[0];
+      src_reg header = input_read_header;
+      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
+      unsigned first_component = nir_intrinsic_component(instr);
+      if (is_64bit)
+         first_component /= 2;
+
+      if (indirect_offset.file != BAD_FILE) {
+         header = src_reg(this, glsl_type::uvec4_type);
+         emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header),
+              input_read_header, indirect_offset);
+      } else {
+         /* Arbitrarily only push up to 24 vec4 slots worth of data,
+          * which is 12 registers (since each holds 2 vec4 slots).
+          */
+         const unsigned max_push_slots = 24;
+         if (imm_offset < max_push_slots) {
+            const glsl_type *src_glsl_type =
+               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
+            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
+            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+            const brw_reg_type dst_reg_type =
+               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
+            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));
+
+            prog_data->urb_read_length =
+               MAX2(prog_data->urb_read_length,
+                    DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
+            break;
+         }
+      }
+
+      if (!is_64bit) {
+         dst_reg temp(this, glsl_type::ivec4_type);
+         vec4_instruction *read =
+            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+         read->offset = imm_offset;
+         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+         src_reg src = src_reg(temp);
+         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+         /* Copy to target. We might end up with some funky writemasks landing
+          * in here, but we really don't want them in the above pseudo-ops.
+          */
+         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+         dst.writemask = brw_writemask_for_size(instr->num_components);
+         emit(MOV(dst, src));
+      } else {
+         /* For 64-bit we need to load twice as many 32-bit components, and for
+          * dvec3/4 we need to emit 2 URB Read messages
+          */
+         dst_reg temp(this, glsl_type::dvec4_type);
+         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
+
+         vec4_instruction *read =
+            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
+         read->offset = imm_offset;
+         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+         if (instr->num_components > 2) {
+            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
+                        src_reg(header));
+            read->offset = imm_offset + 1;
+            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+         }
+
+         src_reg temp_as_src = src_reg(temp);
+         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+         dst_reg shuffled(this, glsl_type::dvec4_type);
+         shuffle_64bit_data(shuffled, temp_as_src, false);
+
+         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
+         dst.writemask = brw_writemask_for_size(instr->num_components);
+         emit(MOV(dst, src_reg(shuffled)));
+      }
+      break;
+   }
+   default:
+      vec4_visitor::nir_emit_intrinsic(instr);
+   }
+}
+
+
+void
+vec4_tes_visitor::emit_thread_end()
+{
+   /* For DS, we always end the thread by emitting a single vertex.
+    * emit_urb_write_opcode() will take care of setting the eot flag on the
+    * SEND instruction.
+    */
+   emit_vertex();
+}
+
+} /* namespace brw */