summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources2
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h26
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_link.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_reg.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp17
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_tcs.c262
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_cse.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp247
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_nir.cpp23
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp496
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_tcs.h84
19 files changed, 1195 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 7354aafbd39..0b706de69a0 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -75,6 +75,7 @@ i965_compiler_FILES = \
brw_vec4_reg_allocate.cpp \
brw_vec4_surface_builder.cpp \
brw_vec4_surface_builder.h \
+ brw_vec4_tcs.cpp \
brw_vec4_visitor.cpp \
brw_vec4_vs_visitor.cpp \
brw_vue_map.c \
@@ -150,6 +151,7 @@ i965_FILES = \
brw_state.h \
brw_state_upload.c \
brw_structs.h \
+ brw_tcs.c \
brw_tcs_surface_state.c \
brw_tes.c \
brw_tes_surface_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 64d831d4e91..e6bae8e902f 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -191,6 +191,16 @@ struct brw_vs_prog_key {
struct brw_sampler_prog_key_data tex;
};
+/** The program key for Tessellation Control Shaders. */
+struct brw_tcs_prog_key
+{
+ unsigned program_string_id;
+
+ GLenum tes_primitive_mode;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
/** The program key for Tessellation Evaluation Shaders. */
struct brw_tes_prog_key
{
@@ -677,6 +687,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
char **error_str);
/**
+ * Compile a tessellation control shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_tcs(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const struct brw_tcs_prog_key *key,
+ struct brw_tcs_prog_data *prog_data,
+ const struct nir_shader *nir,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
* Compile a tessellation evaluation shader.
*
* Returns the final assembly and the program's size.
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 5e840d18920..1d989f351bf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct gl_vertex_program *p)
return (const struct brw_vertex_program *) p;
}
+static inline struct brw_tess_ctrl_program *
+brw_tess_ctrl_program(struct gl_tess_ctrl_program *p)
+{
+ return (struct brw_tess_ctrl_program *) p;
+}
+
static inline struct brw_tess_eval_program *
brw_tess_eval_program(struct gl_tess_eval_program *p)
{
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 4a184cf72f3..cc19c06f162 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1305,6 +1305,14 @@ enum opcode {
* UD immediate).
*/
SHADER_OPCODE_MOV_INDIRECT,
+
+ VEC4_OPCODE_URB_READ,
+ TCS_OPCODE_GET_INSTANCE_ID,
+ TCS_OPCODE_URB_WRITE,
+ TCS_OPCODE_SET_INPUT_URB_OFFSETS,
+ TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
+ TCS_OPCODE_GET_PRIMITIVE_ID,
+ TCS_OPCODE_CREATE_BARRIER_HEADER,
};
enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index f5a7d204b3d..7cdc830f6b8 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx,
struct gl_shader_program *sh_prog)
{
struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ struct gl_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
@@ -56,6 +57,9 @@ brw_shader_precompile(struct gl_context *ctx,
if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
return false;
+ if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
+ return false;
+
if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 1cdab97a82a..3d9e1b983c8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -56,6 +56,7 @@ void
brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
struct gl_shader *shader, struct gl_program *prog);
+void brw_upload_tcs_prog(struct brw_context *brw);
void brw_upload_tes_prog(struct brw_context *brw);
#ifdef __cplusplus
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index fa912c96c36..9f2ff9ae5ad 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -84,6 +84,7 @@ struct brw_device_info;
#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)
+#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0)
static inline bool
brw_is_single_value_swizzle(unsigned swiz)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d9545685b1b..9b64ae475bb 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -85,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->scalar_stage[MESA_SHADER_VERTEX] =
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+ compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
@@ -137,6 +138,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
}
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false;
compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
@@ -549,6 +551,21 @@ brw_instruction_name(enum opcode op)
return "mulh";
case SHADER_OPCODE_MOV_INDIRECT:
return "mov_indirect";
+
+ case VEC4_OPCODE_URB_READ:
+ return "urb_read";
+ case TCS_OPCODE_GET_INSTANCE_ID:
+ return "tcs_get_instance_id";
+ case TCS_OPCODE_URB_WRITE:
+ return "tcs_urb_write";
+ case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
+ return "tcs_set_input_urb_offsets";
+ case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ return "tcs_set_output_urb_offsets";
+ case TCS_OPCODE_GET_PRIMITIVE_ID:
+ return "tcs_get_primitive_id";
+ case TCS_OPCODE_CREATE_BARRIER_HEADER:
+ return "tcs_create_barrier_header";
}
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 2e73f123082..593361348fd 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
bool brw_vs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
+bool brw_tcs_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog);
bool brw_tes_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index c657b254f04..56962d59c49 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -678,6 +678,7 @@ brw_upload_programs(struct brw_context *brw,
{
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
+ brw_upload_tcs_prog(brw);
brw_upload_tes_prog(brw);
if (brw->gen < 6)
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
new file mode 100644
index 00000000000..b33a16d1710
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_tcs.c
+ *
+ * Tessellation control shader state upload code.
+ */
+
+#include "brw_context.h"
+#include "brw_nir.h"
+#include "brw_program.h"
+#include "brw_shader.h"
+#include "brw_state.h"
+#include "program/prog_parameter.h"
+
+static void
+brw_tcs_debug_recompile(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ const struct brw_tcs_prog_key *key)
+{
+ struct brw_cache_item *c = NULL;
+ const struct brw_tcs_prog_key *old_key = NULL;
+ bool found = false;
+
+ perf_debug("Recompiling tessellation control shader for program %d\n",
+ shader_prog->Name);
+
+ for (unsigned int i = 0; i < brw->cache.size; i++) {
+ for (c = brw->cache.items[i]; c; c = c->next) {
+ if (c->cache_id == BRW_CACHE_TCS_PROG) {
+ old_key = c->key;
+
+ if (old_key->program_string_id == key->program_string_id)
+ break;
+ }
+ }
+ if (c)
+ break;
+ }
+
+ if (!c) {
+ perf_debug(" Didn't find previous compile in the shader cache for "
+ "debug\n");
+ return;
+ }
+
+ found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
+ key->tes_primitive_mode);
+ found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
+
+ if (!found) {
+ perf_debug(" Something else\n");
+ }
+}
+
+static bool
+brw_codegen_tcs_prog(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct brw_tess_ctrl_program *tcp,
+ struct brw_tcs_prog_key *key)
+{
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+ struct brw_stage_state *stage_state = &brw->tcs.base;
+ nir_shader *nir = tcp->program.Base.nir;
+ struct brw_tcs_prog_data prog_data;
+ bool start_busy = false;
+ double start_time = 0;
+
+ memset(&prog_data, 0, sizeof(prog_data));
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ *
+ * Note: param_count needs to be num_uniform_components * 4, since we add
+ * padding around uniform values below vec4 size, so the worst case is that
+ * every uniform is a float which gets padded to the size of a vec4.
+ */
+ struct gl_shader *tcs = shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+ int param_count = nir->num_uniforms;
+ if (!compiler->scalar_stage[MESA_SHADER_TESS_CTRL])
+ param_count *= 4;
+
+ prog_data.base.base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, tcs->NumImages);
+ prog_data.base.base.nr_params = param_count;
+ prog_data.base.base.nr_image_params = tcs->NumImages;
+
+ brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program.Base,
+ &prog_data.base.base, false);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_TCS))
+ brw_dump_ir("tessellation control", shader_prog, tcs, NULL);
+
+ int st_index = -1;
+ if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
+ st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TCS);
+
+ if (unlikely(brw->perf_debug)) {
+ start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo);
+ start_time = get_time();
+ }
+
+ void *mem_ctx = ralloc_context(NULL);
+ unsigned program_size;
+ char *error_str;
+ const unsigned *program =
+ brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index,
+ &program_size, &error_str);
+ if (program == NULL) {
+ if (shader_prog) {
+ shader_prog->LinkStatus = false;
+ ralloc_strcat(&shader_prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile tessellation control shader: "
+ "%s\n", error_str);
+
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ if (unlikely(brw->perf_debug)) {
+ struct brw_shader *btcs = (struct brw_shader *) tcs;
+ if (btcs->compiled_once) {
+ brw_tcs_debug_recompile(brw, shader_prog, key);
+ }
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("TCS compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ btcs->compiled_once = true;
+ }
+
+ /* Scratch space is used for register spilling */
+ if (prog_data.base.base.total_scratch) {
+ brw_get_scratch_bo(brw, &stage_state->scratch_bo,
+ prog_data.base.base.total_scratch *
+ brw->max_hs_threads);
+ }
+
+ brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
+ key, sizeof(*key),
+ program, program_size,
+ &prog_data, sizeof(prog_data),
+ &stage_state->prog_offset, &brw->tcs.prog_data);
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+
+void
+brw_upload_tcs_prog(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
+ struct brw_stage_state *stage_state = &brw->tcs.base;
+ struct brw_tcs_prog_key key;
+ /* BRW_NEW_TESS_CTRL_PROGRAM */
+ struct brw_tess_ctrl_program *tcp =
+ (struct brw_tess_ctrl_program *) brw->tess_ctrl_program;
+
+ if (!brw_state_dirty(brw,
+ _NEW_TEXTURE,
+ BRW_NEW_TESS_CTRL_PROGRAM |
+ BRW_NEW_TESS_EVAL_PROGRAM))
+ return;
+
+ if (tcp == NULL) {
+ /* Other state atoms had better not try to access prog_data, since
+ * there's no HS program.
+ */
+ brw->tcs.prog_data = NULL;
+ brw->tcs.base.prog_data = NULL;
+ return;
+ }
+
+ struct gl_program *prog = &tcp->program.Base;
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = tcp->id;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+ &key.tex);
+
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ /* We need to specialize our code generation for tessellation levels
+ * based on the domain the DS is expecting to tessellate.
+ */
+ struct brw_tess_eval_program *tep =
+ (struct brw_tess_eval_program *) brw->tess_eval_program;
+ assert(tep);
+ key.tes_primitive_mode = tep->program.PrimitiveMode;
+
+ if (!brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG,
+ &key, sizeof(key),
+ &stage_state->prog_offset, &brw->tcs.prog_data)) {
+ bool success = brw_codegen_tcs_prog(brw, current[MESA_SHADER_TESS_CTRL],
+ tcp, &key);
+ assert(success);
+ (void)success;
+ }
+ brw->tcs.base.prog_data = &brw->tcs.prog_data->base.base;
+}
+
+
+bool
+brw_tcs_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_tcs_prog_key key;
+ uint32_t old_prog_offset = brw->tcs.base.prog_offset;
+ struct brw_tcs_prog_data *old_prog_data = brw->tcs.prog_data;
+ bool success;
+
+ struct gl_tess_ctrl_program *tcp = (struct gl_tess_ctrl_program *)prog;
+ struct brw_tess_ctrl_program *btcp = brw_tess_ctrl_program(tcp);
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = btcp->id;
+ brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+ key.tes_primitive_mode = GL_TRIANGLES;
+
+ success = brw_codegen_tcs_prog(brw, shader_prog, btcp, &key);
+
+ brw->tcs.base.prog_offset = old_prog_offset;
+ brw->tcs.prog_data = old_prog_data;
+
+ return success;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index a697bdf84a0..0cded0c87c6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -155,6 +155,9 @@ vec4_instruction::is_send_from_grf()
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_URB_READ:
+ case TCS_OPCODE_URB_WRITE:
+ case SHADER_OPCODE_BARRIER:
return true;
default:
return false;
@@ -184,7 +187,9 @@ bool
vec4_instruction::has_source_and_destination_hazard() const
{
switch (opcode) {
- /* Most opcodes in the vec4 world use MRFs. */
+ case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ return true;
default:
return false;
}
@@ -204,6 +209,7 @@ vec4_instruction::regs_read(unsigned arg) const
case SHADER_OPCODE_TYPED_ATOMIC:
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ case TCS_OPCODE_URB_WRITE:
return arg == 0 ? mlen : 1;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
@@ -281,6 +287,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
return 0;
case GS_OPCODE_FF_SYNC:
return 1;
+ case TCS_OPCODE_URB_WRITE:
+ return 0;
case SHADER_OPCODE_SHADER_TIME_ADD:
return 0;
case SHADER_OPCODE_TEX:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 61e57380c42..ddfd87d31c9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -342,6 +342,7 @@ public:
unsigned num_components = 4);
src_reg get_nir_src(nir_src src,
unsigned num_components = 4);
+ src_reg get_indirect_offset(nir_intrinsic_instr *instr);
virtual dst_reg *make_reg_for_system_value(int location,
const glsl_type *type) = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 85cbf24092e..0c1f0c31b0d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -75,6 +75,8 @@ is_expression(const vec4_instruction *const inst)
case VEC4_OPCODE_UNPACK_UNIFORM:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
+ case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
return true;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 2d0722aa1eb..c31e72def67 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -45,6 +45,9 @@ can_do_writemask(const struct brw_device_info *devinfo,
case VS_OPCODE_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
+ case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
+ case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ case VEC4_OPCODE_URB_READ:
return false;
default:
/* The MATH instruction on Gen6 only executes in align1 mode, which does
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index c3426ddd1c8..6325569956f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -714,6 +714,220 @@ generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst)
}
static void
+generate_tcs_get_instance_id(struct brw_codegen *p, struct brw_reg dst)
+{
+ /* "Instance Count" comes as part of the payload in r0.2 bits 23:17.
+ *
+ * Since we operate in SIMD4x2 mode, we need run half as many threads
+ * as necessary. So we assign (2i + 1, 2i) as the thread counts. We
+ * shift right by one less to accomplish the multiplication by two.
+ */
+ dst = retype(dst, BRW_REGISTER_TYPE_UD);
+ struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ const int mask = INTEL_MASK(23, 17);
+ const int shift = 17;
+
+ brw_AND(p, get_element_ud(dst, 0), get_element_ud(r0, 2), brw_imm_ud(mask));
+ brw_SHR(p, get_element_ud(dst, 0), get_element_ud(dst, 0),
+ brw_imm_ud(shift - 1));
+ brw_ADD(p, get_element_ud(dst, 4), get_element_ud(dst, 0), brw_imm_ud(1));
+
+ brw_pop_insn_state(p);
+}
+
+static void
+generate_tcs_urb_write(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg urb_header)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, brw_null_reg());
+ brw_set_src0(p, send, urb_header);
+
+ brw_set_message_descriptor(p, send, BRW_SFID_URB,
+ inst->mlen /* mlen */, 0 /* rlen */,
+ true /* header */, false /* eot */);
+ brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_WRITE_OWORD);
+ brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
+ brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+ brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+
+ /* what happens to swizzles? */
+}
+
+
+static void
+generate_tcs_input_urb_offsets(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg vertex,
+ struct brw_reg offset)
+{
+ /* Generates an URB read/write message header for HS/DS operation.
+ * Inputs are a vertex index, and a byte offset from the beginning of
+ * the vertex. */
+
+ /* If `vertex` is not an immediate, we clobber a0.0 */
+
+ assert(vertex.file == BRW_IMMEDIATE_VALUE || vertex.file == BRW_GENERAL_REGISTER_FILE);
+ assert(vertex.type == BRW_REGISTER_TYPE_UD || vertex.type == BRW_REGISTER_TYPE_D);
+
+ assert(dst.file == BRW_GENERAL_REGISTER_FILE);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, brw_imm_ud(0));
+
+ /* m0.5 bits 8-15 are channel enables */
+ brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00));
+
+ /* m0.0-0.1: URB handles */
+ if (vertex.file == BRW_IMMEDIATE_VALUE) {
+ uint32_t vertex_index = vertex.ud;
+ struct brw_reg index_reg = brw_vec1_grf(
+ 1 + (vertex_index >> 3), vertex_index & 7);
+
+ brw_MOV(p, vec2(get_element_ud(dst, 0)),
+ retype(index_reg, BRW_REGISTER_TYPE_UD));
+ } else {
+ /* Use indirect addressing. ICP Handles are DWords (single channels
+ * of a register) and start at g1.0.
+ *
+ * In order to start our region at g1.0, we add 8 to the vertex index,
+ * effectively skipping over the 8 channels in g0.0. This gives us a
+ * DWord offset to the ICP Handle.
+ *
+ * Indirect addressing works in terms of bytes, so we then multiply
+ * the DWord offset by 4 (by shifting left by 2).
+ */
+ struct brw_reg addr = brw_address_reg(0);
+
+ /* bottom half: m0.0 = g[1.0 + vertex.0]UD */
+ brw_ADD(p, addr, get_element_ud(vertex, 0), brw_imm_uw(0x8));
+ brw_SHL(p, addr, addr, brw_imm_ud(2));
+ brw_MOV(p, get_element_ud(dst, 0), deref_1ud(brw_indirect(0, 0), 0));
+
+ /* top half: m0.1 = g[1.0 + vertex.4]UD */
+ brw_ADD(p, addr, get_element_ud(vertex, 4), brw_imm_uw(0x8));
+ brw_SHL(p, addr, addr, brw_imm_ud(2));
+ brw_MOV(p, get_element_ud(dst, 1), deref_1ud(brw_indirect(0, 0), 0));
+ }
+
+ /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
+ if (offset.file != ARF)
+ brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+
+ brw_pop_insn_state(p);
+}
+
+
+static void
+generate_tcs_output_urb_offsets(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg write_mask,
+ struct brw_reg offset)
+{
+ /* Generates an URB read/write message header for HS/DS operation, for the patch URB entry. */
+ assert(dst.file == BRW_GENERAL_REGISTER_FILE || dst.file == BRW_MESSAGE_REGISTER_FILE);
+
+ assert(write_mask.file == BRW_IMMEDIATE_VALUE);
+ assert(write_mask.type == BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, brw_imm_ud(0));
+
+ unsigned mask = write_mask.ud;
+
+ /* m0.5 bits 15:12 and 11:8 are channel enables */
+ brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud((mask << 8) | (mask << 12)));
+
+ /* HS patch URB handle is delivered in r0.0 */
+ struct brw_reg urb_handle = brw_vec1_grf(0, 0);
+
+ /* m0.0-0.1: URB handles */
+ brw_MOV(p, vec2(get_element_ud(dst, 0)),
+ retype(urb_handle, BRW_REGISTER_TYPE_UD));
+
+ /* m0.3-0.4: 128bit-granular offsets into the URB from the handles */
+ if (offset.file != ARF)
+ brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+
+ brw_pop_insn_state(p);
+}
+
+static void
+generate_vec4_urb_read(struct brw_codegen *p,
+ vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg header)
+{
+ const struct brw_device_info *devinfo = p->devinfo;
+
+ assert(header.file == BRW_GENERAL_REGISTER_FILE);
+ assert(header.type == BRW_REGISTER_TYPE_UD);
+
+ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, header);
+
+ brw_set_message_descriptor(p, send, BRW_SFID_URB,
+ 1 /* mlen */, 1 /* rlen */,
+ true /* header */, false /* eot */);
+ brw_inst_set_urb_opcode(devinfo, send, BRW_URB_OPCODE_READ_OWORD);
+ brw_inst_set_urb_swizzle_control(devinfo, send, BRW_URB_SWIZZLE_INTERLEAVE);
+ brw_inst_set_urb_per_slot_offset(devinfo, send, 1);
+
+ brw_inst_set_urb_global_offset(devinfo, send, inst->offset);
+}
+
+static void
+generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst)
+{
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, dst, retype(brw_vec1_grf(0, 1), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+}
+
+static void
+generate_tcs_create_barrier_header(struct brw_codegen *p,
+ struct brw_vue_prog_data *prog_data,
+ struct brw_reg dst)
+{
+ struct brw_reg m0_2 = get_element_ud(dst, 2);
+ unsigned instances = ((struct brw_tcs_prog_data *) prog_data)->instances;
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+
+ /* Zero the message header */
+ brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
+
+ /* Copy "Barrier ID" from DW0 bits 16:13 */
+ brw_AND(p, m0_2,
+ retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x1e000));
+
+ /* Shift it into place */
+ brw_SHL(p, m0_2, get_element_ud(dst, 2), brw_imm_ud(11));
+
+ /* Set the Barrier Count and the enable bit */
+ brw_OR(p, m0_2, m0_2, brw_imm_ud(instances << 9 | (1 << 15)));
+
+ brw_pop_insn_state(p);
+}
+
+static void
generate_oword_dual_block_offsets(struct brw_codegen *p,
struct brw_reg m1,
struct brw_reg index)
@@ -1538,6 +1752,39 @@ generate_code(struct brw_codegen *p,
break;
}
+ case TCS_OPCODE_URB_WRITE:
+ generate_tcs_urb_write(p, inst, src[0]);
+ break;
+
+ case VEC4_OPCODE_URB_READ:
+ generate_vec4_urb_read(p, inst, dst, src[0]);
+ break;
+
+ case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
+ generate_tcs_input_urb_offsets(p, dst, src[0], src[1]);
+ break;
+
+ case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
+ generate_tcs_output_urb_offsets(p, dst, src[0], src[1]);
+ break;
+
+ case TCS_OPCODE_GET_INSTANCE_ID:
+ generate_tcs_get_instance_id(p, dst);
+ break;
+
+ case TCS_OPCODE_GET_PRIMITIVE_ID:
+ generate_tcs_get_primitive_id(p, dst);
+ break;
+
+ case TCS_OPCODE_CREATE_BARRIER_HEADER:
+ generate_tcs_create_barrier_header(p, prog_data, dst);
+ break;
+
+ case SHADER_OPCODE_BARRIER:
+ brw_barrier(p, src[0]);
+ brw_WAIT(p);
+ break;
+
default:
unreachable("Unsupported opcode");
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 0ded465ebff..25996281131 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -327,6 +327,24 @@ vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
return get_nir_src(src, nir_type_int, num_components);
}
+src_reg
+vec4_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
+{
+ nir_src *offset_src = nir_get_io_offset_src(instr);
+ nir_const_value *const_value = nir_src_as_const_value(*offset_src);
+
+ if (const_value) {
+ /* The only constant offset we should find is 0. brw_nir.c's
+ * add_const_offset_to_base() will fold other constant offsets
+ * into instr->const_index[0].
+ */
+ assert(const_value->u[0] == 0);
+ return src_reg();
+ }
+
+ return get_nir_src(*offset_src, BRW_REGISTER_TYPE_UD, 1);
+}
+
void
vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
{
@@ -650,7 +668,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_instance_id: {
+ case nir_intrinsic_load_instance_id:
+ case nir_intrinsic_load_invocation_id:
+ case nir_intrinsic_load_tess_level_inner:
+ case nir_intrinsic_load_tess_level_outer: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
src_reg val = src_reg(nir_system_values[sv]);
assert(val.file != BAD_FILE);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
new file mode 100644
index 00000000000..22224d1186b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -0,0 +1,496 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tcs.cpp
+ *
+ * Tessellaton control shader specific code derived from the vec4_visitor class.
+ */
+
+#include "brw_nir.h"
+#include "brw_vec4_tcs.h"
+
+namespace brw {
+
+vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ const struct brw_tcs_prog_key *key,
+ struct brw_tcs_prog_data *prog_data,
+ const nir_shader *nir,
+ void *mem_ctx,
+ int shader_time_index)
+ : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
+ nir, mem_ctx, false, shader_time_index),
+ key(key)
+{
+}
+
+
+void
+vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
+{
+}
+
+dst_reg *
+vec4_tcs_visitor::make_reg_for_system_value(int location, const glsl_type *type)
+{
+ return NULL;
+}
+
+
+void
+vec4_tcs_visitor::setup_payload()
+{
+ int reg = 0;
+
+ /* The payload always contains important data in r0, which contains
+ * the URB handles that are passed on to the URB write at the end
+ * of the thread.
+ */
+ reg++;
+
+ /* r1.0 - r4.7 may contain the input control point URB handles,
+ * which we use to pull vertex data.
+ */
+ reg += 4;
+
+ /* Push constants may start at r5.0 */
+ reg = setup_uniforms(reg);
+
+ this->first_non_payload_grf = reg;
+}
+
+
+void
+vec4_tcs_visitor::emit_prolog()
+{
+ invocation_id = src_reg(this, glsl_type::uint_type);
+ emit(TCS_OPCODE_GET_INSTANCE_ID, dst_reg(invocation_id));
+
+ /* HS threads are dispatched with the dispatch mask set to 0xFF.
+ * If there are an odd number of output vertices, then the final
+ * HS instance dispatched will only have its bottom half doing real
+ * work, and so we need to disable the upper half:
+ */
+ if (nir->info.tcs.vertices_out % 2) {
+ emit(CMP(dst_null_d(), invocation_id,
+ brw_imm_ud(nir->info.tcs.vertices_out), BRW_CONDITIONAL_L));
+
+ /* Matching ENDIF is in emit_thread_end() */
+ emit(IF(BRW_PREDICATE_NORMAL));
+ }
+}
+
+
+void
+vec4_tcs_visitor::emit_thread_end()
+{
+ current_annotation = "thread end";
+
+ if (nir->info.tcs.vertices_out % 2) {
+ emit(BRW_OPCODE_ENDIF);
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
+ emit_shader_time_end();
+
+ vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ inst->mlen = 1; /* just the header, no data. */
+ inst->urb_write_flags = BRW_URB_WRITE_EOT_COMPLETE;
+}
+
+
+void
+vec4_tcs_visitor::emit_input_urb_read(const dst_reg &dst,
+ const src_reg &vertex_index,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ vec4_instruction *inst;
+ dst_reg temp(this, glsl_type::ivec4_type);
+ temp.type = dst.type;
+
+ /* Set up the message header to reference the proper parts of the URB */
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ inst = emit(TCS_OPCODE_SET_INPUT_URB_OFFSETS, header, vertex_index,
+ indirect_offset);
+ inst->force_writemask_all = true;
+
+ /* Read into a temporary, ignoring writemasking. */
+ inst = emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+ inst->offset = base_offset;
+ inst->mlen = 1;
+ inst->base_mrf = -1;
+
+ /* Copy the temporary to the destination to deal with writemasking.
+ *
+ * Also attempt to deal with gl_PointSize being in the .w component.
+ */
+ if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
+ emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WWWW)));
+ } else {
+ emit(MOV(dst, src_reg(temp)));
+ }
+}
+
+void
+vec4_tcs_visitor::emit_output_urb_read(const dst_reg &dst,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ vec4_instruction *inst;
+
+ /* Set up the message header to reference the proper parts of the URB */
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, header,
+ brw_imm_ud(dst.writemask), indirect_offset);
+ inst->force_writemask_all = true;
+
+ /* Read into a temporary, ignoring writemasking. */
+ vec4_instruction *read = emit(VEC4_OPCODE_URB_READ, dst, src_reg(header));
+ read->offset = base_offset;
+ read->mlen = 1;
+ read->base_mrf = -1;
+}
+
+void
+vec4_tcs_visitor::emit_urb_write(const src_reg &value,
+ unsigned writemask,
+ unsigned base_offset,
+ const src_reg &indirect_offset)
+{
+ if (writemask == 0)
+ return;
+
+ src_reg message(this, glsl_type::uvec4_type, 2);
+ vec4_instruction *inst;
+
+ inst = emit(TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, dst_reg(message),
+ brw_imm_ud(writemask), indirect_offset);
+ inst->force_writemask_all = true;
+ inst = emit(MOV(offset(dst_reg(retype(message, value.type)), 1), value));
+ inst->force_writemask_all = true;
+
+ inst = emit(TCS_OPCODE_URB_WRITE, dst_null_f(), message);
+ inst->offset = base_offset;
+ inst->mlen = 2;
+ inst->base_mrf = -1;
+}
+
+static unsigned
+tesslevel_outer_components(GLenum tes_primitive_mode)
+{
+ switch (tes_primitive_mode) {
+ case GL_QUADS:
+ return 4;
+ case GL_TRIANGLES:
+ return 3;
+ case GL_ISOLINES:
+ return 2;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ return 0;
+}
+
+static unsigned
+tesslevel_inner_components(GLenum tes_primitive_mode)
+{
+ switch (tes_primitive_mode) {
+ case GL_QUADS:
+ return 2;
+ case GL_TRIANGLES:
+ return 1;
+ case GL_ISOLINES:
+ return 0;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ return 0;
+}
+
+/**
+ * Given a normal .xyzw writemask, convert it to a writemask for a vector
+ * that's stored backwards, i.e. .wzyx.
+ */
+static unsigned
+writemask_for_backwards_vector(unsigned mask)
+{
+ unsigned new_mask = 0;
+
+ for (int i = 0; i < 4; i++)
+ new_mask |= ((mask >> i) & 1) << (3 - i);
+
+ return new_mask;
+}
+
+void
+vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_invocation_id:
+ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
+ invocation_id));
+ break;
+ case nir_intrinsic_load_primitive_id:
+ emit(TCS_OPCODE_GET_PRIMITIVE_ID,
+ get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ unreachable("XXX: gl_PatchVerticesIn not implemented yet.");
+ break;
+ case nir_intrinsic_load_per_vertex_input: {
+ src_reg indirect_offset = get_indirect_offset(instr);
+ unsigned imm_offset = instr->const_index[0];
+
+ nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
+ src_reg vertex_index =
+ vertex_const ? src_reg(brw_imm_ud(vertex_const->u[0]))
+ : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
+
+ dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+ dst.writemask = brw_writemask_for_size(instr->num_components);
+
+ emit_input_urb_read(dst, vertex_index, imm_offset, indirect_offset);
+ break;
+ }
+ case nir_intrinsic_load_input:
+ unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output: {
+ src_reg indirect_offset = get_indirect_offset(instr);
+ unsigned imm_offset = instr->const_index[0];;
+
+ dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+ dst.writemask = brw_writemask_for_size(instr->num_components);
+
+ if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
+ dst.type = BRW_REGISTER_TYPE_F;
+
+ /* This is a read of gl_TessLevelInner[], which lives in the
+ * Patch URB header. The layout depends on the domain.
+ */
+ switch (key->tes_primitive_mode) {
+ case GL_QUADS: {
+ /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
+ dst_reg tmp(this, glsl_type::vec4_type);
+ emit_output_urb_read(tmp, 0, src_reg());
+ emit(MOV(writemask(dst, WRITEMASK_XY),
+ swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+ break;
+ }
+ case GL_TRIANGLES:
+ /* DWord 4; use offset 1 but normal swizzle/writemask. */
+ emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, src_reg());
+ break;
+ case GL_ISOLINES:
+ /* All channels are undefined. */
+ return;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
+ dst.type = BRW_REGISTER_TYPE_F;
+
+ /* This is a read of gl_TessLevelOuter[], which lives in the
+ * high 4 DWords of the Patch URB header, in reverse order.
+ */
+ switch (key->tes_primitive_mode) {
+ case GL_QUADS:
+ dst.writemask = WRITEMASK_XYZW;
+ break;
+ case GL_TRIANGLES:
+ dst.writemask = WRITEMASK_XYZ;
+ break;
+ case GL_ISOLINES:
+ dst.writemask = WRITEMASK_XY;
+ return;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+
+ dst_reg tmp(this, glsl_type::vec4_type);
+ emit_output_urb_read(tmp, 1, src_reg());
+ emit(MOV(dst, swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
+ } else {
+ emit_output_urb_read(dst, imm_offset, indirect_offset);
+ }
+ break;
+ }
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output: {
+ src_reg value = get_nir_src(instr->src[0]);
+ unsigned mask = instr->const_index[1];
+ unsigned swiz = BRW_SWIZZLE_XYZW;
+
+ src_reg indirect_offset = get_indirect_offset(instr);
+ unsigned imm_offset = instr->const_index[0];
+
+ if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
+ value.type = BRW_REGISTER_TYPE_F;
+
+ mask &= (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;
+
+ /* This is a write to gl_TessLevelInner[], which lives in the
+ * Patch URB header. The layout depends on the domain.
+ */
+ switch (key->tes_primitive_mode) {
+ case GL_QUADS:
+ /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
+ * We use an XXYX swizzle to reverse put .xy in the .wz
+ * channels, and use a .zw writemask.
+ */
+ swiz = BRW_SWIZZLE4(0, 0, 1, 0);
+ mask = writemask_for_backwards_vector(mask);
+ break;
+ case GL_TRIANGLES:
+ /* gl_TessLevelInner[].x lives at DWord 4, so we set the
+ * writemask to X and bump the URB offset by 1.
+ */
+ imm_offset = 1;
+ break;
+ case GL_ISOLINES:
+ /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
+ return;
+ default:
+ unreachable("Bogus tessellation domain");
+ }
+ } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
+ value.type = BRW_REGISTER_TYPE_F;
+
+ mask &= (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;
+
+ /* This is a write to gl_TessLevelOuter[] which lives in the
+ * Patch URB Header at DWords 4-7. However, it's reversed, so
+ * instead of .xyzw we have .wzyx.
+ */
+ swiz = BRW_SWIZZLE_WZYX;
+ mask = writemask_for_backwards_vector(mask);
+ }
+
+ emit_urb_write(swizzle(value, swiz), mask,
+ imm_offset, indirect_offset);
+ break;
+ }
+
+ case nir_intrinsic_barrier: {
+ dst_reg header = dst_reg(this, glsl_type::uvec4_type);
+ emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
+ emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
+ break;
+ }
+
+ default:
+ vec4_visitor::nir_emit_intrinsic(instr);
+ }
+}
+
+
+extern "C" const unsigned *
+brw_compile_tcs(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const struct brw_tcs_prog_key *key,
+ struct brw_tcs_prog_data *prog_data,
+ const nir_shader *src_shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
+{
+ const struct brw_device_info *devinfo = compiler->devinfo;
+ struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
+ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL];
+
+ nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
+ nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
+ nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
+
+ prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
+
+ brw_compute_tess_vue_map(&vue_prog_data->vue_map,
+ nir->info.outputs_written,
+ nir->info.patch_outputs_written);
+
+ /* Compute URB entry size. The maximum allowed URB entry size is 32k.
+ * That divides up as follows:
+ *
+ * 32 bytes for the patch header (tessellation factors)
+ * 480 bytes for per-patch varyings (a varying component is 4 bytes and
+ * gl_MaxTessPatchComponents = 120)
+ * 16384 bytes for per-vertex varyings (a varying component is 4 bytes,
+ * gl_MaxPatchVertices = 32 and
+ * gl_MaxTessControlOutputComponents = 128)
+ *
+ * 15808 bytes left for varying packing overhead
+ */
+ const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
+ const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
+ unsigned output_size_bytes = 0;
+ /* Note that the patch header is counted in num_per_patch_slots. */
+ output_size_bytes += num_per_patch_slots * 16;
+ output_size_bytes += nir->info.tcs.vertices_out * num_per_vertex_slots * 16;
+
+ assert(output_size_bytes >= 1);
+ if (output_size_bytes > GEN7_MAX_HS_URB_ENTRY_SIZE_BYTES)
+ return false;
+
+ /* URB entry sizes are stored as a multiple of 64 bytes. */
+ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+ struct brw_vue_map input_vue_map;
+ brw_compute_vue_map(devinfo, &input_vue_map,
+ nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+ true);
+
+ /* HS does not use the usual payload pushing from URB to GRFs,
+ * because we don't have enough registers for a full-size payload, and
+ * the hardware is broken on Haswell anyway.
+ */
+ vue_prog_data->urb_read_length = 0;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
+ fprintf(stderr, "TCS Input ");
+ brw_print_vue_map(stderr, &input_vue_map);
+ fprintf(stderr, "TCS Output ");
+ brw_print_vue_map(stderr, &vue_prog_data->vue_map);
+ }
+
+ vec4_tcs_visitor v(compiler, log_data, key, prog_data,
+ nir, mem_ctx, shader_time_index);
+ if (!v.run()) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
+ return NULL;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_TCS))
+ v.dump_instructions();
+
+ return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
+ &prog_data->base, v.cfg,
+ final_assembly_size);
+}
+
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
new file mode 100644
index 00000000000..2bf4885a560
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_vec4_tcs.h
+ *
+ * The vec4-mode tessellation control shader compiler backend.
+ */
+
+#ifndef BRW_VEC4_TCS_H
+#define BRW_VEC4_TCS_H
+
+#include "brw_compiler.h"
+#include "brw_vec4.h"
+
+#ifdef __cplusplus
+namespace brw {
+
+class vec4_tcs_visitor : public vec4_visitor
+{
+public:
+ vec4_tcs_visitor(const struct brw_compiler *compiler,
+ void *log_data,
+ const struct brw_tcs_prog_key *key,
+ struct brw_tcs_prog_data *prog_data,
+ const nir_shader *nir,
+ void *mem_ctx,
+ int shader_time_index);
+
+protected:
+ virtual dst_reg *make_reg_for_system_value(int location,
+ const glsl_type *type);
+ virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
+ virtual void setup_payload();
+ virtual void emit_prolog();
+ virtual void emit_thread_end();
+
+ virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
+
+ void emit_input_urb_read(const dst_reg &dst,
+ const src_reg &vertex_index,
+ unsigned base_offset,
+ const src_reg &indirect_offset);
+ void emit_output_urb_read(const dst_reg &dst,
+ unsigned base_offset,
+ const src_reg &indirect_offset);
+
+ void emit_urb_write(const src_reg &value, unsigned writemask,
+ unsigned base_offset, const src_reg &indirect_offset);
+
+ /* we do not use the normal end-of-shader URB write mechanism -- but every vec4 stage
+ * must provide implementations of these:
+ */
+ virtual void emit_urb_write_header(int mrf) {}
+ virtual vec4_instruction *emit_urb_write_opcode(bool complete) { return NULL; }
+
+ const struct brw_tcs_prog_key *key;
+ src_reg invocation_id;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* BRW_VEC4_TCS_H */