summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/iris/iris_program.c9
-rw-r--r--src/gallium/drivers/iris/iris_state.c5
-rw-r--r--src/intel/compiler/brw_compiler.c3
-rw-r--r--src/intel/compiler/brw_compiler.h7
-rw-r--r--src/intel/compiler/brw_fs.cpp53
-rw-r--r--src/intel/compiler/brw_fs.h7
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp96
-rw-r--r--src/intel/compiler/brw_vec4_tcs.cpp27
-rw-r--r--src/intel/dev/gen_debug.c1
-rw-r--r--src/intel/dev/gen_debug.h4
-rw-r--r--src/intel/vulkan/genX_pipeline.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_tcs.c5
-rw-r--r--src/mesa/drivers/dri/i965/genX_state_upload.c5
13 files changed, 193 insertions, 34 deletions
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 30ec3f1ff86..d5c5a32bbc4 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -1526,6 +1526,7 @@ iris_create_tcs_state(struct pipe_context *ctx,
{
struct iris_context *ice = (void *) ctx;
struct iris_screen *screen = (void *) ctx->screen;
+ const struct brw_compiler *compiler = screen->compiler;
struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state);
struct shader_info *info = &ish->nir->info;
@@ -1544,6 +1545,14 @@ iris_create_tcs_state(struct pipe_context *ctx,
.patch_outputs_written = info->patch_outputs_written,
};
+ /* 8_PATCH mode needs the key to contain the input patch dimensionality.
+ * We don't have that information, so we randomly guess that the input
+ * and output patches are the same size. This is a bad guess, but we
+ * can't do much better.
+ */
+ if (compiler->use_tcs_8_patch)
+ key.input_vertices = info->tess.tcs_vertices_out;
+
iris_compile_tcs(ice, ish, &key);
}
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index a9af1cd0dc4..f48bacf77e1 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -3651,6 +3651,11 @@ iris_store_tcs_state(struct iris_context *ice,
hs.InstanceCount = tcs_prog_data->instances - 1;
hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
hs.IncludeVertexHandles = true;
+
+#if GEN_GEN >= 9
+ hs.DispatchMode = vue_prog_data->dispatch_mode;
+ hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
}
}
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 626ff4bb16a..6a41cd20270 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -99,6 +99,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
+ compiler->use_tcs_8_patch =
+ devinfo->gen >= 9 && (INTEL_DEBUG & DEBUG_TCS_EIGHT_PATCH);
+
if (devinfo->gen >= 10) {
/* We don't support vec4 mode on Cannonlake. */
for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 332789d7809..8c6ae35636b 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -93,6 +93,7 @@ struct brw_compiler {
void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
bool scalar_stage[MESA_SHADER_STAGES];
+ bool use_tcs_8_patch;
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
/**
@@ -1002,6 +1003,9 @@ enum shader_dispatch_mode {
DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
DISPATCH_MODE_SIMD8 = 3,
+
+ DISPATCH_MODE_TCS_SINGLE_PATCH = 0,
+ DISPATCH_MODE_TCS_8_PATCH = 2,
};
/**
@@ -1074,6 +1078,9 @@ struct brw_tcs_prog_data
{
struct brw_vue_prog_data base;
+ /** Should the non-SINGLE_PATCH payload provide primitive ID? */
+ bool include_primitive_id;
+
/** Number vertices in output patch */
int instances;
};
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 9d05800d1fe..9b4e030b54f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1805,7 +1805,7 @@ fs_visitor::assign_vs_urb_setup()
}
void
-fs_visitor::assign_tcs_single_patch_urb_setup()
+fs_visitor::assign_tcs_urb_setup()
{
assert(stage == MESA_SHADER_TESS_CTRL);
@@ -7396,12 +7396,28 @@ void
fs_visitor::set_tcs_invocation_id()
{
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
const unsigned instance_id_mask =
devinfo->gen >= 11 ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17);
const unsigned instance_id_shift =
devinfo->gen >= 11 ? 16 : 17;
+ /* Get instance number from g0.2 bits 22:16 or 23:17 */
+ fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(instance_id_mask));
+
+ invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
+
+ if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH) {
+ /* gl_InvocationID is just the thread number */
+ bld.SHR(invocation_id, t, brw_imm_ud(instance_id_shift));
+ return;
+ }
+
+ assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH);
+
fs_reg channels_uw = bld.vgrf(BRW_REGISTER_TYPE_UW);
fs_reg channels_ud = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210)));
@@ -7410,26 +7426,36 @@ fs_visitor::set_tcs_invocation_id()
if (tcs_prog_data->instances == 1) {
invocation_id = channels_ud;
} else {
- invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
-
- /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */
- fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD);
fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD);
- bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)),
- brw_imm_ud(instance_id_mask));
bld.SHR(instance_times_8, t, brw_imm_ud(instance_id_shift - 3));
-
bld.ADD(invocation_id, instance_times_8, channels_ud);
}
}
bool
-fs_visitor::run_tcs_single_patch()
+fs_visitor::run_tcs()
{
assert(stage == MESA_SHADER_TESS_CTRL);
- /* r1-r4 contain the ICP handles. */
- payload.num_regs = 5;
+ struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
+
+ assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH ||
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+
+ if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) {
+ /* r1-r4 contain the ICP handles. */
+ payload.num_regs = 5;
+ } else {
+ assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+ assert(tcs_key->input_vertices > 0);
+ /* r1 contains output handles, r2 may contain primitive ID, then the
+ * ICP handles occupy the next 1-32 registers.
+ */
+ payload.num_regs = 2 + tcs_prog_data->include_primitive_id +
+ tcs_key->input_vertices;
+ }
if (shader_time_index >= 0)
emit_shader_time_begin();
@@ -7438,6 +7464,7 @@ fs_visitor::run_tcs_single_patch()
set_tcs_invocation_id();
const bool fix_dispatch_mask =
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH &&
(nir->info.tess.tcs_vertices_out % 8) != 0;
/* Fix the dispatch mask */
@@ -7455,7 +7482,7 @@ fs_visitor::run_tcs_single_patch()
/* Emit EOT write; set TR DS Cache bit */
fs_reg srcs[3] = {
- fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ fs_reg(get_tcs_output_urb_handle()),
fs_reg(brw_imm_ud(WRITEMASK_X << 16)),
fs_reg(brw_imm_ud(0)),
};
@@ -7478,7 +7505,7 @@ fs_visitor::run_tcs_single_patch()
optimize();
assign_curb_setup();
- assign_tcs_single_patch_urb_setup();
+ assign_tcs_urb_setup();
fixup_3src_null_dest();
allocate_registers(8, true);
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 8ae73401cdf..7db486688af 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -92,7 +92,7 @@ public:
bool run_fs(bool allow_spilling, bool do_rep_send);
bool run_vs();
- bool run_tcs_single_patch();
+ bool run_tcs();
bool run_tes();
bool run_gs();
bool run_cs(unsigned min_dispatch_width);
@@ -110,7 +110,7 @@ public:
void assign_urb_setup();
void convert_attr_sources_to_hw_regs(fs_inst *inst);
void assign_vs_urb_setup();
- void assign_tcs_single_patch_urb_setup();
+ void assign_tcs_urb_setup();
void assign_tes_urb_setup();
void assign_gs_urb_setup();
bool assign_regs(bool allow_spilling, bool spill_all);
@@ -251,6 +251,9 @@ public:
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
+ struct brw_reg get_tcs_output_urb_handle();
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 73e2f38145e..a2c8f3f557f 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2605,6 +2605,73 @@ fs_visitor::get_tcs_single_patch_icp_handle(const fs_builder &bld,
return icp_handle;
}
+fs_reg
+fs_visitor::get_tcs_eight_patch_icp_handle(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ const nir_src &vertex_src = instr->src[0];
+
+ unsigned first_icp_handle = tcs_prog_data->include_primitive_id ? 3 : 2;
+
+ if (nir_src_is_const(vertex_src)) {
+ return fs_reg(retype(brw_vec8_grf(first_icp_handle +
+ nir_src_as_uint(vertex_src), 0),
+ BRW_REGISTER_TYPE_UD));
+ }
+
+ /* The vertex index is non-constant. We need to use indirect
+ * addressing to fetch the proper URB handle.
+ *
+ * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0>
+ * indicating that channel <n> should read the handle from
+ * DWord <n>. We convert that to bytes by multiplying by 4.
+ *
+ * Next, we convert the vertex index to bytes by multiplying
+ * by 32 (shifting by 5), and add the two together. This is
+ * the final indirect byte offset.
+ */
+ fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_UW, 1);
+ fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+
+ /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */
+ bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210)));
+ /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */
+ bld.SHL(channel_offsets, sequence, brw_imm_ud(2u));
+ /* Convert vertex_index to bytes (multiply by 32) */
+ bld.SHL(vertex_offset_bytes,
+ retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(5u));
+ bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets);
+
+ /* Use first_icp_handle as the base offset. There is one register
+ * of URB handles per vertex, so inform the register allocator that
+ * we might read up to tcs_key->input_vertices registers.
+ */
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
+ retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type),
+ icp_offset_bytes, brw_imm_ud(tcs_key->input_vertices * REG_SIZE));
+
+ return icp_handle;
+}
+
+struct brw_reg
+fs_visitor::get_tcs_output_urb_handle()
+{
+ struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+
+ if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) {
+ return retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
+ } else {
+ assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH);
+ return retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
+ }
+}
+
void
fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
@@ -2612,6 +2679,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
assert(stage == MESA_SHADER_TESS_CTRL);
struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+
+ bool eight_patch =
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH;
fs_reg dst;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -2619,7 +2690,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
switch (instr->intrinsic) {
case nir_intrinsic_load_primitive_id:
- bld.MOV(dst, fs_reg(brw_vec1_grf(0, 1)));
+ bld.MOV(dst, fs_reg(eight_patch ? brw_vec8_grf(2, 0)
+ : brw_vec1_grf(0, 1)));
break;
case nir_intrinsic_load_invocation_id:
bld.MOV(retype(dst, invocation_id.type), invocation_id);
@@ -2675,7 +2747,9 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned imm_offset = instr->const_index[0];
fs_inst *inst;
- fs_reg icp_handle = get_tcs_single_patch_icp_handle(bld, instr);
+ fs_reg icp_handle =
+ eight_patch ? get_tcs_eight_patch_icp_handle(bld, instr)
+ : get_tcs_single_patch_icp_handle(bld, instr);
/* We can only read two double components with each URB read, so
* we send two read messages in that case, each one loading up to
@@ -2776,12 +2850,15 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned imm_offset = instr->const_index[0];
unsigned first_component = nir_intrinsic_component(instr);
+ struct brw_reg output_handles = get_tcs_output_urb_handle();
+
fs_inst *inst;
if (indirect_offset.file == BAD_FILE) {
- /* Replicate the patch handle to all enabled channels */
+ /* This MOV replicates the output handle to all enabled channels
+ * in SINGLE_PATCH mode.
+ */
fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- bld.MOV(patch_handle,
- retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ bld.MOV(patch_handle, output_handles);
{
if (first_component != 0) {
@@ -2805,10 +2882,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
}
} else {
/* Indirect indexing - use per-slot offsets as well. */
- const fs_reg srcs[] = {
- retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
- indirect_offset
- };
+ const fs_reg srcs[] = { output_handles, indirect_offset };
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
if (first_component != 0) {
@@ -2842,8 +2916,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
unsigned imm_offset = instr->const_index[0];
unsigned mask = instr->const_index[1];
unsigned header_regs = 0;
+ struct brw_reg output_handles = get_tcs_output_urb_handle();
+
fs_reg srcs[7];
- srcs[header_regs++] = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD);
+ srcs[header_regs++] = output_handles;
if (indirect_offset.file != BAD_FILE) {
srcs[header_regs++] = indirect_offset;
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp
index f0ef8c0dd96..c37f34cbe81 100644
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ b/src/intel/compiler/brw_vec4_tcs.cpp
@@ -406,10 +406,26 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir = brw_postprocess_nir(nir, compiler, is_scalar);
- if (is_scalar)
- prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 8);
- else
- prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 2);
+ bool has_primitive_id =
+ nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID);
+
+ if (compiler->use_tcs_8_patch &&
+ nir->info.tess.tcs_vertices_out <= 16 &&
+ 2 + has_primitive_id + key->input_vertices <= 31) {
+ /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First,
+ * the "Instance" field limits the number of output vertices to [1, 16].
+ * Secondly, the "Dispatch GRF Start Register for URB Data" field is
+ * limited to [0, 31] - which imposes a limit on the input vertices.
+ */
+ vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH;
+ prog_data->instances = nir->info.tess.tcs_vertices_out;
+ prog_data->include_primitive_id = has_primitive_id;
+ } else {
+ unsigned verts_per_thread = is_scalar ? 8 : 2;
+ vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_SINGLE_PATCH;
+ prog_data->instances =
+ DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
+ }
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
* That divides up as follows:
@@ -462,14 +478,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
&prog_data->base.base, NULL, nir, 8,
shader_time_index, &input_vue_map);
- if (!v.run_tcs_single_patch()) {
+ if (!v.run_tcs()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
- prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_generator g(compiler, log_data, mem_ctx,
&prog_data->base.base, v.promoted_constants, false,
diff --git a/src/intel/dev/gen_debug.c b/src/intel/dev/gen_debug.c
index b0d0d1a574a..5fa3d29fa51 100644
--- a/src/intel/dev/gen_debug.c
+++ b/src/intel/dev/gen_debug.c
@@ -86,6 +86,7 @@ static const struct debug_control debug_control[] = {
{ "color", DEBUG_COLOR },
{ "reemit", DEBUG_REEMIT },
{ "soft64", DEBUG_SOFT64 },
+ { "tcs8", DEBUG_TCS_EIGHT_PATCH },
{ NULL, 0 }
};
diff --git a/src/intel/dev/gen_debug.h b/src/intel/dev/gen_debug.h
index e4dabc67f8d..a6592354a64 100644
--- a/src/intel/dev/gen_debug.h
+++ b/src/intel/dev/gen_debug.h
@@ -84,6 +84,7 @@ extern uint64_t INTEL_DEBUG;
#define DEBUG_COLOR (1ull << 40)
#define DEBUG_REEMIT (1ull << 41)
#define DEBUG_SOFT64 (1ull << 42)
+#define DEBUG_TCS_EIGHT_PATCH (1ull << 43)
/* These flags are not compatible with the disk shader cache */
#define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
@@ -91,7 +92,8 @@ extern uint64_t INTEL_DEBUG;
/* These flags may affect program generation */
#define DEBUG_DISK_CACHE_MASK \
(DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \
- DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64)
+ DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \
+ DEBUG_TCS_EIGHT_PATCH)
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 0b58dce05b0..6b64f7ea8c7 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1434,6 +1434,11 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline,
hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
hs.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin);
+
+#if GEN_GEN >= 9
+ hs.DispatchMode = tcs_prog_data->base.dispatch_mode;
+ hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
}
const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 1c4d7988638..1050850bb1c 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -160,6 +160,7 @@ brw_tcs_populate_key(struct brw_context *brw,
struct brw_tcs_prog_key *key)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ const struct brw_compiler *compiler = brw->screen->compiler;
struct brw_program *tcp =
(struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
struct brw_program *tep =
@@ -177,7 +178,7 @@ brw_tcs_populate_key(struct brw_context *brw,
per_patch_slots |= prog->info.patch_outputs_written;
}
- if (devinfo->gen < 8 || !tcp)
+ if (devinfo->gen < 8 || !tcp || compiler->use_tcs_8_patch)
key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
key->outputs_written = per_vertex_slots;
key->patch_outputs_written = per_patch_slots;
@@ -251,7 +252,7 @@ brw_tcs_populate_default_key(const struct brw_compiler *compiler,
brw_setup_tex_for_precompile(devinfo, &key->tex, prog);
/* Guess that the input and output patches have the same dimensionality. */
- if (devinfo->gen < 8)
+ if (devinfo->gen < 8 || compiler->use_tcs_8_patch)
key->input_vertices = prog->info.tess.tcs_vertices_out;
if (tes) {
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ecffa2e8e86..961306b04fd 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4111,6 +4111,11 @@ genX(upload_hs_state)(struct brw_context *brw)
hs.IncludeVertexHandles = true;
hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
+
+#if GEN_GEN >= 9
+ hs.DispatchMode = vue_prog_data->dispatch_mode;
+ hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
+#endif
}
}
}