diff options
-rw-r--r-- | src/gallium/drivers/iris/iris_program.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/iris/iris_state.c | 5 | ||||
-rw-r--r-- | src/intel/compiler/brw_compiler.c | 3 | ||||
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 53 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 96 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_tcs.cpp | 27 | ||||
-rw-r--r-- | src/intel/dev/gen_debug.c | 1 | ||||
-rw-r--r-- | src/intel/dev/gen_debug.h | 4 | ||||
-rw-r--r-- | src/intel/vulkan/genX_pipeline.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_tcs.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/genX_state_upload.c | 5 |
13 files changed, 193 insertions, 34 deletions
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 30ec3f1ff86..d5c5a32bbc4 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -1526,6 +1526,7 @@ iris_create_tcs_state(struct pipe_context *ctx, { struct iris_context *ice = (void *) ctx; struct iris_screen *screen = (void *) ctx->screen; + const struct brw_compiler *compiler = screen->compiler; struct iris_uncompiled_shader *ish = iris_create_shader_state(ctx, state); struct shader_info *info = &ish->nir->info; @@ -1544,6 +1545,14 @@ iris_create_tcs_state(struct pipe_context *ctx, .patch_outputs_written = info->patch_outputs_written, }; + /* 8_PATCH mode needs the key to contain the input patch dimensionality. + * We don't have that information, so we randomly guess that the input + * and output patches are the same size. This is a bad guess, but we + * can't do much better. + */ + if (compiler->use_tcs_8_patch) + key.input_vertices = info->tess.tcs_vertices_out; + iris_compile_tcs(ice, ish, &key); } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index a9af1cd0dc4..f48bacf77e1 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3651,6 +3651,11 @@ iris_store_tcs_state(struct iris_context *ice, hs.InstanceCount = tcs_prog_data->instances - 1; hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; hs.IncludeVertexHandles = true; + +#if GEN_GEN >= 9 + hs.DispatchMode = vue_prog_data->dispatch_mode; + hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; +#endif } } diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 626ff4bb16a..6a41cd20270 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -99,6 +99,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", 
false); + compiler->use_tcs_8_patch = + devinfo->gen >= 9 && (INTEL_DEBUG & DEBUG_TCS_EIGHT_PATCH); + if (devinfo->gen >= 10) { /* We don't support vec4 mode on Cannonlake. */ for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 332789d7809..8c6ae35636b 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -93,6 +93,7 @@ struct brw_compiler { void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); bool scalar_stage[MESA_SHADER_STAGES]; + bool use_tcs_8_patch; struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; /** @@ -1002,6 +1003,9 @@ enum shader_dispatch_mode { DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, DISPATCH_MODE_4X2_DUAL_OBJECT = 2, DISPATCH_MODE_SIMD8 = 3, + + DISPATCH_MODE_TCS_SINGLE_PATCH = 0, + DISPATCH_MODE_TCS_8_PATCH = 2, }; /** @@ -1074,6 +1078,9 @@ struct brw_tcs_prog_data { struct brw_vue_prog_data base; + /** Should the non-SINGLE_PATCH payload provide primitive ID? */ + bool include_primitive_id; + /** Number vertices in output patch */ int instances; }; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 9d05800d1fe..9b4e030b54f 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1805,7 +1805,7 @@ fs_visitor::assign_vs_urb_setup() } void -fs_visitor::assign_tcs_single_patch_urb_setup() +fs_visitor::assign_tcs_urb_setup() { assert(stage == MESA_SHADER_TESS_CTRL); @@ -7396,12 +7396,28 @@ void fs_visitor::set_tcs_invocation_id() { struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); + struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; const unsigned instance_id_mask = devinfo->gen >= 11 ? INTEL_MASK(22, 16) : INTEL_MASK(23, 17); const unsigned instance_id_shift = devinfo->gen >= 11 ? 
16 : 17; + /* Get instance number from g0.2 bits 22:16 or 23:17 */ + fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(instance_id_mask)); + + invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD); + + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH) { + /* gl_InvocationID is just the thread number */ + bld.SHR(invocation_id, t, brw_imm_ud(instance_id_shift)); + return; + } + + assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH); + fs_reg channels_uw = bld.vgrf(BRW_REGISTER_TYPE_UW); fs_reg channels_ud = bld.vgrf(BRW_REGISTER_TYPE_UD); bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210))); @@ -7410,26 +7426,36 @@ fs_visitor::set_tcs_invocation_id() if (tcs_prog_data->instances == 1) { invocation_id = channels_ud; } else { - invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD); - - /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */ - fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD); fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)), - brw_imm_ud(instance_id_mask)); bld.SHR(instance_times_8, t, brw_imm_ud(instance_id_shift - 3)); - bld.ADD(invocation_id, instance_times_8, channels_ud); } } bool -fs_visitor::run_tcs_single_patch() +fs_visitor::run_tcs() { assert(stage == MESA_SHADER_TESS_CTRL); - /* r1-r4 contain the ICP handles. */ - payload.num_regs = 5; + struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); + struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); + struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key; + + assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH || + vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH); + + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) { + /* r1-r4 contain the ICP handles. 
*/ + payload.num_regs = 5; + } else { + assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH); + assert(tcs_key->input_vertices > 0); + /* r1 contains output handles, r2 may contain primitive ID, then the + * ICP handles occupy the next 1-32 registers. + */ + payload.num_regs = 2 + tcs_prog_data->include_primitive_id + + tcs_key->input_vertices; + } if (shader_time_index >= 0) emit_shader_time_begin(); @@ -7438,6 +7464,7 @@ fs_visitor::run_tcs_single_patch() set_tcs_invocation_id(); const bool fix_dispatch_mask = + vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH && (nir->info.tess.tcs_vertices_out % 8) != 0; /* Fix the disptach mask */ @@ -7455,7 +7482,7 @@ fs_visitor::run_tcs_single_patch() /* Emit EOT write; set TR DS Cache bit */ fs_reg srcs[3] = { - fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + fs_reg(get_tcs_output_urb_handle()), fs_reg(brw_imm_ud(WRITEMASK_X << 16)), fs_reg(brw_imm_ud(0)), }; @@ -7478,7 +7505,7 @@ fs_visitor::run_tcs_single_patch() optimize(); assign_curb_setup(); - assign_tcs_single_patch_urb_setup(); + assign_tcs_urb_setup(); fixup_3src_null_dest(); allocate_registers(8, true); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 8ae73401cdf..7db486688af 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -92,7 +92,7 @@ public: bool run_fs(bool allow_spilling, bool do_rep_send); bool run_vs(); - bool run_tcs_single_patch(); + bool run_tcs(); bool run_tes(); bool run_gs(); bool run_cs(unsigned min_dispatch_width); @@ -110,7 +110,7 @@ public: void assign_urb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); void assign_vs_urb_setup(); - void assign_tcs_single_patch_urb_setup(); + void assign_tcs_urb_setup(); void assign_tes_urb_setup(); void assign_gs_urb_setup(); bool assign_regs(bool allow_spilling, bool spill_all); @@ -251,6 +251,9 @@ public: fs_reg get_indirect_offset(nir_intrinsic_instr *instr); fs_reg 
get_tcs_single_patch_icp_handle(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + struct brw_reg get_tcs_output_urb_handle(); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 73e2f38145e..a2c8f3f557f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2605,6 +2605,73 @@ fs_visitor::get_tcs_single_patch_icp_handle(const fs_builder &bld, return icp_handle; } +fs_reg +fs_visitor::get_tcs_eight_patch_icp_handle(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key; + struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); + const nir_src &vertex_src = instr->src[0]; + + unsigned first_icp_handle = tcs_prog_data->include_primitive_id ? 3 : 2; + + if (nir_src_is_const(vertex_src)) { + return fs_reg(retype(brw_vec8_grf(first_icp_handle + + nir_src_as_uint(vertex_src), 0), + BRW_REGISTER_TYPE_UD)); + } + + /* The vertex index is non-constant. We need to use indirect + * addressing to fetch the proper URB handle. + * + * First, we start with the sequence <7, 6, 5, 4, 3, 2, 1, 0> + * indicating that channel <n> should read the handle from + * DWord <n>. We convert that to bytes by multiplying by 4. + * + * Next, we convert the vertex index to bytes by multiplying + * by 32 (shifting by 5), and add the two together. This is + * the final indirect byte offset. 
+ */ + fs_reg icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg sequence = bld.vgrf(BRW_REGISTER_TYPE_UW, 1); + fs_reg channel_offsets = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg icp_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + + /* sequence = <7, 6, 5, 4, 3, 2, 1, 0> */ + bld.MOV(sequence, fs_reg(brw_imm_v(0x76543210))); + /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ + bld.SHL(channel_offsets, sequence, brw_imm_ud(2u)); + /* Convert vertex_index to bytes (multiply by 32) */ + bld.SHL(vertex_offset_bytes, + retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), + brw_imm_ud(5u)); + bld.ADD(icp_offset_bytes, vertex_offset_bytes, channel_offsets); + + /* Use first_icp_handle as the base offset. There is one register + * of URB handles per vertex, so inform the register allocator that + * we might read up to tcs_key->input_vertices registers. + */ + bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, + retype(brw_vec8_grf(first_icp_handle, 0), icp_handle.type), + icp_offset_bytes, brw_imm_ud(tcs_key->input_vertices * REG_SIZE)); + + return icp_handle; +} + +struct brw_reg +fs_visitor::get_tcs_output_urb_handle() +{ + struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); + + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH) { + return retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); + } else { + assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH); + return retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD); + } +} + void fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -2612,6 +2679,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, assert(stage == MESA_SHADER_TESS_CTRL); struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key; struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); + struct brw_vue_prog_data *vue_prog_data = 
&tcs_prog_data->base; + + bool eight_patch = + vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_8_PATCH; fs_reg dst; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -2619,7 +2690,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, switch (instr->intrinsic) { case nir_intrinsic_load_primitive_id: - bld.MOV(dst, fs_reg(brw_vec1_grf(0, 1))); + bld.MOV(dst, fs_reg(eight_patch ? brw_vec8_grf(2, 0) + : brw_vec1_grf(0, 1))); break; case nir_intrinsic_load_invocation_id: bld.MOV(retype(dst, invocation_id.type), invocation_id); @@ -2675,7 +2747,9 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, unsigned imm_offset = instr->const_index[0]; fs_inst *inst; - fs_reg icp_handle = get_tcs_single_patch_icp_handle(bld, instr); + fs_reg icp_handle = + eight_patch ? get_tcs_eight_patch_icp_handle(bld, instr) + : get_tcs_single_patch_icp_handle(bld, instr); /* We can only read two double components with each URB read, so * we send two read messages in that case, each one loading up to @@ -2776,12 +2850,15 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, unsigned imm_offset = instr->const_index[0]; unsigned first_component = nir_intrinsic_component(instr); + struct brw_reg output_handles = get_tcs_output_urb_handle(); + fs_inst *inst; if (indirect_offset.file == BAD_FILE) { - /* Replicate the patch handle to all enabled channels */ + /* This MOV replicates the output handle to all enabled channels + * in SINGLE_PATCH mode. + */ fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); - bld.MOV(patch_handle, - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); + bld.MOV(patch_handle, output_handles); { if (first_component != 0) { @@ -2805,10 +2882,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, } } else { /* Indirect indexing - use per-slot offsets as well. 
*/ - const fs_reg srcs[] = { - retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), - indirect_offset - }; + const fs_reg srcs[] = { output_handles, indirect_offset }; fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); if (first_component != 0) { @@ -2842,8 +2916,10 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, unsigned imm_offset = instr->const_index[0]; unsigned mask = instr->const_index[1]; unsigned header_regs = 0; + struct brw_reg output_handles = get_tcs_output_urb_handle(); + fs_reg srcs[7]; - srcs[header_regs++] = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); + srcs[header_regs++] = output_handles; if (indirect_offset.file != BAD_FILE) { srcs[header_regs++] = indirect_offset; diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index f0ef8c0dd96..c37f34cbe81 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -406,10 +406,26 @@ brw_compile_tcs(const struct brw_compiler *compiler, nir = brw_postprocess_nir(nir, compiler, is_scalar); - if (is_scalar) - prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 8); - else - prog_data->instances = DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, 2); + bool has_primitive_id = + nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID); + + if (compiler->use_tcs_8_patch && + nir->info.tess.tcs_vertices_out <= 16 && + 2 + has_primitive_id + key->input_vertices <= 31) { + /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, + * the "Instance" field limits the number of output vertices to [1, 16]. + * Secondly, the "Dispatch GRF Start Register for URB Data" field is + * limited to [0, 31] - which imposes a limit on the input vertices. 
+ */ + vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH; + prog_data->instances = nir->info.tess.tcs_vertices_out; + prog_data->include_primitive_id = has_primitive_id; + } else { + unsigned verts_per_thread = is_scalar ? 8 : 2; + vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_SINGLE_PATCH; + prog_data->instances = + DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread); + } /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: @@ -462,14 +478,13 @@ brw_compile_tcs(const struct brw_compiler *compiler, fs_visitor v(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, NULL, nir, 8, shader_time_index, &input_vue_map); - if (!v.run_tcs_single_patch()) { + if (!v.run_tcs()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; - prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, &prog_data->base.base, v.promoted_constants, false, diff --git a/src/intel/dev/gen_debug.c b/src/intel/dev/gen_debug.c index b0d0d1a574a..5fa3d29fa51 100644 --- a/src/intel/dev/gen_debug.c +++ b/src/intel/dev/gen_debug.c @@ -86,6 +86,7 @@ static const struct debug_control debug_control[] = { { "color", DEBUG_COLOR }, { "reemit", DEBUG_REEMIT }, { "soft64", DEBUG_SOFT64 }, + { "tcs8", DEBUG_TCS_EIGHT_PATCH }, { NULL, 0 } }; diff --git a/src/intel/dev/gen_debug.h b/src/intel/dev/gen_debug.h index e4dabc67f8d..a6592354a64 100644 --- a/src/intel/dev/gen_debug.h +++ b/src/intel/dev/gen_debug.h @@ -84,6 +84,7 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_COLOR (1ull << 40) #define DEBUG_REEMIT (1ull << 41) #define DEBUG_SOFT64 (1ull << 42) +#define DEBUG_TCS_EIGHT_PATCH (1ull << 43) /* These flags are not compatible with the disk shader cache */ #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME @@ -91,7 +92,8 @@ extern uint64_t INTEL_DEBUG; /* These flags may affect program 
generation */ #define DEBUG_DISK_CACHE_MASK \ (DEBUG_NO16 | DEBUG_NO_DUAL_OBJECT_GS | DEBUG_NO8 | DEBUG_SPILL_FS | \ - DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64) + DEBUG_SPILL_VEC4 | DEBUG_NO_COMPACTION | DEBUG_DO32 | DEBUG_SOFT64 | \ + DEBUG_TCS_EIGHT_PATCH) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 0b58dce05b0..6b64f7ea8c7 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1434,6 +1434,11 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline, hs.PerThreadScratchSpace = get_scratch_space(tcs_bin); hs.ScratchSpaceBasePointer = get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); + +#if GEN_GEN >= 9 + hs.DispatchMode = tcs_prog_data->base.dispatch_mode; + hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; +#endif } const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 1c4d7988638..1050850bb1c 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -160,6 +160,7 @@ brw_tcs_populate_key(struct brw_context *brw, struct brw_tcs_prog_key *key) { const struct gen_device_info *devinfo = &brw->screen->devinfo; + const struct brw_compiler *compiler = brw->screen->compiler; struct brw_program *tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL]; struct brw_program *tep = @@ -177,7 +178,7 @@ brw_tcs_populate_key(struct brw_context *brw, per_patch_slots |= prog->info.patch_outputs_written; } - if (devinfo->gen < 8 || !tcp) + if (devinfo->gen < 8 || !tcp || compiler->use_tcs_8_patch) key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices; key->outputs_written = per_vertex_slots; key->patch_outputs_written = per_patch_slots; @@ -251,7 +252,7 @@ brw_tcs_populate_default_key(const struct brw_compiler *compiler, 
brw_setup_tex_for_precompile(devinfo, &key->tex, prog); /* Guess that the input and output patches have the same dimensionality. */ - if (devinfo->gen < 8) + if (devinfo->gen < 8 || compiler->use_tcs_8_patch) key->input_vertices = prog->info.tess.tcs_vertices_out; if (tes) { diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index ecffa2e8e86..961306b04fd 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -4111,6 +4111,11 @@ genX(upload_hs_state)(struct brw_context *brw) hs.IncludeVertexHandles = true; hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; + +#if GEN_GEN >= 9 + hs.DispatchMode = vue_prog_data->dispatch_mode; + hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id; +#endif } } } |