diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_compiler.c | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 95 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 356 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_tcs.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 59 |
7 files changed, 510 insertions, 15 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index c8a38e3145c..61bb5ade282 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -152,7 +152,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_VERTEX] = devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); - compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; + compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TCS", false); compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); compiler->scalar_stage[MESA_SHADER_GEOMETRY] = @@ -194,6 +195,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectInput = false; compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; + compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].EmitNoIndirectOutput = false; if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 78f7d40a84e..4b6aa678d54 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1758,6 +1758,19 @@ fs_visitor::assign_vs_urb_setup() } void +fs_visitor::assign_tcs_single_patch_urb_setup() +{ + assert(stage == MESA_SHADER_TESS_CTRL); + + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data; + + /* Rewrite all ATTR file references to HW_REGs. */ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + convert_attr_sources_to_hw_regs(inst); + } +} + +void fs_visitor::assign_tes_urb_setup() { assert(stage == MESA_SHADER_TESS_EVAL); @@ -5474,6 +5487,88 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) } bool +fs_visitor::run_tcs_single_patch() +{ + assert(stage == MESA_SHADER_TESS_CTRL); + + struct brw_tcs_prog_data *tcs_prog_data = + (struct brw_tcs_prog_data *) prog_data; + + /* r1-r4 contain the ICP handles. */ + payload.num_regs = 5; + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + /* Initialize gl_InvocationID */ + fs_reg channels_uw = bld.vgrf(BRW_REGISTER_TYPE_UW); + fs_reg channels_ud = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(channels_uw, fs_reg(brw_imm_uv(0x76543210))); + bld.MOV(channels_ud, channels_uw); + + if (tcs_prog_data->instances == 1) { + invocation_id = channels_ud; + } else { + invocation_id = bld.vgrf(BRW_REGISTER_TYPE_UD); + + /* Get instance number from g0.2 bits 23:17, and multiply it by 8. */ + fs_reg t = bld.vgrf(BRW_REGISTER_TYPE_UD); + fs_reg instance_times_8 = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.AND(t, fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(INTEL_MASK(23, 17))); + bld.SHR(instance_times_8, t, brw_imm_ud(17 - 3)); + + bld.ADD(invocation_id, instance_times_8, channels_ud); + } + + /* Fix the disptach mask */ + if (nir->info.tcs.vertices_out % 8) { + bld.CMP(bld.null_reg_ud(), invocation_id, + brw_imm_ud(nir->info.tcs.vertices_out), BRW_CONDITIONAL_L); + bld.IF(BRW_PREDICATE_NORMAL); + } + + emit_nir_code(); + + if (nir->info.tcs.vertices_out % 8) { + bld.emit(BRW_OPCODE_ENDIF); + } + + /* Emit EOT write; set TR DS Cache bit */ + fs_reg srcs[3] = { + fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + fs_reg(brw_imm_ud(WRITEMASK_X << 16)), + fs_reg(brw_imm_ud(0)), + }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 3); + bld.LOAD_PAYLOAD(payload, srcs, 3, 2); + + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8_MASKED, + bld.null_reg_ud(), payload); + inst->mlen = 3; + inst->base_mrf = -1; + inst->eot = true; + + if (shader_time_index >= 0) + emit_shader_time_end(); + + if (failed) + return false; + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + assign_tcs_single_patch_urb_setup(); + + fixup_3src_null_dest(); + allocate_registers(); + + return !failed; +} + +bool fs_visitor::run_tes() { assert(stage == MESA_SHADER_TESS_EVAL); diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index a5c3297e5a1..ba6bd3f5725 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -110,6 +110,7 @@ public: bool run_fs(bool do_rep_send); bool run_vs(gl_clip_plane *clip_planes); + bool run_tcs_single_patch(); bool run_tes(); bool run_gs(); bool run_cs(); @@ -126,6 +127,7 @@ public: void assign_urb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); void assign_vs_urb_setup(); + void assign_tcs_single_patch_urb_setup(); void assign_tes_urb_setup(); void assign_gs_urb_setup(); bool assign_regs(bool allow_spilling); @@ -250,6 +252,8 @@ public: nir_ssa_undef_instr *instr); void nir_emit_vs_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + void nir_emit_tcs_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_gs_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_fs_intrinsic(const brw::fs_builder &bld, @@ -405,6 +409,7 @@ public: fs_reg userplane[MAX_CLIP_PLANES]; fs_reg final_gs_vertex_count; fs_reg control_data_bits; + fs_reg invocation_id; unsigned grf_used; bool spilled_any_registers; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 360e2c97d74..4d14fda4a86 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -113,6 +113,9 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg, void fs_visitor::nir_setup_outputs() { + if (stage == MESA_SHADER_TESS_CTRL) + return; + brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); @@ -230,6 +233,8 @@ emit_system_values_block(nir_block *block, fs_visitor *v) break; case nir_intrinsic_load_invocation_id: + if (v->stage == MESA_SHADER_TESS_CTRL) + break; assert(v->stage == MESA_SHADER_GEOMETRY); reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; if (reg->file == BAD_FILE) { @@ -452,6 +457,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr) case MESA_SHADER_VERTEX: nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; + case MESA_SHADER_TESS_CTRL: + nir_emit_tcs_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; case MESA_SHADER_TESS_EVAL: nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; @@ -1901,6 +1909,354 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, } void +fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_TESS_CTRL); + struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key; + struct brw_tcs_prog_data *tcs_prog_data = + (struct brw_tcs_prog_data *) prog_data; + + fs_reg dst; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dst = get_nir_dest(instr->dest); + + switch (instr->intrinsic) { + case nir_intrinsic_load_primitive_id: + bld.MOV(dst, fs_reg(brw_vec1_grf(0, 1))); + break; + case nir_intrinsic_load_invocation_id: + bld.MOV(retype(dst, invocation_id.type), invocation_id); + break; + case nir_intrinsic_load_patch_vertices_in: + bld.MOV(retype(dst, BRW_REGISTER_TYPE_D), + brw_imm_d(tcs_key->input_vertices)); + break; + + case nir_intrinsic_barrier: { + if (tcs_prog_data->instances == 1) + break; + + fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t)); + + const fs_builder fwa_bld = bld.exec_all(); + + /* Zero the message header */ + fwa_bld.MOV(m0, brw_imm_ud(0u)); + + /* Copy "Barrier ID" from r0.2, bits 16:13 */ + fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(INTEL_MASK(16, 13))); + + /* Shift it up to bits 27:24. */ + fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11)); + + /* Set the Barrier Count and the enable bit */ + fwa_bld.OR(m0_2, m0_2, + brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15))); + + bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0); + break; + } + + case nir_intrinsic_load_input: + unreachable("nir_lower_io should never give us these."); + break; + + case nir_intrinsic_load_per_vertex_input: { + fs_reg indirect_offset = get_indirect_offset(instr); + unsigned imm_offset = instr->const_index[0]; + + const nir_src &vertex_src = instr->src[0]; + nir_const_value *vertex_const = nir_src_as_const_value(vertex_src); + + fs_inst *inst; + + fs_reg icp_handle; + + if (vertex_const) { + /* Emit a MOV to resolve <0,1,0> regioning. */ + icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + bld.MOV(icp_handle, + retype(brw_vec1_grf(1 + (vertex_const->i32[0] >> 3), + vertex_const->i32[0] & 7), + BRW_REGISTER_TYPE_UD)); + } else if (tcs_prog_data->instances == 1 && + vertex_src.is_ssa && + vertex_src.ssa->parent_instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(vertex_src.ssa->parent_instr)->intrinsic == nir_intrinsic_load_invocation_id) { + /* For the common case of only 1 instance, an array index of + * gl_InvocationID means reading g1. Skip all the indirect work. + */ + icp_handle = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD); + } else { + /* The vertex index is non-constant. We need to use indirect + * addressing to fetch the proper URB handle. + */ + icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + + /* Each ICP handle is a single DWord (4 bytes) */ + fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + bld.SHL(vertex_offset_bytes, + retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD), + brw_imm_ud(2u)); + + /* Start at g1. We might read up to 4 registers. */ + bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, + fs_reg(brw_vec8_grf(1, 0)), vertex_offset_bytes, + brw_imm_ud(4 * REG_SIZE)); + } + + if (indirect_offset.file == BAD_FILE) { + /* Constant indexing - use global offset. */ + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle); + inst->offset = imm_offset; + inst->mlen = 1; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; + } else { + /* Indirect indexing - use per-slot offsets as well. */ + const fs_reg srcs[] = { icp_handle, indirect_offset }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->mlen = 2; + inst->regs_written = instr->num_components; + } + + /* Copy the temporary to the destination to deal with writemasking. + * + * Also attempt to deal with gl_PointSize being in the .w component. + */ + if (inst->offset == 0 && indirect_offset.file == BAD_FILE) { + inst->dst = bld.vgrf(dst.type, 4); + inst->regs_written = 4; + bld.MOV(dst, offset(inst->dst, bld, 3)); + } + break; + } + + case nir_intrinsic_load_output: + case nir_intrinsic_load_per_vertex_output: { + fs_reg indirect_offset = get_indirect_offset(instr); + unsigned imm_offset = instr->const_index[0]; + + fs_inst *inst; + if (indirect_offset.file == BAD_FILE) { + /* Replicate the patch handle to all enabled channels */ + fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + bld.MOV(patch_handle, + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + if (imm_offset == 0) { + /* This is a read of gl_TessLevelInner[], which lives in the + * Patch URB header. The layout depends on the domain. + */ + dst.type = BRW_REGISTER_TYPE_F; + switch (tcs_key->tes_primitive_mode) { + case GL_QUADS: { + /* DWords 3-2 (reversed) */ + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle); + inst->offset = 0; + inst->mlen = 1; + inst->base_mrf = -1; + inst->regs_written = 4; + + /* dst.xy = tmp.wz */ + bld.MOV(dst, offset(tmp, bld, 3)); + bld.MOV(offset(dst, bld, 1), offset(tmp, bld, 2)); + break; + } + case GL_TRIANGLES: + /* DWord 4; hardcode offset = 1 and regs_written = 1 */ + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle); + inst->offset = 1; + inst->mlen = 1; + inst->base_mrf = -1; + inst->regs_written = 1; + break; + case GL_ISOLINES: + /* All channels are undefined. */ + break; + default: + unreachable("Bogus tessellation domain"); + } + } else if (imm_offset == 1) { + /* This is a read of gl_TessLevelOuter[], which lives in the + * Patch URB header. The layout depends on the domain. + */ + dst.type = BRW_REGISTER_TYPE_F; + + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4); + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle); + inst->offset = 1; + inst->mlen = 1; + inst->base_mrf = -1; + inst->regs_written = 4; + + /* Reswizzle: WZYX */ + fs_reg srcs[4] = { + offset(tmp, bld, 3), + offset(tmp, bld, 2), + offset(tmp, bld, 1), + offset(tmp, bld, 0), + }; + + unsigned num_components; + switch (tcs_key->tes_primitive_mode) { + case GL_QUADS: + num_components = 4; + break; + case GL_TRIANGLES: + num_components = 3; + break; + case GL_ISOLINES: + /* Isolines are not reversed; swizzle .zw -> .xy */ + srcs[0] = offset(tmp, bld, 2); + srcs[1] = offset(tmp, bld, 3); + num_components = 2; + break; + default: + unreachable("Bogus tessellation domain"); + } + bld.LOAD_PAYLOAD(dst, srcs, num_components, 0); + } else { + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle); + inst->offset = imm_offset; + inst->mlen = 1; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; + } + } else { + /* Indirect indexing - use per-slot offsets as well. */ + const fs_reg srcs[] = { + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), + indirect_offset + }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload); + inst->offset = imm_offset; + inst->mlen = 2; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; + } + break; + } + + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: { + fs_reg value = get_nir_src(instr->src[0]); + fs_reg indirect_offset = get_indirect_offset(instr); + unsigned imm_offset = instr->const_index[0]; + unsigned swiz = BRW_SWIZZLE_XYZW; + unsigned mask = instr->const_index[1]; + unsigned header_regs = 0; + fs_reg srcs[7]; + srcs[header_regs++] = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD); + + if (indirect_offset.file != BAD_FILE) { + srcs[header_regs++] = indirect_offset; + } else if (tcs_key->program_string_id != 0) { + if (imm_offset == 0) { + value.type = BRW_REGISTER_TYPE_F; + + mask &= (1 << tesslevel_inner_components(tcs_key->tes_primitive_mode)) - 1; + + /* This is a write to gl_TessLevelInner[], which lives in the + * Patch URB header. The layout depends on the domain. + */ + switch (tcs_key->tes_primitive_mode) { + case GL_QUADS: + /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed). + * We use an XXYX swizzle to reverse put .xy in the .wz + * channels, and use a .zw writemask. + */ + mask = writemask_for_backwards_vector(mask); + swiz = BRW_SWIZZLE4(0, 0, 1, 0); + break; + case GL_TRIANGLES: + /* gl_TessLevelInner[].x lives at DWord 4, so we set the + * writemask to X and bump the URB offset by 1. + */ + imm_offset = 1; + break; + case GL_ISOLINES: + /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */ + return; + default: + unreachable("Bogus tessellation domain"); + } + } else if (imm_offset == 1) { + /* This is a write to gl_TessLevelOuter[] which lives in the + * Patch URB Header at DWords 4-7. However, it's reversed, so + * instead of .xyzw we have .wzyx. + */ + value.type = BRW_REGISTER_TYPE_F; + + mask &= (1 << tesslevel_outer_components(tcs_key->tes_primitive_mode)) - 1; + + if (tcs_key->tes_primitive_mode == GL_ISOLINES) { + /* Isolines .xy should be stored in .zw, in order. */ + swiz = BRW_SWIZZLE4(0, 0, 0, 1); + mask <<= 2; + } else { + /* Other domains are reversed; store .wzyx instead of .xyzw */ + swiz = BRW_SWIZZLE_WZYX; + mask = writemask_for_backwards_vector(mask); + } + } + } + + if (mask == 0) + break; + + unsigned num_components = _mesa_fls(mask); + enum opcode opcode; + + if (mask != WRITEMASK_XYZW) { + srcs[header_regs++] = brw_imm_ud(mask << 16); + opcode = indirect_offset.file != BAD_FILE ? + SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT : + SHADER_OPCODE_URB_WRITE_SIMD8_MASKED; + } else { + opcode = indirect_offset.file != BAD_FILE ? + SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT : + SHADER_OPCODE_URB_WRITE_SIMD8; + } + + for (unsigned i = 0; i < num_components; i++) { + if (mask & (1 << i)) + srcs[header_regs + i] = offset(value, bld, BRW_GET_SWZ(swiz, i)); + } + + unsigned mlen = header_regs + num_components; + + fs_reg payload = + bld.vgrf(BRW_REGISTER_TYPE_UD, mlen); + bld.LOAD_PAYLOAD(payload, srcs, mlen, header_regs); + + fs_inst *inst = bld.emit(opcode, bld.null_reg_ud(), payload); + inst->offset = imm_offset; + inst->mlen = mlen; + inst->base_mrf = -1; + break; + } + + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + +void fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index da29f0be8b4..f0b99688dec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1015,6 +1015,9 @@ fs_visitor::init() case MESA_SHADER_VERTEX: key_tex = &((const brw_vs_prog_key *) key)->tex; break; + case MESA_SHADER_TESS_CTRL: + key_tex = &((const brw_tcs_prog_key *) key)->tex; + break; case MESA_SHADER_TESS_EVAL: key_tex = &((const brw_tes_prog_key *) key)->tex; break; diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 0117ffe3589..98ed2b253a6 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -214,7 +214,8 @@ brw_codegen_tcs_prog(struct brw_context *brw, prog_data.base.base.nr_image_params = tcs->NumImages; brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program.Base, - &prog_data.base.base, false); + &prog_data.base.base, + compiler->scalar_stage[MESA_SHADER_TESS_CTRL]); } else { /* Upload the Patch URB Header as the first two uniforms. * Do the annoying scrambling so the shader doesn't have to. diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index 5096f135124..4da30b9c47d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -29,6 +29,7 @@ #include "brw_nir.h" #include "brw_vec4_tcs.h" +#include "brw_fs.h" namespace brw { @@ -452,7 +453,10 @@ brw_compile_tcs(const struct brw_compiler *compiler, brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); - prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + if (is_scalar) + prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 8); + else + prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: @@ -493,20 +497,49 @@ brw_compile_tcs(const struct brw_compiler *compiler, brw_print_vue_map(stderr, &vue_prog_data->vue_map); } - vec4_tcs_visitor v(compiler, log_data, key, prog_data, - nir, mem_ctx, shader_time_index, &input_vue_map); - if (!v.run()) { - if (error_str) - *error_str = ralloc_strdup(mem_ctx, v.fail_msg); - return NULL; - } + if (is_scalar) { + fs_visitor v(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, NULL, nir, 8, + shader_time_index, &input_vue_map); + if (!v.run_tcs_single_patch()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } - if (unlikely(INTEL_DEBUG & DEBUG_TCS)) - v.dump_instructions(); + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; + + fs_generator g(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, v.promoted_constants, false, + MESA_SHADER_TESS_CTRL); + if (unlikely(INTEL_DEBUG & DEBUG_TCS)) { + g.enable_debug(ralloc_asprintf(mem_ctx, + "%s tessellation control shader %s", + nir->info.label ? nir->info.label + : "unnamed", + nir->info.name)); + } + + g.generate_code(v.cfg, 8); - return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg, - final_assembly_size); + return g.get_assembly(final_assembly_size); + } else { + vec4_tcs_visitor v(compiler, log_data, key, prog_data, + nir, mem_ctx, shader_time_index, &input_vue_map); + if (!v.run()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + if (unlikely(INTEL_DEBUG & DEBUG_TCS)) + v.dump_instructions(); + + + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, + &prog_data->base, v.cfg, + final_assembly_size); + } } |