From 7a82ad54bb56cafaeea7f909cd9fc35542c23ba0 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 1 Sep 2017 09:59:34 -0700 Subject: intel/fs: Rework zero-length URB write handling Originally we tried to handle this case based on slots_valid. However, there are a number of ways that this can go wrong. For one, we throw away any trailing slots which either aren't written or are set to VARYING_SLOT_PAD. Second, even if PSIZ is a valid slot, we may not actually write anything there. Between the lot of these, it was possible to end up in a case where we tried to do a regular URB write but ended up with a length of 1 which is invalid. This commit moves it to the end and makes it based on a new boolean flag urb_written. Reviewed-by: Iago Toral Quiroga Cc: mesa-stable@lists.freedesktop.org --- src/intel/compiler/brw_fs_visitor.cpp | 60 ++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 9fd4c20837f..9a19dc25567 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -566,34 +566,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) else urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); - /* If we don't have any valid slots to write, just do a minimal urb write - * send to terminate the shader. This includes 1 slot of undefined data, - * because it's invalid to write 0 data: - * - * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions - - * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read > - * Write Data Payload: - * - * "The write data payload can be between 1 and 8 message phases long." - */ - if (vue_map->slots_valid == 0) { - /* For GS, just turn EmitVertex() into a no-op. We don't want it to - * end the thread, and emit_gs_thread_end() already emits a SEND with - * EOT at the end of the program for us. - */ - if (stage == MESA_SHADER_GEOMETRY) - return; - - fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD); - bld.exec_all().MOV(payload, urb_handle); - - fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); - inst->eot = true; - inst->mlen = 2; - inst->offset = 1; - return; - } - opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8; int header_size = 1; fs_reg per_slot_offsets; @@ -645,6 +617,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) last_slot--; } + bool urb_written = false; for (slot = 0; slot < vue_map->num_slots; slot++) { int varying = vue_map->slot_to_varying[slot]; switch (varying) { @@ -730,7 +703,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) * the last slot or if we need to flush (see BAD_FILE varying case * above), emit a URB write send now to flush out the data. */ - if (length == 8 || slot == last_slot) + if (length == 8 || (length > 0 && slot == last_slot)) flush = true; if (flush) { fs_reg *payload_sources = @@ -755,8 +728,37 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) urb_offset = starting_urb_offset + slot + 1; length = 0; flush = false; + urb_written = true; } } + + /* If we don't have any valid slots to write, just do a minimal urb write + * send to terminate the shader. This includes 1 slot of undefined data, + * because it's invalid to write 0 data: + * + * From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions - + * Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read > + * Write Data Payload: + * + * "The write data payload can be between 1 and 8 message phases long." + */ + if (!urb_written) { + /* For GS, just turn EmitVertex() into a no-op. We don't want it to + * end the thread, and emit_gs_thread_end() already emits a SEND with + * EOT at the end of the program for us. + */ + if (stage == MESA_SHADER_GEOMETRY) + return; + + fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD); + bld.exec_all().MOV(payload, urb_handle); + + fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload); + inst->eot = true; + inst->mlen = 2; + inst->offset = 1; + return; + } } void -- cgit v1.2.3