diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 36 |
4 files changed, 57 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 52009e204fc..ff270da1536 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -817,6 +817,22 @@ enum opcode { * for Slot {0,1}" fields in the message header. */ GS_OPCODE_THREAD_END, + + /** + * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header. + * + * - dst is the MRF containing the message header. + * + * - src0.x indicates which portion of the URB should be written to (e.g. a + * vertex number) + * + * - src1 is an immediate multiplier which will be applied to src0 + * (e.g. the size of a single vertex in the URB). + * + * Note: the hardware will apply this offset *in addition to* the offset in + * vec4_instruction::offset. + */ + GS_OPCODE_SET_WRITE_OFFSET, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 689e908d37d..e5d939af24f 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -501,6 +501,8 @@ brw_instruction_name(enum opcode op) return "gs_urb_write"; case GS_OPCODE_THREAD_END: return "gs_thread_end"; + case GS_OPCODE_SET_WRITE_OFFSET: + return "set_write_offset"; default: /* Yes, this leaks. It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index a95f61fb75a..484e5787e5d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -630,6 +630,9 @@ private: void generate_vs_urb_write(vec4_instruction *inst); void generate_gs_urb_write(vec4_instruction *inst); void generate_gs_thread_end(vec4_instruction *inst); + void generate_gs_set_write_offset(struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index 23a3b675138..c487ac85a23 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -443,6 +443,38 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst) } void +vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message + * Header: M0.3): + * + * Slot 0 Offset. This field, after adding to the Global Offset field + * in the message descriptor, specifies the offset (in 256-bit units) + * from the start of the URB entry, as referenced by URB Handle 0, at + * which the data will be accessed. + * + * Similar text describes DWORD M0.4, which is slot 1 offset. + * + * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components + * of the register for geometry shader invocations 0 and 1) by the + * immediate value in src1, and store the result in DWORDs 3 and 4 of dst. + * + * We can do this with the following EU instruction: + * + * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all } + */ + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), + src1); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +} + +void vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index) { @@ -918,6 +950,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, generate_gs_thread_end(inst); break; + case GS_OPCODE_SET_WRITE_OFFSET: + generate_gs_set_write_offset(dst, src[0], src[1]); + break; + case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME); mark_surface_used(SURF_INDEX_VS_SHADER_TIME); |