diff options
author | Paul Berry <[email protected]> | 2013-09-03 12:30:06 -0700 |
---|---|---|
committer | Paul Berry <[email protected]> | 2013-09-16 12:53:49 -0700 |
commit | 784044c206efd774ce1f7a481311480f85446887 (patch) | |
tree | b80744b5428849537633f1f876c9698e306d0267 /src/mesa/drivers/dri/i965 | |
parent | 875972029eddfd53cb90a8e34e9f27b2afed119f (diff) |
i965/vec4: Generate URB writes using a loop.

Previously we only ever did 1 or 2 URB writes, since the maximum
number of varyings we support is small enough to fit in 2 URB writes.
But GL 3.2 requires the geometry shader to support 128 output varying
components, and this could require up to 3 URB writes.

Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 52 |
1 files changed, 21 insertions, 31 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 304636a8cf9..874e6e3f385 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2694,47 +2694,37 @@ vec4_visitor::emit_vertex() emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4); } - /* Set up the VUE data for the first URB write */ - int slot; - for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) { - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); - - /* If this was max_usable_mrf, we can't fit anything more into this URB - * WRITE. + /* We may need to split this up into several URB writes, so do them in a + * loop. + */ + int slot = 0; + bool complete = false; + do { + /* URB offset is in URB row increments, and each of our MRFs is half of + * one of those, since we're doing interleaved writes. */ - if (mrf > max_usable_mrf) { - slot++; - break; - } - } - - bool complete = slot >= prog_data->vue_map.num_slots; - current_annotation = "URB write"; - vec4_instruction *inst = emit_urb_write_opcode(complete); - inst->base_mrf = base_mrf; - inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); + int offset = slot / 2; - /* Optional second URB write */ - if (!complete) { mrf = base_mrf + 1; - for (; slot < prog_data->vue_map.num_slots; ++slot) { - assert(mrf < max_usable_mrf); - emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]); + + /* If this was max_usable_mrf, we can't fit anything more into this + * URB WRITE. + */ + if (mrf > max_usable_mrf) { + slot++; + break; + } } + complete = slot >= prog_data->vue_map.num_slots; current_annotation = "URB write"; - inst = emit_urb_write_opcode(true /* complete */); + vec4_instruction *inst = emit_urb_write_opcode(complete); inst->base_mrf = base_mrf; inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf); - /* URB destination offset. 
In the previous write, we got MRFs - * 2-13 minus the one header MRF, so 12 regs. URB offset is in - * URB row increments, and each of our MRFs is half of one of - * those, since we're doing interleaved writes. - */ - inst->offset += (max_usable_mrf - base_mrf) / 2; - } + inst->offset += offset; + } while(!complete); } |