Merge commit 'origin/master' into gallium-0.2

Conflicts: src/mesa/shader/prog_print.c
author: Keith Whitwell <[email protected]> 2008-11-15 16:53:24 +0000
committer: Keith Whitwell <[email protected]> 2008-11-15 16:53:24 +0000
commit: 7468765b18be202a64d58b83f6267b6973ea4897 (patch)
tree: 6e9fa6dd4a2ff79787bcae247377d07c00a036cf /src/mesa/drivers/dri
parent: 5e1454a036be6da2c48e2e20bf6f8047ee1a94d3 (diff)
parent: 80d6379722a1249ce13db79a898d340644936f67 (diff)
7 files changed, 79 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 740c7cbd109..9b0d7eab7bf 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -88,7 +88,21 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    clip.thread4.nr_urb_entries = key->nr_urb_entries;
    clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
-   clip.thread4.max_threads = 1; /* 2 threads */
+   /* If we have enough clip URB entries to run two threads, do so.
+    */
+   if (key->nr_urb_entries >= 10) {
+      /* Half of the URB entries go to each thread, and it has to be an
+       * even number.
+       */
+      assert(key->nr_urb_entries % 2 == 0);
+      clip.thread4.max_threads = 2 - 1;
+   } else {
+      assert(key->nr_urb_entries >= 5);
+      clip.thread4.max_threads = 1 - 1;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      clip.thread4.max_threads = 0;
 
    if (INTEL_DEBUG & DEBUG_STATS)
       clip.thread4.stats_enable = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index d87b8f8a848..f893dd67423 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -105,6 +105,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
       }
 
       brw_validate_state(brw);
+      brw_upload_state(brw);
    }
 
    return hw_prim[prim];
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 4080c5e3228..73d6dea01ee 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -353,6 +353,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
 	    intel_buffer_object(input->glarray->BufferObj);
 
 	 /* Named buffer object: Just reference its contents directly. */
+	 dri_bo_unreference(input->bo);
 	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
 					    INTEL_READ);
 	 dri_bo_reference(input->bo);
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index 4f925d18105..506126fcfb0 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -172,7 +172,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    sf.thread4.nr_urb_entries = key->nr_urb_entries;
    sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
-   sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1;
+   /* Each SF thread produces 1 PUE, and there can be up to 24 threads */
+   sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1;
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
       sf.thread4.max_threads = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index 1a004176de1..7673dd36eb9 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -42,7 +42,44 @@
 #define SF 3
 #define CS 4
 
-/* XXX: Are the min_entry_size numbers useful?
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one.  So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5.  There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
  * XXX: Verify min_nr_entries, esp for VS.
  * XXX: Verify SF min_entry_size.
  */
@@ -54,7 +91,7 @@ static const struct {
 } limits[CS+1] = {
    { 16, 32, 1, 5 },			/* vs */
    { 4, 8,  1, 5 },			/* gs */
-   { 6, 8,  1, 5 },			/* clp */
+   { 5, 10,  1, 5 },			/* clp */
    { 1, 8,  1, 12 },		        /* sf */
    { 1, 4,  1, 32 }			/* cs */
 };
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 6e66f54524b..942581696d8 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -77,12 +77,19 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 {
    struct brw_vs_unit_state vs;
    dri_bo *bo;
+   int chipset_max_threads;
 
    memset(&vs, 0, sizeof(vs));
 
    vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
    vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
    vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   /* Choosing multiple program flow means that we may get 2-vertex threads,
+    * which will have the channel mask for dwords 4-7 enabled in the thread,
+    * and those dwords will be written to the second URB handle when we
+    * brw_urb_WRITE() results.
+    */
+   vs.thread1.single_program_flow = 0;
    vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
    vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
    vs.thread3.dispatch_grf_start_reg = 1;
@@ -91,8 +98,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 
    vs.thread4.nr_urb_entries = key->nr_urb_entries;
    vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
-   vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1),
-				 15);
+
+   if (BRW_IS_G4X(brw))
+      chipset_max_threads = 32;
+   else
+      chipset_max_threads = 16;
+   vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+				  1, chipset_max_threads) - 1;
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
       vs.thread4.max_threads = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 61fe97a4634..fd461618bc4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -67,8 +67,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 
    if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
       key->max_threads = 1;
-   else
-      key->max_threads = 32;
+   else {
+      /* WM maximum threads is number of EUs times number of threads per EU. */
+      if (BRW_IS_G4X(brw))
+	 key->max_threads = 10 * 5;
+      else
+	 key->max_threads = 8 * 4;
+   }
 
    /* CACHE_NEW_WM_PROG */
    key->total_grf = brw->wm.prog_data->total_grf;
author	Keith Whitwell <[email protected]>	2008-11-15 16:53:24 +0000
committer	Keith Whitwell <[email protected]>	2008-11-15 16:53:24 +0000
commit	7468765b18be202a64d58b83f6267b6973ea4897 (patch)
tree	6e9fa6dd4a2ff79787bcae247377d07c00a036cf /src/mesa/drivers/dri
parent	5e1454a036be6da2c48e2e20bf6f8047ee1a94d3 (diff)
parent	80d6379722a1249ce13db79a898d340644936f67 (diff)