diff options
author | Keith Whitwell <[email protected]> | 2008-11-15 16:53:24 +0000 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2008-11-15 16:53:24 +0000 |
commit | 7468765b18be202a64d58b83f6267b6973ea4897 (patch) | |
tree | 6e9fa6dd4a2ff79787bcae247377d07c00a036cf /src/mesa/drivers/dri | |
parent | 5e1454a036be6da2c48e2e20bf6f8047ee1a94d3 (diff) | |
parent | 80d6379722a1249ce13db79a898d340644936f67 (diff) |
Merge commit 'origin/master' into gallium-0.2
Conflicts:
src/mesa/shader/prog_print.c
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_clip_state.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf_state.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_urb.c | 41 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_state.c | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 9 |
7 files changed, 79 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 740c7cbd109..9b0d7eab7bf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -88,7 +88,21 @@ clip_unit_create_from_key(struct brw_context *brw, clip.thread4.nr_urb_entries = key->nr_urb_entries; clip.thread4.urb_entry_allocation_size = key->urb_size - 1; - clip.thread4.max_threads = 1; /* 2 threads */ + /* If we have enough clip URB entries to run two threads, do so. + */ + if (key->nr_urb_entries >= 10) { + /* Half of the URB entries go to each thread, and it has to be an + * even number. + */ + assert(key->nr_urb_entries % 2 == 0); + clip.thread4.max_threads = 2 - 1; + } else { + assert(key->nr_urb_entries >= 5); + clip.thread4.max_threads = 1 - 1; + } + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + clip.thread4.max_threads = 0; if (INTEL_DEBUG & DEBUG_STATS) clip.thread4.stats_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index d87b8f8a848..f893dd67423 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -105,6 +105,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) } brw_validate_state(brw); + brw_upload_state(brw); } return hw_prim[prim]; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 4080c5e3228..73d6dea01ee 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -353,6 +353,7 @@ static void brw_prepare_vertices(struct brw_context *brw) intel_buffer_object(input->glarray->BufferObj); /* Named buffer object: Just reference its contents directly. */ + dri_bo_unreference(input->bo); input->bo = intel_bufferobj_buffer(intel, intel_buffer, INTEL_READ); dri_bo_reference(input->bo); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 4f925d18105..506126fcfb0 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -172,7 +172,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - sf.thread4.max_threads = MIN2(12, key->nr_urb_entries / 2) - 1; + /* Each SF thread produces 1 PUE, and there can be up to 24 threads */ + sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 1a004176de1..7673dd36eb9 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -42,7 +42,44 @@ #define SF 3 #define CS 4 -/* XXX: Are the min_entry_size numbers useful? +/** @file brw_urb.c + * + * Manages the division of the URB space between the various fixed-function + * units. + * + * See the Thread Initiation Management section of the GEN4 B-Spec, and + * the individual *_STATE structures for restrictions on numbers of + * entries and threads. + */ + +/* + * Generally, a unit requires a min_nr_entries based on how many entries + * it produces before the downstream unit gets unblocked and can use and + * dereference some of its handles. + * + * The SF unit preallocates a PUE at the start of thread dispatch, and only + * uses that one. So it requires one entry per thread. + * + * For CLIP, the SF unit will hold the previous primitive while the + * next is getting assembled, meaning that linestrips require 3 CLIP VUEs + * (vertices) to ensure continued processing, trifans require 4, and tristrips + * require 5. There can be 1 or 2 threads, and each has the same requirement. + * + * GS has the same requirement as CLIP, but it never handles tristrips, + * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces. + * We only run it single-threaded. + * + * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X). + * Each thread processes 2 preallocated VUEs (vertices) at a time, and they + * get streamed down as soon as threads processing earlier vertices get + * theirs accepted. + * + * Each unit will take the number of URB entries we give it (based on the + * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs, + * and brw_curbe.c for the CURBEs) and decide its maximum number of + * threads it can support based on that. in brw_*_state.c. + * + * XXX: Are the min_entry_size numbers useful? * XXX: Verify min_nr_entries, esp for VS. * XXX: Verify SF min_entry_size. */ @@ -54,7 +91,7 @@ static const struct { } limits[CS+1] = { { 16, 32, 1, 5 }, /* vs */ { 4, 8, 1, 5 }, /* gs */ - { 6, 8, 1, 5 }, /* clp */ + { 5, 10, 1, 5 }, /* clp */ { 1, 8, 1, 12 }, /* sf */ { 1, 4, 1, 32 } /* cs */ }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 6e66f54524b..942581696d8 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -77,12 +77,19 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { struct brw_vs_unit_state vs; dri_bo *bo; + int chipset_max_threads; memset(&vs, 0, sizeof(vs)); vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + /* Choosing multiple program flow means that we may get 2-vertex threads, + * which will have the channel mask for dwords 4-7 enabled in the thread, + * and those dwords will be written to the second URB handle when we + * brw_urb_WRITE() results. + */ + vs.thread1.single_program_flow = 0; vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -91,8 +98,13 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread4.nr_urb_entries = key->nr_urb_entries; vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - vs.thread4.max_threads = MIN2(MAX2(0, (key->nr_urb_entries - 6) / 2 - 1), - 15); + + if (BRW_IS_G4X(brw)) + chipset_max_threads = 32; + else + chipset_max_threads = 16; + vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, + 1, chipset_max_threads) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) vs.thread4.max_threads = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 61fe97a4634..fd461618bc4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -67,8 +67,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) key->max_threads = 1; - else - key->max_threads = 32; + else { + /* WM maximum threads is number of EUs times number of threads per EU. */ + if (BRW_IS_G4X(brw)) + key->max_threads = 10 * 5; + else + key->max_threads = 8 * 4; + } /* CACHE_NEW_WM_PROG */ key->total_grf = brw->wm.prog_data->total_grf; |