diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_blorp.cpp | 16 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_urb.c | 156 |
3 files changed, 143 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3f17f1d1066..0bfe606420b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1511,8 +1511,10 @@ void gen7_allocate_push_constants(struct brw_context *brw); void -gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries, - GLuint vs_size, GLuint vs_start); +gen7_emit_urb_state(struct brw_context *brw, + unsigned nr_vs_entries, unsigned vs_size, + unsigned vs_start, unsigned nr_gs_entries, + unsigned gs_size, unsigned gs_start); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index a387836b9f2..6c798b12631 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -51,14 +51,16 @@ static void gen7_blorp_emit_urb_config(struct brw_context *brw, const brw_blorp_params *params) { - /* The minimum valid value is 32. See 3DSTATE_URB_VS, - * Dword 1.15:0 "VS Number of URB Entries". + /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword + * 1.15:0 "VS Number of URB Entries". */ - int num_vs_entries = 32; - int vs_size = 2; - int vs_start = 2; /* skip over push constants */ - - gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start); + gen7_emit_urb_state(brw, + 32 /* num_vs_entries */, + 2 /* vs_size */, + 2 /* vs_start */, + 0 /* num_gs_entries */, + 1 /* gs_size */, + 2 /* gs_start */); } diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 927af3782a2..ed5cda8001b 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -74,34 +74,137 @@ gen7_upload_urb(struct brw_context *brw) { const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16; - /* Total space for entries is URB size - 16kB for push constants */ - int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */ - /* CACHE_NEW_VS_PROG */ unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1); - - int nr_vs_entries = handle_region_size / (vs_size * 64); - if (nr_vs_entries > brw->urb.max_vs_entries) - nr_vs_entries = brw->urb.max_vs_entries; - - /* According to volume 2a, nr_vs_entries must be a multiple of 8. */ - brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8); - - /* URB Starting Addresses are specified in multiples of 8kB. */ - brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */ - - assert(brw->urb.nr_vs_entries % 8 == 0); - assert(brw->urb.nr_gs_entries % 8 == 0); - /* GS requirement */ - assert(!brw->ff_gs.prog_active); + unsigned vs_entry_size_bytes = vs_size * 64; + /* BRW_NEW_GEOMETRY_PROGRAM, CACHE_NEW_GS_PROG */ + bool gs_present = brw->geometry_program; + unsigned gs_size = gs_present ? brw->gs.prog_data->base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. + */ + unsigned urb_chunks = brw->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = 1024 * push_size_kB; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS always requires at least 32 URB entries */ + unsigned vs_chunks = + ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(brw->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, brw->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, brw->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= 32); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Gen7 doesn't actually use brw->urb.nr_{vs,gs}_entries, but it seems + * better to put reasonable data in there rather than leave them + * uninitialized. + */ + brw->urb.nr_vs_entries = nr_vs_entries; + brw->urb.nr_gs_entries = nr_gs_entries; + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + brw->urb.vs_start = push_constant_chunks; + brw->urb.gs_start = push_constant_chunks + vs_chunks; gen7_emit_vs_workaround_flush(brw); - gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start); + gen7_emit_urb_state(brw, + brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start, + brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start); } void -gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries, - GLuint vs_size, GLuint vs_start) +gen7_emit_urb_state(struct brw_context *brw, + unsigned nr_vs_entries, unsigned vs_size, + unsigned vs_start, unsigned nr_gs_entries, + unsigned gs_size, unsigned gs_start) { BEGIN_BATCH(8); OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); @@ -109,11 +212,12 @@ gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries, ((vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); - /* Allocate the GS, HS, and DS zero space - we don't use them. */ OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); - OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | - (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); + OUT_BATCH(nr_gs_entries | + ((gs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | + (gs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); + /* Allocate the HS and DS zero space - we don't use them. */ OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | (vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); @@ -127,8 +231,8 @@ gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries, const struct brw_tracked_state gen7_urb = { .dirty = { .mesa = 0, - .brw = BRW_NEW_CONTEXT, - .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_FF_GS_PROG), + .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), }, .emit = gen7_upload_urb, }; |