From 27eecefc670c8b9e75f8520df7c9c7846923bf20 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 27 Mar 2013 10:34:55 -0700 Subject: i965/gs: Allocate push constant space for use by GS. Previously, we would always use the same push constant allocation regardless of what shader programs were being run: the available push constant space was split into 2 equal size partitions, one for the vertex shader, and one for the fragment shader. Now that we are adding geometry shader support, we need to do something smarter. This patch adjusts things so that when a geometry shader is in use, we split the available push constant space into 3 nearly-equal size partitions instead of 2. Since the push constant allocation is now affected by GL state, it can no longer be set up by brw_upload_initial_gpu_state(); instead it must be set up by a state atom. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h | 3 +- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_state.h | 4 +- src/mesa/drivers/dri/i965/brw_state_upload.c | 5 +- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 6 +++ src/mesa/drivers/dri/i965/gen7_urb.c | 71 ++++++++++++++++++++++------ 6 files changed, 68 insertions(+), 22 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0bfe606420b..a14e2b55bd6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1508,7 +1508,8 @@ gen6_get_sample_position(struct gl_context *ctx, /* gen7_urb.c */ void -gen7_allocate_push_constants(struct brw_context *brw); +gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, + unsigned gs_size, unsigned fs_size); void gen7_emit_urb_state(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 832ff55a0c0..8d9a824248a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1284,6 +1284,7 @@ enum brw_message_target { # define GEN7_URB_STARTING_ADDRESS_SHIFT 25 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ +#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS 0x7915 /* GEN7+ */ #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 85f82fe81e1..481463991ae 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -112,6 +112,7 @@ extern const struct brw_tracked_state gen7_cc_viewport_state_pointer; extern const struct brw_tracked_state gen7_clip_state; extern const struct brw_tracked_state gen7_disable_stages; extern const struct brw_tracked_state gen7_ps_state; +extern const struct brw_tracked_state gen7_push_constant_space; extern const struct brw_tracked_state gen7_sbe_state; extern const struct brw_tracked_state gen7_sf_clip_viewport; extern const struct brw_tracked_state gen7_sf_state; @@ -220,9 +221,6 @@ uint32_t get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset, int fs_attr, bool two_side_color, uint32_t *max_source_attr); -/* gen7_urb.c */ -void gen7_allocate_push_constants(struct brw_context *brw); - #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index b8830024827..9638c69fba8 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -188,6 +188,7 @@ static const struct brw_tracked_state *gen7_atoms[] = &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */ &gen7_sf_clip_viewport, + &gen7_push_constant_space, &gen7_urb, &gen6_blend_state, /* must do before cc unit */ &gen6_color_calc_state, /* must do before cc unit */ @@ -251,10 +252,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw) return; brw_upload_invariant_state(brw); - - if (brw->gen >= 7) { - gen7_allocate_push_constants(brw); - } } void brw_init_state( struct brw_context *brw ) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 6c798b12631..9df3d929ffb 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -51,6 +51,12 @@ static void gen7_blorp_emit_urb_config(struct brw_context *brw, const brw_blorp_params *params) { + unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16; + gen7_emit_push_constant_state(brw, + urb_size / 2 /* vs_size */, + 0 /* gs_size */, + urb_size / 2 /* fs_size */); + /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword * 1.15:0 "VS Number of URB Entries". */ diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 63467945e3a..5a7ab473b6e 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -30,12 +30,12 @@ /** * The following diagram shows how we partition the URB: * - * 8kB 8kB Rest of the URB space - * ____-____ ____-____ _________________-_________________ - * / \ / \ / \ + * 16kB or 32kB Rest of the URB space + * __________-__________ _________________-_________________ + * / \ / \ * +-------------------------------------------------------------+ - * | VS Push | FS Push | VS | - * | Constants | Constants | Handles | + * | VS/FS/GS Push | VS/GS URB | + * | Constants | Entries | * +-------------------------------------------------------------+ * * Notably, push constants must be stored at the beginning of the URB @@ -43,8 +43,12 @@ * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3 * doubles this (32kB). * - * Currently we split the constant buffer space evenly between VS and FS. - * This is probably not ideal, but simple. + * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and + * sized) in increments of 1kB. Haswell GT3 requires them to be located and + * sized in increments of 2kB. + * + * Currently we split the constant buffer space evenly among whatever stages + * are active. This is probably not ideal, but simple. * * Ivybridge GT1 and Haswell GT1 have 128kB of URB space. * Ivybridge GT2 and Haswell GT2 have 256kB of URB space. @@ -53,19 +57,49 @@ * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations", * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS. */ -void +static void gen7_allocate_push_constants(struct brw_context *brw) { - unsigned size = 8; - if (brw->is_haswell && brw->gt == 3) - size = 16; + unsigned avail_size = 16; + unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1; + + /* BRW_NEW_GEOMETRY_PROGRAM */ + bool gs_present = brw->geometry_program; + + unsigned vs_size, gs_size; + if (gs_present) { + vs_size = avail_size / 3; + avail_size -= vs_size; + gs_size = avail_size / 2; + avail_size -= gs_size; + } else { + vs_size = avail_size / 2; + avail_size -= vs_size; + gs_size = 0; + } + unsigned fs_size = avail_size; + + gen7_emit_push_constant_state(brw, multiplier * vs_size, + multiplier * gs_size, multiplier * fs_size); +} + +void +gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size, + unsigned gs_size, unsigned fs_size) +{ + unsigned offset = 0; - BEGIN_BATCH(4); + BEGIN_BATCH(6); OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); - OUT_BATCH(size); + OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); + offset += vs_size; + + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2)); + OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); + offset += gs_size; OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2)); - OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); + OUT_BATCH(offset | fs_size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); ADVANCE_BATCH(); /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS): @@ -99,6 +133,15 @@ gen7_allocate_push_constants(struct brw_context *brw) } } +const struct brw_tracked_state gen7_push_constant_space = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM, + .cache = 0, + }, + .emit = gen7_allocate_push_constants, +}; + static void gen7_upload_urb(struct brw_context *brw) { -- cgit v1.2.3