summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2014-08-14 22:46:54 -0700
committerKenneth Graunke <[email protected]>2017-05-20 00:23:10 -0700
commit494593e6b2b989c2c17c52c989dde625d301a494 (patch)
tree5a0f4a91f2b7f9e40c2d34936743f3ad712d23a8 /src/mesa/drivers
parent731b577cc6d0294a4ef1266996a53bbc30713d6e (diff)
i965: Use the upload BO for push constants on Gen9+.
Shaders can use quite a bit of uniform data. Better to put it in the upload buffers, like we do for client vertex data, rather than the batch buffer state area, which is primarly used for indirect state. This should free up batch space, allowing us to emit more commands in a batch before flushing. Because BRW_NEW_BATCH also causes a lot of state to be re-emitted, it may also reduce CPU overhead a little bit. We took this approach on Gen4-5, but switched to using the batch area on Gen6+ because buffer 0 is relative to Dynamic State Base Address by default, which is set to the start of the batch. On Gen9+, we already use a relocation due to a workaround, so this is trivial to change and has basically no downside. Unfortunately we can't change compute shader push constants because MEDIA_CURBE_LOAD always uses an offset from dynamic state base address. Improves performance in GLBenchmark 2.7/TRex Offscreen by: - Skylake GT4e: 0.52821% +/- 0.113402% (n = 190) - Apollolake: 0.510225% +/- 0.273064% (n = 70) Reviewed-by: Chris Forbes <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/gen6_constant_state.c15
-rw-r--r--src/mesa/drivers/dri/i965/genX_state_upload.c2
2 files changed, 11 insertions, 6 deletions
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index 460f1c51457..749e77e958f 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -25,6 +25,7 @@
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
#include "program/prog_parameter.h"
/**
@@ -60,12 +61,16 @@ gen6_upload_push_constants(struct brw_context *brw,
if (prog)
_mesa_load_state_parameters(ctx, prog->Parameters);
- gl_constant_value *param;
int i;
-
- param = brw_state_batch(brw,
- prog_data->nr_params * sizeof(gl_constant_value),
- 32, &stage_state->push_const_offset);
+ const int size = prog_data->nr_params * sizeof(gl_constant_value);
+ gl_constant_value *param;
+ if (brw->gen >= 9) {
+ param = intel_upload_space(brw, size, 32, &brw->curbe.curbe_bo,
+ &stage_state->push_const_offset);
+ } else {
+ param = brw_state_batch(brw, size, 32,
+ &stage_state->push_const_offset);
+ }
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 64ba3577383..f7128ae8960 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2601,7 +2601,7 @@ upload_constant_state(struct brw_context *brw,
pkt.ConstantBody.ConstantBuffer2ReadLength =
stage_state->push_const_size;
pkt.ConstantBody.PointerToConstantBuffer2 =
- render_ro_bo(brw->batch.bo, stage_state->push_const_offset);
+ render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
#else
pkt.ConstantBody.ConstantBuffer0ReadLength =
stage_state->push_const_size;