summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/iris
diff options
context:
space:
mode:
author Kenneth Graunke <[email protected]> 2019-03-12 14:51:22 -0700
committer Kenneth Graunke <[email protected]> 2019-04-23 00:24:08 -0700
commit 5ad0c88dbe3e5805a10d8f1fef9d0cf1bbecdd46 (patch)
tree 060190534de2bbd7ea57d3a5724adbc0ea5af36b /src/gallium/drivers/iris
parent 0a082b6560867b359e8858cd1cbb15b0869ae18e (diff)
iris: Replace buffer backing storage and rebind to update addresses.
This implements PIPE_CAP_INVALIDATE_BUFFER and invalidate_resource(), as well as the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag. When either of these happens, we swap out the backing storage of the buffer for a new idle BO, allowing us to write to it immediately without stalling or queueing a blit.

On my Skylake GT4e at 1920x1080, this improves performance in games:

-----------------------------------------------
| DiRT Rally        | +25% (avg) | +17% (max) |
| Bioshock Infinite | +22% (avg) | +11% (max) |
| Shadow of Mordor  | +27% (avg) | +83% (max) |
-----------------------------------------------
Diffstat (limited to 'src/gallium/drivers/iris')
-rw-r--r-- src/gallium/drivers/iris/iris_context.h | 3
-rw-r--r-- src/gallium/drivers/iris/iris_resource.c | 47
-rw-r--r-- src/gallium/drivers/iris/iris_screen.c | 1
-rw-r--r-- src/gallium/drivers/iris/iris_state.c | 125
4 files changed, 171 insertions, 5 deletions
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 1b69b256947..ab70fc58718 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -355,6 +355,9 @@ struct iris_vtable {
void (*upload_compute_state)(struct iris_context *ice,
struct iris_batch *batch,
const struct pipe_grid_info *grid);
+ void (*rebind_buffer)(struct iris_context *ice,
+ struct iris_resource *res,
+ uint64_t old_address);
void (*load_register_reg32)(struct iris_batch *batch, uint32_t dst,
uint32_t src);
void (*load_register_reg64)(struct iris_batch *batch, uint32_t dst,
diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
index 0011439949e..293f71aa1f9 100644
--- a/src/gallium/drivers/iris/iris_resource.c
+++ b/src/gallium/drivers/iris/iris_resource.c
@@ -38,6 +38,7 @@
#include "util/u_cpu_detect.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "util/u_threaded_context.h"
#include "util/u_transfer.h"
#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
@@ -878,6 +879,37 @@ iris_resource_get_handle(struct pipe_screen *pscreen,
}
static void
+iris_invalidate_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource)
+{
+ struct iris_screen *screen = (void *) ctx->screen;
+ struct iris_context *ice = (void *) ctx;
+ struct iris_resource *res = (void *) resource;
+ /* Give a buffer fresh backing storage: allocate a new BO, point res->bo at it, call the vtable's rebind_buffer hook, then unreference the old BO. Every early return below leaves the resource unchanged. */
+ if (resource->target != PIPE_BUFFER)
+ return; /* only PIPE_BUFFER resources are handled here */
+
+ /* We can't reallocate memory we didn't allocate in the first place. */
+ if (res->bo->userptr)
+ return;
+
+ // XXX: We should support this. For now, buffers whose bind_history includes PIPE_BIND_STREAM_OUTPUT are left alone.
+ if (res->bind_history & PIPE_BIND_STREAM_OUTPUT)
+ return;
+ /* Allocate the replacement BO, passing the old BO's name, resource->width0, and the memzone computed from the old BO's GTT offset. */
+ struct iris_bo *old_bo = res->bo;
+ struct iris_bo *new_bo =
+ iris_bo_alloc(screen->bufmgr, res->bo->name, resource->width0,
+ iris_memzone_for_address(old_bo->gtt_offset));
+ if (!new_bo)
+ return; /* allocation failed: keep using the old BO */
+ /* Install the new BO before rebinding; the hook receives the resource plus the old BO's GTT offset. */
+ res->bo = new_bo;
+ ice->vtbl.rebind_buffer(ice, res, old_bo->gtt_offset);
+ iris_bo_unreference(old_bo); /* res->bo no longer points at old_bo */
+}
+
+static void
iris_flush_staging_region(struct pipe_transfer *xfer,
const struct pipe_box *flush_box)
{
@@ -1280,11 +1312,15 @@ iris_transfer_map(struct pipe_context *ctx,
struct iris_resource *res = (struct iris_resource *)resource;
struct isl_surf *surf = &res->surf;
- /* If we can discard the whole resource, we can also discard the
- * subrange being accessed.
- */
- if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
- usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
+ /* Replace the backing storage with a fresh buffer for non-async maps */
+ if (!(usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
+ TC_TRANSFER_MAP_NO_INVALIDATE)))
+ iris_invalidate_resource(ctx, resource);
+
+ /* If we can discard the whole resource, we can discard the range. */
+ usage |= PIPE_TRANSFER_DISCARD_RANGE;
+ }
bool map_would_stall = false;
@@ -1536,6 +1572,7 @@ void
iris_init_resource_functions(struct pipe_context *ctx)
{
ctx->flush_resource = iris_flush_resource;
+ ctx->invalidate_resource = iris_invalidate_resource;
ctx->transfer_map = u_transfer_helper_transfer_map;
ctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
ctx->transfer_unmap = u_transfer_helper_transfer_unmap;
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 38cdbc1507d..1ede9c4335f 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -178,6 +178,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
+ case PIPE_CAP_INVALIDATE_BUFFER:
return true;
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_POST_DEPTH_COVERAGE:
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 073da79a28a..d6a8ba4fb47 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -5461,6 +5461,130 @@ iris_destroy_state(struct iris_context *ice)
/* ------------------------------------------------------------------- */
static void
+iris_rebind_buffer(struct iris_context *ice,
+ struct iris_resource *res,
+ uint64_t old_address)
+{
+ struct pipe_context *ctx = &ice->ctx;
+ struct iris_screen *screen = (void *) ctx->screen;
+ struct iris_genx_state *genx = ice->state.genx;
+ /* Refresh context state that refers to buffer `res` after its BO changed: patch bound vertex buffer addresses equal to old_address, and for each shader stage re-upload or re-set UBO, SSBO, sampler view and image bindings whose resource resolves to res->bo. */
+ assert(res->base.target == PIPE_BUFFER);
+
+ /* Buffers can't be framebuffer attachments, nor display related,
+ * and we don't have upstream Clover support.
+ */
+ assert(!(res->bind_history & (PIPE_BIND_DEPTH_STENCIL |
+ PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_BLENDABLE |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_CURSOR |
+ PIPE_BIND_COMPUTE_RESOURCE |
+ PIPE_BIND_GLOBAL)));
+ /* Vertex buffers: walk every bit set in bound_vertex_buffers and patch slots that still hold old_address. */
+ if (res->bind_history & PIPE_BIND_VERTEX_BUFFER) {
+ uint64_t bound_vbs = ice->state.bound_vertex_buffers;
+ while (bound_vbs) {
+ const int i = u_bit_scan64(&bound_vbs);
+ struct iris_vertex_buffer_state *state = &genx->vertex_buffers[i];
+
+ /* Update the CPU struct */
+ STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_start) == 32);
+ STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) == 64);
+ uint64_t *addr = (uint64_t *) &state->state[1]; /* the asserts above pin the address at bit 32, 64 bits wide; assumes state[] holds 32-bit dwords - TODO confirm */
+
+ if (*addr == old_address) { /* NOTE(review): exact match only - if the packed address can include a buffer offset, such slots would be missed; confirm */
+ *addr = res->bo->gtt_offset;
+ ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
+ }
+ }
+ }
+
+ /* No need to handle these:
+ * - PIPE_BIND_INDEX_BUFFER (emitted for every indexed draw)
+ * - PIPE_BIND_COMMAND_ARGS_BUFFER (emitted for every indirect draw)
+ * - PIPE_BIND_QUERY_BUFFER (no persistent state references)
+ */
+ /* Stream output rebinding is not implemented; reaching this branch trips the assert below. */
+ if (res->bind_history & PIPE_BIND_STREAM_OUTPUT) {
+ /* XXX: be careful about resetting vs appending... */
+ assert(false);
+ }
+ /* Per-stage bindings: each loop below visits the set bits of a bound-slot mask and compares that slot's BO with res->bo. */
+ for (int s = MESA_SHADER_VERTEX; s < MESA_SHADER_STAGES; s++) {
+ struct iris_shader_state *shs = &ice->state.shaders[s];
+ enum pipe_shader_type p_stage = stage_to_pipe(s);
+
+ if (res->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
+ /* Skip constant buffer 0, it's for regular uniforms, not UBOs */
+ uint32_t bound_cbufs = shs->bound_cbufs & ~1u;
+ while (bound_cbufs) {
+ const int i = u_bit_scan(&bound_cbufs);
+ struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
+ struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
+
+ if (res->bo == iris_resource_bo(cbuf->buffer)) {
+ upload_ubo_ssbo_surf_state(ice, cbuf, surf_state, false);
+ ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << s; /* assumes the per-stage CONSTANTS dirty bits are consecutive starting at VS - TODO confirm */
+ }
+ }
+ }
+ /* SSBOs: re-set the slot through iris_set_shader_buffers with the same offset, size and writable bit. */
+ if (res->bind_history & PIPE_BIND_SHADER_BUFFER) {
+ uint32_t bound_ssbos = shs->bound_ssbos;
+ while (bound_ssbos) {
+ const int i = u_bit_scan(&bound_ssbos);
+ struct pipe_shader_buffer *ssbo = &shs->ssbo[i];
+
+ if (res->bo == iris_resource_bo(ssbo->buffer)) {
+ struct pipe_shader_buffer buf = {
+ .buffer = &res->base,
+ .buffer_offset = ssbo->buffer_offset,
+ .buffer_size = ssbo->buffer_size,
+ };
+ iris_set_shader_buffers(ctx, p_stage, i, 1, &buf,
+ (shs->writable_ssbos >> i) & 1);
+ }
+ }
+ }
+ /* Sampler views: allocate new surface state and fill it as a buffer surface from the view's format, swizzle, offset and size. */
+ if (res->bind_history & PIPE_BIND_SAMPLER_VIEW) {
+ uint32_t bound_sampler_views = shs->bound_sampler_views;
+ while (bound_sampler_views) {
+ const int i = u_bit_scan(&bound_sampler_views);
+ struct iris_sampler_view *isv = shs->textures[i];
+
+ if (res->bo == iris_resource_bo(isv->base.texture)) {
+ void *map = alloc_surface_states(ice->state.surface_uploader,
+ &isv->surface_state,
+ isv->res->aux.sampler_usages);
+ assert(map);
+ fill_buffer_surface_state(&screen->isl_dev, isv->res->bo, map,
+ isv->view.format, isv->view.swizzle,
+ isv->base.u.buf.offset,
+ isv->base.u.buf.size);
+ ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << s; /* assumes the per-stage BINDINGS dirty bits are consecutive starting at VS - TODO confirm */
+ }
+ }
+ }
+ /* Images: re-set the existing image view through iris_set_shader_images. */
+ if (res->bind_history & PIPE_BIND_SHADER_IMAGE) {
+ uint32_t bound_image_views = shs->bound_image_views;
+ while (bound_image_views) {
+ const int i = u_bit_scan(&bound_image_views);
+ struct iris_image_view *iv = &shs->image[i];
+
+ if (res->bo == iris_resource_bo(iv->base.resource)) {
+ iris_set_shader_images(ctx, p_stage, i, 1, &iv->base);
+ }
+ }
+ }
+ }
+}
+
+/* ------------------------------------------------------------------- */
+
+static void
iris_load_register_reg32(struct iris_batch *batch, uint32_t dst,
uint32_t src)
{
@@ -6075,6 +6199,7 @@ genX(init_state)(struct iris_context *ice)
ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
ice->vtbl.upload_compute_state = iris_upload_compute_state;
ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+ ice->vtbl.rebind_buffer = iris_rebind_buffer;
ice->vtbl.load_register_reg32 = iris_load_register_reg32;
ice->vtbl.load_register_reg64 = iris_load_register_reg64;
ice->vtbl.load_register_imm32 = iris_load_register_imm32;