diff options
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 48 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 17 | ||||
-rw-r--r-- | src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 50 |
4 files changed, 81 insertions, 47 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 11635c94658..4963493877b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -181,7 +181,7 @@ nvc0_compute_invalidate_constbufs(struct nvc0_context *nvc0) /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ for (s = 0; s < 5; s++) { nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; - nvc0->state.uniform_buffer_bound[s] = 0; + nvc0->state.uniform_buffer_bound[s] = false; } nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; } @@ -203,19 +203,18 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); - if (nvc0->state.uniform_buffer_bound[s] < size) { - nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + if (!nvc0->state.uniform_buffer_bound[s]) { + nvc0->state.uniform_buffer_bound[s] = true; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); - PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATA (push, NVC0_MAX_CONSTBUF_SIZE); PUSH_DATAh(push, bo->offset + base); PUSH_DATA (push, bo->offset + base); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), - base, nvc0->state.uniform_buffer_bound[s], - 0, (size + 3) / 4, + base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = @@ -236,7 +235,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) PUSH_DATA (push, (i << 8) | 0); } if (i == 0) - nvc0->state.uniform_buffer_bound[s] = 0; + nvc0->state.uniform_buffer_bound[s] = false; } } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 37e10dcd07f..bcd30b560bc 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -382,7 +382,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_OUTPUTS: return 32; case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return 65536; + return NVC0_MAX_CONSTBUF_SIZE; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return NVC0_MAX_PIPE_CONSTBUFS; case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: @@ -829,6 +829,40 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) return 0; } +void +nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, + int stage, int index, int size, uint64_t addr) +{ + assert(stage != 5); + + struct nouveau_pushbuf *push = screen->base.pushbuf; + + if (screen->base.class_3d >= GM107_3D_CLASS) { + struct nvc0_cb_binding *binding = &screen->cb_bindings[stage][index]; + + // TODO: Better figure out the conditions in which this is needed + bool serialize = binding->addr == addr && binding->size != size; + if (can_serialize) + serialize = serialize && *can_serialize; + if (serialize) { + IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); + if (can_serialize) + *can_serialize = false; + } + + binding->addr = addr; + binding->size = size; + } + + if (size >= 0) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, size); + PUSH_DATAh(push, addr); + PUSH_DATA (push, addr); + } + IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); +} + #define FAIL_SCREEN_INIT(str, err) \ do { \ NOUVEAU_ERR(str, err); \ @@ -1272,14 +1306,14 @@ nvc0_screen_create(struct nouveau_device *dev) /* XXX: Compute and 3D are somehow aliased on Fermi. */ for (i = 0; i < 5; ++i) { + unsigned j = 0; + for (j = 0; j < 16; j++) + screen->cb_bindings[i][j].size = -1; + /* TIC and TSC entries for each unit (nve4+ only) */ /* auxiliary constants (6 user clip planes, base instance id) */ - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); - PUSH_DATA (push, (15 << 4) | 1); + nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, + screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { unsigned j; BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index efd62a8a412..d8223ba9d54 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -16,7 +16,9 @@ #define NVE4_IMG_MAX_HANDLES 512 /* doesn't count driver-reserved slot */ -#define NVC0_MAX_PIPE_CONSTBUFS 15 +#define NVC0_MAX_PIPE_CONSTBUFS 15 +#define NVC0_MAX_CONST_BUFFERS 16 +#define NVC0_MAX_CONSTBUF_SIZE 65536 #define NVC0_MAX_SURFACE_SLOTS 16 @@ -53,12 +55,17 @@ struct nvc0_graph_state { uint8_t tls_required; /* bitmask of shader types using l[] */ uint8_t clip_enable; uint32_t clip_mode; - uint32_t uniform_buffer_bound[6]; + bool uniform_buffer_bound[6]; struct nvc0_transform_feedback_state *tfb; bool seamless_cube_map; bool post_depth_coverage; }; +struct nvc0_cb_binding { + uint64_t addr; + int size; +}; + struct nvc0_screen { struct nouveau_screen base; @@ -114,6 +121,9 @@ struct nvc0_screen { bool mp_counters_enabled; } pm; + /* only maintained on Maxwell+ */ + struct nvc0_cb_binding cb_bindings[5][NVC0_MAX_CONST_BUFFERS]; + struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */ struct nouveau_object *eng2d; struct nouveau_object *m2mf; @@ -146,6 +156,9 @@ int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *); int nvc0_screen_resize_text_area(struct nvc0_screen *, uint64_t); +// 3D Only +void nvc0_screen_bind_cb_3d(struct nvc0_screen *, bool *, int, int, int, uint64_t); + static inline void nvc0_resource_fence(struct nv04_resource *res, uint32_t flags) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index cc18f41c4bb..4f004a4f705 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -565,9 +565,10 @@ nvc0_validate_rasterizer(struct nvc0_context *nvc0) static void nvc0_constbufs_validate(struct nvc0_context *nvc0) { - struct nouveau_pushbuf *push = nvc0->base.pushbuf; unsigned s; + bool can_serialize = true; + for (s = 0; s < 5; ++s) { while (nvc0->constbuf_dirty[s]) { int i = ffs(nvc0->constbuf_dirty[s]) - 1; @@ -580,41 +581,34 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); - if (nvc0->state.uniform_buffer_bound[s] < size) { - nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + if (!nvc0->state.uniform_buffer_bound[s]) { + nvc0->state.uniform_buffer_bound[s] = true; - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); - PUSH_DATAh(push, bo->offset + base); - PUSH_DATA (push, bo->offset + base); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (0 << 4) | 1); + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, + NVC0_MAX_CONSTBUF_SIZE, bo->offset + base); } nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), - base, nvc0->state.uniform_buffer_bound[s], + base, NVC0_MAX_CONSTBUF_SIZE, 0, (size + 3) / 4, nvc0->constbuf[s][0].u.data); } else { struct nv04_resource *res = nv04_resource(nvc0->constbuf[s][i].u.buf); if (res) { - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, nvc0->constbuf[s][i].size); - PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); - PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (i << 4) | 1); + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, + nvc0->constbuf[s][i].size, + res->address + nvc0->constbuf[s][i].offset); BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD); nvc0->cb_dirty = 1; /* Force cache flush for UBO. */ res->cb_bindings[s] |= 1 << i; - } else { - BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); - PUSH_DATA (push, (i << 4) | 0); + + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = false; + } else if (i != 0) { + nvc0_screen_bind_cb_3d(nvc0->screen, &can_serialize, s, i, -1, 0); } - if (i == 0) - nvc0->state.uniform_buffer_bound[s] = 0; } } } @@ -623,7 +617,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5]; - nvc0->state.uniform_buffer_bound[5] = 0; + nvc0->state.uniform_buffer_bound[5] = false; } } @@ -710,18 +704,12 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) static void nvc0_validate_driverconst(struct nvc0_context *nvc0) { - struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_screen *screen = nvc0->screen; int i; - for (i = 0; i < 5; ++i) { - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); - BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); - PUSH_DATA (push, (15 << 4) | 1); - } + for (i = 0; i < 5; ++i) + nvc0_screen_bind_cb_3d(screen, NULL, i, 15, NVC0_CB_AUX_SIZE, + screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; } |