summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nvc0/nvc0_tex.c
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2012-04-14 23:56:56 +0200
committerChristoph Bumiller <[email protected]>2012-04-15 00:08:51 +0200
commite44089b2f79aa2dcaacf348911433d1e21235c0c (patch)
tree955d621392f0068ef8e3c98dc46195ff3916525e /src/gallium/drivers/nvc0/nvc0_tex.c
parent69a921892d2303f1400576aa73980c28880f8654 (diff)
nvc0: add initial support for nve4+ (Kepler) chipsets
Most things that work on Fermi should work on Kepler too. There are a few performance optimizations left to do, like better placement of texture barriers and adding scheduling data to the shader instructions (without them, a thread group will be masked for 32 cycles after each single instruction issue).
Diffstat (limited to 'src/gallium/drivers/nvc0/nvc0_tex.c')
-rw-r--r--src/gallium/drivers/nvc0/nvc0_tex.c165
1 files changed, 159 insertions, 6 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index f6c4ab39bd9..8dd7185bcdf 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -26,6 +26,9 @@
#include "util/u_format.h"
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff /* low 20 bits of a tex_handles word (where tic->id is stored); all ones = no valid TIC entry */
+#define NVE4_TSC_ENTRY_INVALID 0xfff00000 /* high 12 bits of a tex_handles word (where tsc->id << 20 is stored); all ones = no valid TSC entry */
+
#define NV50_TIC_0_SWIZZLE__MASK \
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
@@ -271,13 +274,76 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
return need_flush;
}
+/**
+ * Validate the TIC entries referenced by the sampler views of stage @s
+ * (NVE4+ path) and update the TIC part of nvc0->tex_handles[s][].
+ *
+ * For every slot below nvc0->num_textures[s]:
+ *  - no view bound: the TIC field of the handle is set to
+ *    NVE4_TIC_ENTRY_INVALID;
+ *  - view without a TIC slot (tic->id < 0): a slot is allocated with
+ *    nvc0_screen_tic_alloc() and the 8 words of tic->tic[] are written to
+ *    screen->txc at offset id * 32 through P2MF;
+ *  - view with a slot whose resource is flagged GPU_WRITING: a
+ *    TEX_CACHE_CTL command is emitted for that entry.
+ * The slot is then locked in screen->tic.lock, the resource status is moved
+ * from GPU_WRITING to GPU_READING, the id is stored in the handle and, if the
+ * slot was dirty, the resource is added to bufctx_3d for reading.
+ *
+ * Slots between the new and the previously validated texture count are
+ * invalidated and flagged dirty so that their handles get re-uploaded.
+ *
+ * @param nvc0  context to validate
+ * @param s     stage index into the per-stage texture arrays
+ * @return TRUE if at least one TIC entry was uploaded; the caller uses this
+ *         to decide whether to emit TIC_FLUSH.
+ */
+static boolean
+nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
+{
+   struct nouveau_bo *txc = nvc0->screen->txc;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_textures[s]; ++i) {
+      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+      struct nv04_resource *res;
+      /* unsigned shift: 1 << 31 on a signed int is undefined behaviour */
+      const boolean dirty = !!(nvc0->textures_dirty[s] & (1u << i));
+
+      if (!tic) {
+         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+         continue;
+      }
+      res = nv04_resource(tic->pipe.texture);
+
+      if (tic->id < 0) {
+         /* Entry is not resident yet: allocate a slot and upload it. */
+         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+         PUSH_SPACE(push, 16);
+         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+         PUSH_DATAh(push, txc->offset + (tic->id * 32));
+         PUSH_DATA (push, txc->offset + (tic->id * 32));
+         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+         PUSH_DATA (push, 32);
+         PUSH_DATA (push, 1);
+         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+         PUSH_DATA (push, 0x1001);
+         PUSH_DATAp(push, &tic->tic[0], 8);
+
+         need_flush = TRUE;
+      } else
+      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+         /* Already resident, but the resource is flagged as GPU-written. */
+         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, (tic->id << 4) | 1);
+      }
+      /* One lock bit per TIC id, 32 ids per word. */
+      nvc0->screen->tic.lock[tic->id / 32] |= 1u << (tic->id % 32);
+
+      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+      /* Replace the TIC field of the handle, leaving the TSC field alone. */
+      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+      nvc0->tex_handles[s][i] |= tic->id;
+      if (dirty)
+         BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+   }
+   /* Slots used by the previously validated state but not bound any more:
+    * invalidate them and flag them dirty. nve4_set_tex_handles() uploads
+    * only handles whose dirty bit is set, so without the bit the stale
+    * handle would never be overwritten in the constant buffer.
+    */
+   for (; i < nvc0->state.num_textures[s]; ++i) {
+      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+      nvc0->textures_dirty[s] |= 1u << i;
+   }
+
+   nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+   return need_flush;
+}
+
void nvc0_validate_textures(struct nvc0_context *nvc0)
{
boolean need_flush;
- need_flush = nvc0_validate_tic(nvc0, 0);
- need_flush |= nvc0_validate_tic(nvc0, 3);
- need_flush |= nvc0_validate_tic(nvc0, 4);
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tic(nvc0, 0);
+ need_flush |= nve4_validate_tic(nvc0, 3);
+ need_flush |= nve4_validate_tic(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 3);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+ }
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
@@ -329,16 +395,103 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
+/**
+ * Validate the TSC entries referenced by the samplers of stage @s
+ * (NVE4+ path) and update the TSC part of nvc0->tex_handles[s][].
+ *
+ * For every slot below nvc0->num_samplers[s]:
+ *  - no sampler bound: the TSC field of the handle is set to
+ *    NVE4_TSC_ENTRY_INVALID;
+ *  - sampler without a TSC slot (tsc->id < 0): a slot is allocated with
+ *    nvc0_screen_tsc_alloc() and the 8 words of tsc->tsc[] are written to
+ *    screen->txc at offset 65536 + id * 32 through P2MF.
+ * The slot is then locked in screen->tsc.lock and the id is stored in the
+ * upper bits (id << 20) of the handle.
+ *
+ * Slots between the new and the previously validated sampler count are
+ * invalidated and flagged dirty so that their handles get re-uploaded.
+ *
+ * @param nvc0  context to validate
+ * @param s     stage index into the per-stage sampler arrays
+ * @return TRUE if at least one TSC entry was uploaded; the caller uses this
+ *         to decide whether to emit TSC_FLUSH.
+ */
+static boolean
+nve4_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+   struct nouveau_bo *txc = nvc0->screen->txc;
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   unsigned i;
+   boolean need_flush = FALSE;
+
+   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+      if (!tsc) {
+         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+         continue;
+      }
+      if (tsc->id < 0) {
+         /* Entry is not resident yet: allocate a slot and upload it. */
+         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+         PUSH_SPACE(push, 16);
+         BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+         PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
+         PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
+         BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+         PUSH_DATA (push, 32);
+         PUSH_DATA (push, 1);
+         BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+         PUSH_DATA (push, 0x1001);
+         PUSH_DATAp(push, &tsc->tsc[0], 8);
+
+         need_flush = TRUE;
+      }
+      /* One lock bit per TSC id, 32 ids per word; unsigned shift because
+       * 1 << 31 on a signed int is undefined behaviour.
+       */
+      nvc0->screen->tsc.lock[tsc->id / 32] |= 1u << (tsc->id % 32);
+
+      /* Replace the TSC field of the handle, leaving the TIC field alone.
+       * The shift is done in unsigned arithmetic so that large ids cannot
+       * overflow a signed int.
+       */
+      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
+      nvc0->tex_handles[s][i] |= (uint32_t)tsc->id << 20;
+   }
+   /* Slots used by the previously validated state but not bound any more:
+    * invalidate them and flag them dirty. nve4_set_tex_handles() uploads
+    * only handles whose dirty bit is set, so without the bit the stale
+    * handle would never be overwritten in the constant buffer.
+    */
+   for (; i < nvc0->state.num_samplers[s]; ++i) {
+      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+      nvc0->samplers_dirty[s] |= 1u << i;
+   }
+
+   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+   return need_flush;
+}
+
void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
boolean need_flush;
- need_flush = nvc0_validate_tsc(nvc0, 0);
- need_flush |= nvc0_validate_tsc(nvc0, 3);
- need_flush |= nvc0_validate_tsc(nvc0, 4);
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) { /* NVE4 and newer 3D classes: TSC entries are uploaded via P2MF and ids go into tex_handles */
+ need_flush = nve4_validate_tsc(nvc0, 0); /* only stage indices 0, 3 and 4 are validated, same as on the older path */
+ need_flush |= nve4_validate_tsc(nvc0, 3);
+ need_flush |= nve4_validate_tsc(nvc0, 4);
+ } else { /* older 3D classes keep using nvc0_validate_tsc() */
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 3);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+ }
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
}
}
+
+/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
+ * At some point we might want to get a list of the combinations used by a
+ * shader and fill in those entries instead of having it extract the handles.
+ *
+ * Does nothing when the screen's 3D class is below NVE4_3D_CLASS.
+ * Otherwise, for each of the 5 stage indices s that has a bit set in
+ * textures_dirty[s] or samplers_dirty[s], this selects a constant buffer
+ * with CB_SIZE 512 located in screen->uniform_bo at offset (5 << 16) plus
+ * s * 512, writes tex_handles[s][i] at CB_POS (8 + i) * 4 for every dirty
+ * slot i, and finally clears both dirty masks of that stage.
+ */
+void
+nve4_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ unsigned s;
+
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ return; /* handles are only used on the NVE4+ path */
+ address = nvc0->screen->uniform_bo->offset + (5 << 16);
+
+ for (s = 0; s < 5; ++s, address += (1 << 9)) { /* address advances by 512 per stage, also when a stage is skipped via continue */
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; /* a slot needs an upload if either half of its handle changed */
+ if (!dirty)
+ continue;
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); /* three data words: size, then the address (presumably high word first - confirm against PUSH_DATAh) */
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ do {
+ int i = ffs(dirty) - 1; /* index of the lowest set bit; dirty is non-zero here */
+ dirty &= ~(1 << i);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_POS), 2); /* two data words: position, then the value to store */
+ PUSH_DATA (push, (8 + i) * 4); /* handle i lives 8 entries of 4 into the buffer */
+ PUSH_DATA (push, nvc0->tex_handles[s][i]);
+ } while (dirty);
+
+ nvc0->textures_dirty[s] = 0; /* everything flagged for this stage has been uploaded */
+ nvc0->samplers_dirty[s] = 0;
+ }
+}