summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/nouveau/nvc0
diff options
context:
space:
mode:
author Ilia Mirkin <[email protected]> 2017-01-02 00:48:51 -0500
committer Ilia Mirkin <[email protected]> 2017-01-16 21:13:09 -0500
commit 5ba380c226b127cbfad00dd647471e1518ba2cb2 (patch)
tree 6c00e7e1945ab525057bc915ddcf409a8dd0e32e /src/gallium/drivers/nouveau/nvc0
parent 6b7511c2f123014fe469a11d0b46fbff357335e4 (diff)
nvc0: enable FBFETCH with a special slot for color buffer 0
We don't need to support all the color buffers for advanced blend, just cb0. For Fermi, we use the special binding slots so that we don't overlap with user textures, while Kepler+ gets a dedicated position for the fb handle in the driver constbuf. This logic is only triggered when a FBFETCH is actually present so it should be a no-op most of the time. Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 6
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 1
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 9
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 2
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 93
6 files changed, 113 insertions, 2 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 37aecae9047..79a5333367e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -120,6 +120,9 @@
/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
#define NVC0_CB_AUX_GRID_INFO(i) 0x100 + (i) * 4 /* CP */
#define NVC0_CB_AUX_GRID_SIZE (8 * 4)
+/* FB texture handle */
+#define NVC0_CB_AUX_FB_TEX_INFO 0x100 /* FP */
+#define NVC0_CB_AUX_FB_TEX_SIZE (4)
/* 8 user clip planes, at 4 32-bits floats each */
#define NVC0_CB_AUX_UCP_INFO 0x120
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
@@ -206,6 +209,7 @@ struct nvc0_context {
unsigned num_samplers[6];
uint32_t samplers_dirty[6];
bool seamless_cube_map;
+ struct pipe_sampler_view *fbtexture;
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index a4a164f15f8..6cc518309cd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
fp->fp.early_z = info->prop.fp.earlyFragTests;
fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
+ fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
+
+ /* Mark position xy and layer as read */
+ if (fp->fp.reads_framebuffer)
+ fp->hdr[5] |= 0x32000000;
return 0;
}
@@ -583,6 +588,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
if (info->target >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+ info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
}
if (prog->type == PIPE_SHADER_COMPUTE) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index d33aa04e3bc..421ca191d0a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -49,6 +49,7 @@ struct nvc0_program {
bool sample_mask_in;
bool force_persample_interp;
bool flatshade;
+ bool reads_framebuffer;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ac9dd5b0642..1ef481b4e4d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -38,6 +38,8 @@
#include "nvc0/mme/com9097.mme.h"
#include "nvc0/mme/com90c0.mme.h"
+#include "nv50/g80_texture.xml.h"
+
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -247,6 +249,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
+ case PIPE_CAP_TGSI_FS_FBFETCH:
+ return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -275,7 +279,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
- case PIPE_CAP_TGSI_FS_FBFETCH:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -535,6 +538,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
+ FREE(screen->default_tsc);
FREE(screen->tic.entries);
nouveau_object_del(&screen->eng3d);
@@ -1226,6 +1230,9 @@ nvc0_screen_create(struct nouveau_device *dev)
if (!nvc0_blitter_create(screen))
goto fail;
+ screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry);
+ screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION;
+
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return &screen->base;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index aff0308e823..a6d4a2b4e3e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -81,6 +81,8 @@ struct nvc0_screen {
struct nvc0_blitter *blitter;
+ struct nv50_tsc_entry *default_tsc;
+
struct {
void **entries;
int next;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 88766f42590..d4931cbc675 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -604,7 +604,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
// If we're using the incoming sample mask and doing sample shading, we
// have to do sample shading "to the max", otherwise there's no way to
// tell which sets of samples are covered by the current invocation.
- if (nvc0->fragprog->fp.sample_mask_in)
+ // Similarly for reading the framebuffer.
+ if (nvc0->fragprog->fp.sample_mask_in ||
+ nvc0->fragprog->fp.reads_framebuffer)
samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
}
@@ -700,6 +702,93 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0)
PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}
+/* If we have a frag shader bound which tries to read from the framebuffer, we
+ * have to make sure that the fb is bound as a texture in the expected
+ * location. For Fermi, that's in the special driver slot 16, while for Kepler
+ * it's a regular binding stored in the driver constbuf.
+ *
+ * Only color buffer 0 is considered. The sampler view created for it is cached
+ * in nvc0->fbtexture and kept as long as the texture, format, level and layer
+ * range of cbufs[0] are unchanged. When no framebuffer-reading fragment
+ * program is bound (or cbufs[0] is absent), the reference to a previously
+ * cached view is dropped and nothing new is bound.
+ */
+static void
+nvc0_validate_fbread(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct pipe_sampler_view *old_view = nvc0->fbtexture; /* currently cached view, may be NULL */
+ struct pipe_sampler_view *new_view = NULL; /* stays NULL when no fb read is needed */
+
+ if (nvc0->fragprog &&
+ nvc0->fragprog->fp.reads_framebuffer &&
+ nvc0->framebuffer.nr_cbufs &&
+ nvc0->framebuffer.cbufs[0]) {
+ struct pipe_sampler_view tmpl; /* NOTE(review): not zero-initialized; only the fields assigned below are set - confirm create_sampler_view overwrites or ignores the rest */
+ struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
+
+ tmpl.target = PIPE_TEXTURE_2D_ARRAY; /* the view is always created as a 2D array */
+ tmpl.format = sf->format;
+ tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level; /* view exposes only the surface's level */
+ tmpl.u.tex.first_layer = sf->u.tex.first_layer;
+ tmpl.u.tex.last_layer = sf->u.tex.last_layer;
+ tmpl.swizzle_r = PIPE_SWIZZLE_X; /* identity swizzle */
+ tmpl.swizzle_g = PIPE_SWIZZLE_Y;
+ tmpl.swizzle_b = PIPE_SWIZZLE_Z;
+ tmpl.swizzle_a = PIPE_SWIZZLE_W;
+
+ /* Bail if it's the same parameters */
+ if (old_view && old_view->texture == sf->texture &&
+ old_view->format == sf->format &&
+ old_view->u.tex.first_level == sf->u.tex.level &&
+ old_view->u.tex.first_layer == sf->u.tex.first_layer &&
+ old_view->u.tex.last_layer == sf->u.tex.last_layer)
+ return;
+
+ new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl); /* result is not checked here; a NULL view is treated below as "nothing to bind" */
+ } else if (old_view == NULL) { /* no fb read needed and nothing cached: nothing to do */
+ return;
+ }
+
+ if (old_view) /* drop the reference to the stale cached view */
+ pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
+ nvc0->fbtexture = new_view; /* may be NULL */
+
+ if (screen->default_tsc->id < 0) { /* NOTE(review): default_tsc comes from CALLOC_STRUCT in nvc0_screen_create, so id presumably starts at 0 and this branch would never run - confirm */
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
+ tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
+ nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32,
+ NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc); /* writes the 32-byte TSC entry into screen->txc at 65536 + id * 32 */
+ screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); /* set this slot's bit in the tsc lock bitmask */
+
+ IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
+ if (screen->base.class_3d < NVE4_3D_CLASS) { /* classes below NVE4 bind the sampler through BIND_TSC2(0) */
+ BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
+ PUSH_DATA (push, (tsc->id << 12) | 1);
+ }
+ }
+
+ if (new_view) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
+ assert(tic->id < 0); /* the freshly created view is expected to have no TIC slot yet */
+ tic->id = nvc0_screen_tic_alloc(screen, tic);
+ nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
+ NV_VRAM_DOMAIN(&screen->base), 32, tic->tic); /* writes the 32-byte TIC entry into screen->txc at id * 32 */
+ screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); /* set this slot's bit in the tic lock bitmask */
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS) { /* NVE4 and newer: store the handle in the aux constbuf */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); /* NOTE(review): index 4 presumably selects the fragment stage's aux area - confirm */
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
+ PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
+ PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id); /* handle word: tsc id shifted left by 20, OR'ed with the tic id */
+ } else { /* older classes bind the view through BIND_TIC2(0) */
+ BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
+ PUSH_DATA (push, (tic->id << 9) | 1);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
+ }
+}
+
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
@@ -781,6 +870,8 @@ validate_list_3d[] = {
{ nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
{ nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
+ { nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG |
+ NVC0_NEW_3D_FRAMEBUFFER },
{ nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
{ nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },