diff options
author | Ilia Mirkin <[email protected]> | 2017-01-02 00:48:51 -0500 |
---|---|---|
committer | Ilia Mirkin <[email protected]> | 2017-01-16 21:13:09 -0500 |
commit | 5ba380c226b127cbfad00dd647471e1518ba2cb2 (patch) | |
tree | 6c00e7e1945ab525057bc915ddcf409a8dd0e32e /src | |
parent | 6b7511c2f123014fe469a11d0b46fbff357335e4 (diff) |
nvc0: enable FBFETCH with a special slot for color buffer 0
We don't need to support all the color buffers for advanced blend, just
cb0. For Fermi, we use the special binding slots so that we don't
overlap with user textures, while Kepler+ gets a dedicated position for
the fb handle in the driver constbuf.
This logic is only triggered when an FBFETCH is actually present, so it
should be a no-op most of the time.
Signed-off-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src')
9 files changed, 172 insertions, 6 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index e85b5fab143..a0388011aed 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -146,6 +146,7 @@ struct nv50_ir_prog_info bool usesDiscard; bool persampleInvocation; bool usesSampleMaskIn; + bool readsFramebuffer; } fp; struct { uint32_t inputOffset; /* base address for user args */ @@ -178,6 +179,7 @@ struct nv50_ir_prog_info bool fp64; /* program uses fp64 math */ bool nv50styleSurfaces; /* generate gX[] access for raw buffers */ uint16_t texBindBase; /* base address for tex handles (nve4) */ + uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */ uint16_t bufInfoBase; /* base address for buffer info */ uint16_t sampleInfoBase; /* base address for sample positions */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 6b38d5f0fa9..64bfd084326 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1459,6 +1459,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_BARRIER) info->numBarriers = 1; + if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) + info->prop.fp.readsFramebuffer = true; + if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); @@ -1574,6 +1577,7 @@ private: void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); void handleTXF(Value *dst0[4], int R, int L_M); void handleTXQ(Value *dst0[4], enum TexQuery, int R); + void handleFBFETCH(Value *dst0[4]); void handleLIT(Value *dst0[4]); void handleUserClipPlanes(); @@ -2283,6 +2287,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M) } void +Converter::handleFBFETCH(Value 
*dst[4]) +{ + TexInstruction *texi = new_TexInstruction(func, OP_TXF); + unsigned int c, d; + + texi->tex.target = TEX_TARGET_2D_MS_ARRAY; + texi->tex.levelZero = 1; + texi->tex.useOffsets = 0; + + for (c = 0, d = 0; c < 4; ++c) { + if (dst[c]) { + texi->setDef(d++, dst[c]); + texi->tex.mask |= 1 << c; + } + } + + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0)); + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1)); + Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0)); + Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0)); + + mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z; + mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z; + texi->setSrc(0, x); + texi->setSrc(1, y); + texi->setSrc(2, z); + texi->setSrc(3, ms); + + texi->tex.r = texi->tex.s = -1; + + bb->insertTail(texi); +} + +void Converter::handleLIT(Value *dst0[4]) { Value *val0 = NULL; @@ -3323,6 +3361,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTXQ(dst0, TXQ_TYPE, 0); std::swap(dst0[0], dst0[2]); break; + case TGSI_OPCODE_FBFETCH: + handleFBFETCH(dst0); + break; case TGSI_OPCODE_F2I: case TGSI_OPCODE_F2U: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 95de87c03b3..ec50578c32a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -749,7 +749,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) i->setIndirectR(hnd); i->setIndirectS(NULL); } else if (i->tex.r == i->tex.s || i->op == OP_TXF) { - i->tex.r += prog->driver->io.texBindBase / 4; + if (i->tex.r == 0xffff) + i->tex.r = prog->driver->io.fbtexBindBase / 4; + else + i->tex.r += prog->driver->io.texBindBase / 4; i->tex.s = 0; // only a single cX[] value possible here } else 
{ Value *hnd = bld.getScratch(); @@ -805,6 +808,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) Value *ticRel = i->getIndirectR(); Value *tscRel = i->getIndirectS(); + if (i->tex.r == 0xffff) { + i->tex.r = 0x20; + i->tex.s = 0x10; + } + if (ticRel) { i->setSrc(i->tex.rIndirectSrc, NULL); if (i->tex.r) @@ -2507,9 +2515,13 @@ NVC0LoweringPass::handleRDSV(Instruction *i) default: if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch) vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); - ld = bld.mkFetch(i->getDef(0), i->dType, - FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); - ld->perPatch = i->perPatch; + if (prog->getType() == Program::TYPE_FRAGMENT) { + bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL); + } else { + ld = bld.mkFetch(i->getDef(0), i->dType, + FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); + ld->perPatch = i->perPatch; + } break; } bld.getBB()->remove(i); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 37aecae9047..79a5333367e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -120,6 +120,9 @@ /* block/grid size, at 3 32-bits integers each, gridid and work_dim */ #define NVC0_CB_AUX_GRID_INFO(i) 0x100 + (i) * 4 /* CP */ #define NVC0_CB_AUX_GRID_SIZE (8 * 4) +/* FB texture handle */ +#define NVC0_CB_AUX_FB_TEX_INFO 0x100 /* FP */ +#define NVC0_CB_AUX_FB_TEX_SIZE (4) /* 8 user clip planes, at 4 32-bits floats each */ #define NVC0_CB_AUX_UCP_INFO 0x120 #define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4) @@ -206,6 +209,7 @@ struct nvc0_context { unsigned num_samplers[6]; uint32_t samplers_dirty[6]; bool seamless_cube_map; + struct pipe_sampler_view *fbtexture; uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 
a4a164f15f8..6cc518309cd 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) fp->fp.early_z = info->prop.fp.earlyFragTests; fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn; + fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer; + + /* Mark position xy and layer as read */ + if (fp->fp.reads_framebuffer) + fp->hdr[5] |= 0x32000000; return 0; } @@ -583,6 +588,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0); if (info->target >= NVISA_GK104_CHIPSET) { info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); + info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO; } if (prog->type == PIPE_SHADER_COMPUTE) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h index d33aa04e3bc..421ca191d0a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h @@ -49,6 +49,7 @@ struct nvc0_program { bool sample_mask_in; bool force_persample_interp; bool flatshade; + bool reads_framebuffer; } fp; struct { uint32_t tess_mode; /* ~0 if defined by the other stage */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ac9dd5b0642..1ef481b4e4d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -38,6 +38,8 @@ #include "nvc0/mme/com9097.mme.h" #include "nvc0/mme/com90c0.mme.h" +#include "nv50/g80_texture.xml.h" + static boolean nvc0_screen_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format, @@ -247,6 +249,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return (class_3d >= NVE4_3D_CLASS) ? 
1 : 0; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0; + case PIPE_CAP_TGSI_FS_FBFETCH: + return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */ /* unsupported caps */ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: @@ -275,7 +279,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_CAN_READ_OUTPUTS: case PIPE_CAP_NATIVE_FENCE_FD: case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY: - case PIPE_CAP_TGSI_FS_FBFETCH: return 0; case PIPE_CAP_VENDOR_ID: @@ -535,6 +538,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen) nouveau_heap_destroy(&screen->lib_code); nouveau_heap_destroy(&screen->text_heap); + FREE(screen->default_tsc); FREE(screen->tic.entries); nouveau_object_del(&screen->eng3d); @@ -1226,6 +1230,9 @@ nvc0_screen_create(struct nouveau_device *dev) if (!nvc0_blitter_create(screen)) goto fail; + screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry); + screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION; + nouveau_fence_new(&screen->base, &screen->base.fence.current, false); return &screen->base; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index aff0308e823..a6d4a2b4e3e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -81,6 +81,8 @@ struct nvc0_screen { struct nvc0_blitter *blitter; + struct nv50_tsc_entry *default_tsc; + struct { void **entries; int next; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 88766f42590..d4931cbc675 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -604,7 +604,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) // If we're using the incoming sample mask and doing sample shading, we // have to do sample shading "to the 
max", otherwise there's no way to // tell which sets of samples are covered by the current invocation. - if (nvc0->fragprog->fp.sample_mask_in) + // Similarly for reading the framebuffer. + if (nvc0->fragprog->fp.sample_mask_in || + nvc0->fragprog->fp.reads_framebuffer) samples = util_framebuffer_get_num_samples(&nvc0->framebuffer); samples |= NVC0_3D_SAMPLE_SHADING_ENABLE; } @@ -700,6 +702,93 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0) PUSH_DATAp(push, nvc0->default_tess_inner, 2); } +/* If we have a frag shader bound which tries to read from the framebuffer, we + * have to make sure that the fb is bound as a texture in the expected + * location. For Fermi, that's in the special driver slot 16, while for Kepler + * it's a regular binding stored in the driver constbuf. + */ +static void +nvc0_validate_fbread(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + struct pipe_context *pipe = &nvc0->base.pipe; + struct pipe_sampler_view *old_view = nvc0->fbtexture; + struct pipe_sampler_view *new_view = NULL; + + if (nvc0->fragprog && + nvc0->fragprog->fp.reads_framebuffer && + nvc0->framebuffer.nr_cbufs && + nvc0->framebuffer.cbufs[0]) { + struct pipe_sampler_view tmpl; + struct pipe_surface *sf = nvc0->framebuffer.cbufs[0]; + + tmpl.target = PIPE_TEXTURE_2D_ARRAY; + tmpl.format = sf->format; + tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level; + tmpl.u.tex.first_layer = sf->u.tex.first_layer; + tmpl.u.tex.last_layer = sf->u.tex.last_layer; + tmpl.swizzle_r = PIPE_SWIZZLE_X; + tmpl.swizzle_g = PIPE_SWIZZLE_Y; + tmpl.swizzle_b = PIPE_SWIZZLE_Z; + tmpl.swizzle_a = PIPE_SWIZZLE_W; + + /* Bail if it's the same parameters */ + if (old_view && old_view->texture == sf->texture && + old_view->format == sf->format && + old_view->u.tex.first_level == sf->u.tex.level && + old_view->u.tex.first_layer == sf->u.tex.first_layer && + old_view->u.tex.last_layer == 
sf->u.tex.last_layer) + return; + + new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl); + } else if (old_view == NULL) { + return; + } + + if (old_view) + pipe_sampler_view_reference(&nvc0->fbtexture, NULL); + nvc0->fbtexture = new_view; + + if (screen->default_tsc->id < 0) { + struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc); + tsc->id = nvc0_screen_tsc_alloc(screen, tsc); + nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32, + NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc); + screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); + + IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0); + if (screen->base.class_3d < NVE4_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1); + PUSH_DATA (push, (tsc->id << 12) | 1); + } + } + + if (new_view) { + struct nv50_tic_entry *tic = nv50_tic_entry(new_view); + assert(tic->id < 0); + tic->id = nvc0_screen_tic_alloc(screen, tic); + nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32, + NV_VRAM_DOMAIN(&screen->base), 32, tic->tic); + screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); + + if (screen->base.class_3d >= NVE4_3D_CLASS) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1); + PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO); + PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id); + } else { + BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1); + PUSH_DATA (push, (tic->id << 9) | 1); + } + + IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0); + } +} + static void nvc0_switch_pipe_context(struct nvc0_context *ctx_to) { @@ -781,6 +870,8 @@ validate_list_3d[] = { { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS }, { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS }, + { nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG | + 
NVC0_NEW_3D_FRAMEBUFFER }, { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS }, { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES }, { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS }, |