diff options
Diffstat (limited to 'src/gallium/drivers')
93 files changed, 2081 insertions, 1877 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 7a1812f2518..54315d2f592 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1017,7 +1017,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr, const_offset = nir_src_as_const_value(intr->src[1]); if (const_offset) { - off += const_offset->u[0]; + off += const_offset->u32[0]; } else { /* For load_ubo_indirect, second src is indirect offset: */ src1 = get_src(ctx, &intr->src[1])[0]; @@ -1159,7 +1159,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[0]); if (const_offset) { - idx += const_offset->u[0]; + idx += const_offset->u32[0]; for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = create_uniform(ctx, n); @@ -1186,7 +1186,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[0]); if (const_offset) { - idx += const_offset->u[0]; + idx += const_offset->u32[0]; for (int i = 0; i < intr->num_components; i++) { unsigned n = idx * 4 + i; dst[i] = ctx->ir->inputs[n]; @@ -1213,7 +1213,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr) idx = nir_intrinsic_base(intr); const_offset = nir_src_as_const_value(intr->src[1]); compile_assert(ctx, const_offset != NULL); - idx += const_offset->u[0]; + idx += const_offset->u32[0]; src = get_src(ctx, &intr->src[0]); for (int i = 0; i < intr->num_components; i++) { @@ -1301,7 +1301,7 @@ emit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr) struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def, instr->def.num_components); for (int i = 0; i < instr->def.num_components; i++) - dst[i] = create_immed(ctx->block, instr->value.u[i]); + dst[i] = create_immed(ctx->block, instr->value.u32[i]); } static void diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c index 8815ac981eb..ec76b0bdc4d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -290,7 +290,7 @@ lower_if_else_block(nir_block *block, void *void_state) } nir_ssa_dest_init(&sel->instr, &sel->dest.dest, - phi->dest.ssa.num_components, phi->dest.ssa.name); + phi->dest.ssa.num_components, 32, phi->dest.ssa.name); sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; nir_ssa_def_rewrite_uses(&phi->dest.ssa, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 9f7d2572bbe..21523a27761 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -160,7 +160,7 @@ struct nv50_ir_prog_info uint8_t clipDistances; /* number of clip distance outputs */ uint8_t cullDistances; /* number of cull distance outputs */ int8_t genUserClip; /* request user clip planes for ClipVertex */ - uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */ + uint8_t auxCBSlot; /* driver constant buffer slot */ uint16_t ucpBase; /* base address for UCPs */ uint16_t drawInfoBase; /* base address for draw parameters */ uint8_t pointSize; /* output index for PointSize */ @@ -175,7 +175,6 @@ struct nv50_ir_prog_info uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ bool fp64; /* program uses fp64 math */ bool nv50styleSurfaces; /* generate gX[] access for raw buffers */ - uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */ uint16_t texBindBase; /* base address for tex handles (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */ uint16_t sampleInfoBase; /* base address for sample positions */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 0d7d95e3105..70f3c3f69ff 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -1655,10 +1655,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) break; } - if (i->src(0).getFile() != FILE_MEMORY_GLOBAL) - offset &= 0xffffff; - if (code[0] & 0x2) { + offset &= 0xffffff; emitLoadStoreType(i->dType, 0x33); if (i->src(0).getFile() == FILE_MEMORY_LOCAL) emitCachingMode(i->cache, 0x2f); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index 682a19d6d78..bd6200687ed 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -1634,7 +1634,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i) code[1] |= (i->tex.mask & 0xc) << 12; if (i->tex.liveOnly) - code[1] |= 4; + code[1] |= 1 << 2; + if (i->tex.derivAll) + code[1] |= 1 << 3; defId(i->def(0), 2); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index d284446f5d9..611d5f9c3ed 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -856,15 +856,17 @@ public: }; std::vector<TextureView> textureViews; + /* struct Resource { uint8_t target; // TGSI_TEXTURE_* bool raw; uint8_t slot; // $surface index }; std::vector<Resource> resources; + */ struct MemoryFile { - bool shared; + uint8_t mem_type; // TGSI_MEMORY_TYPE_* }; std::vector<MemoryFile> memoryFiles; @@ -1037,6 +1039,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop) case TGSI_PROPERTY_NUM_CULLDIST_ENABLED: info->io.cullDistances = prop->u[0].Data; break; + case TGSI_PROPERTY_NEXT_SHADER: + /* Do not need to know the next shader stage. */ + break; default: INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); break; @@ -1222,7 +1227,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) break; case TGSI_FILE_MEMORY: for (i = first; i <= last; ++i) - memoryFiles[i].shared = decl->Declaration.Shared; + memoryFiles[i].mem_type = decl->Declaration.MemType; break; case TGSI_FILE_NULL: case TGSI_FILE_TEMPORARY: @@ -1261,9 +1266,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) info->numBarriers = 1; if (insn.dstCount()) { - if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { - Instruction::DstRegister dst = insn.getDst(0); + Instruction::DstRegister dst = insn.getDst(0); + if (dst.getFile() == TGSI_FILE_OUTPUT) { if (dst.isIndirect(0)) for (unsigned i = 0; i < info->numOutputs; ++i) info->out[i].mask = 0xf; @@ -1280,11 +1285,11 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (isEdgeFlagPassthrough(insn)) info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); } else - if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { - if (insn.getDst(0).isIndirect(0)) - indirectTempArrays.insert(insn.getDst(0).getArrayId()); + if (dst.getFile() == TGSI_FILE_TEMPORARY) { + if (dst.isIndirect(0)) + indirectTempArrays.insert(dst.getArrayId()); } else - if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) { + if (dst.getFile() == TGSI_FILE_BUFFER) { info->io.globalAccess |= 0x2; } } @@ -1419,8 +1424,8 @@ private: void handleLIT(Value *dst0[4]); void handleUserClipPlanes(); - Symbol *getResourceBase(int r); - void getResourceCoords(std::vector<Value *>&, int r, int s); + // Symbol *getResourceBase(int r); + // void getResourceCoords(std::vector<Value *>&, int r, int s); void handleLOAD(Value *dst0[4]); void handleSTORE(); @@ -1527,8 +1532,21 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) sym->reg.fileIndex = fileIdx; - if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared) - sym->setFile(FILE_MEMORY_SHARED); + if (tgsiFile == TGSI_FILE_MEMORY) { + switch (code->memoryFiles[fileIdx].mem_type) { + case TGSI_MEMORY_TYPE_SHARED: + sym->setFile(FILE_MEMORY_SHARED); + break; + case TGSI_MEMORY_TYPE_INPUT: + assert(prog->getType() == Program::TYPE_COMPUTE); + assert(idx == -1); + sym->setFile(FILE_SHADER_INPUT); + address += info->prop.cp.inputOffset; + break; + default: + assert(0); /* TODO: Add support for global and private memory */ + } + } if (idx >= 0) { if (sym->reg.file == FILE_SHADER_INPUT) @@ -1989,7 +2007,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) void Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) { - Value *val; Value *arg[4], *src[8]; Value *lod = NULL, *shd = NULL; unsigned int s, c, d; @@ -2032,17 +2049,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) shd = src[n - 1]; } - if (tgt.isCube()) { - for (c = 0; c < 3; ++c) - src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); - val = getScratch(); - mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); - mkOp2(OP_MAX, TYPE_F32, val, src[2], val); - mkOp1(OP_RCP, TYPE_F32, val, val); - for (c = 0; c < 3; ++c) - src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); - } - for (c = 0, d = 0; c < 4; ++c) { if (dst[c]) { texi->setDef(d++, dst[c]); @@ -2148,6 +2154,7 @@ Converter::handleLIT(Value *dst0[4]) } } +/* Keep this around for now as reference when adding img support static inline bool isResourceSpecial(const int r) { @@ -2178,7 +2185,8 @@ Converter::getResourceBase(const int r) switch (r) { case TGSI_RESOURCE_GLOBAL: - sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15); + sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, + info->io.auxCBSlot); break; case TGSI_RESOURCE_LOCAL: assert(prog->getType() == Program::TYPE_COMPUTE); @@ -2243,6 +2251,7 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask) } return n + 1; } +*/ // For raw loads, granularity is 4 byte. // Usage of the texture read mask on OP_SULDP is not allowed. @@ -2253,8 +2262,9 @@ Converter::handleLOAD(Value *dst0[4]) int c; std::vector<Value *> off, src, ldv, def; - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || - tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { + switch (tgsi.getSrc(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2274,9 +2284,12 @@ Converter::handleLOAD(Value *dst0[4]) if (tgsi.getSrc(0).isIndirect(0)) ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0)); } - return; + break; + default: + assert(!"Unsupported srcFile for LOAD"); } +/* Keep this around for now as reference when adding img support getResourceCoords(off, r, 1); if (isResourceRaw(code, r)) { @@ -2342,6 +2355,7 @@ Converter::handleLOAD(Value *dst0[4]) FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) if (dst0[c] != def[c]) mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); +*/ } // For formatted stores, the write mask on OP_SUSTP can be used. @@ -2353,8 +2367,9 @@ Converter::handleSTORE() int c; std::vector<Value *> off, src, dummy; - if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER || - tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) { + switch (tgsi.getDst(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (c = 0; c < 4; ++c) { if (!(tgsi.getDst(0).getMask() & (1 << c))) continue; @@ -2375,9 +2390,12 @@ Converter::handleSTORE() if (tgsi.getDst(0).isIndirect(0)) st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0)); } - return; + break; + default: + assert(!"Unsupported dstFile for STORE"); } +/* Keep this around for now as reference when adding img support getResourceCoords(off, r, 0); src = off; const int s = src.size(); @@ -2425,6 +2443,7 @@ Converter::handleSTORE() mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, dummy, src)->tex.mask = tgsi.getDst(0).getMask(); } +*/ } // XXX: These only work on resources with the single-component u32/s32 formats. @@ -2439,8 +2458,9 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector<Value *> defv; LValue *dst = getScratch(); - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || - tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { + switch (tgsi.getSrc(0).getFile()) { + case TGSI_FILE_BUFFER: + case TGSI_FILE_MEMORY: for (int c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2468,10 +2488,12 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov - return; + break; + default: + assert(!"Unsupported srcFile for ATOM"); } - +/* Keep this around for now as reference when adding img support getResourceCoords(srcv, r, 1); if (isResourceSpecial(r)) { @@ -2499,6 +2521,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) for (int c = 0; c < 4; ++c) if (dst0[c]) dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov +*/ } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp index 0b903780614..a5deaef14e0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp @@ -67,6 +67,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) tmp = bld.getScratch(); for (l = 0; l < 4; ++l) { + Value *src[3], *val; // mov coordinates from lane l to all lanes bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (c = 0; c < dim; ++c) { @@ -92,10 +93,25 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i) add->lanes = 1; /* abused for .ndv */ } + // normalize cube coordinates if necessary + if (i->tex.target.isCube()) { + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); + } else { + for (c = 0; c < dim; ++c) + src[c] = crd[c]; + } + // texture bld.insert(tex = cloneForward(func, i)); for (c = 0; c < dim; ++c) - tex->setSrc(c + array, crd[c]); + tex->setSrc(c + array, src[c]); bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); // save results diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp index 12c5f699603..02c4f1a4ca8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp @@ -682,7 +682,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms, Value **ms_x, Value **ms_y) { // This loads the texture-indexed ms setting from the constant buffer Value *tmp = new_LValue(func, FILE_GPR); - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; off += prog->driver->io.suInfoBase; if (prog->getType() > Program::TYPE_VERTEX) off += 16 * 2 * 4; @@ -724,6 +724,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) const int dref = arg; const int lod = i->tex.target.isShadow() ? (arg + 1) : arg; + /* Only normalize in the non-explicit derivatives case. + */ + if (i->tex.target.isCube() && i->op != OP_TXD) { + Value *src[3], *val; + int c; + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c)); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) { + i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), + i->getSrc(c), val)); + } + } + // handle MS, which means looking up the MS params for this texture, and // adjusting the input coordinates to point at the right sample. if (i->tex.target.isMS()) { @@ -934,12 +951,14 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i) handleTEX(i); i->op = OP_TEX; // no need to clone dPdx/dPdy later + i->tex.derivAll = true; for (c = 0; c < dim; ++c) crd[c] = bld.getScratch(); bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (l = 0; l < 4; ++l) { + Value *src[3], *val; // mov coordinates from lane l to all lanes for (c = 0; c < dim; ++c) bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); @@ -949,10 +968,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i) // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + // normalize cube coordinates if necessary + if (i->tex.target.isCube()) { + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); + } else { + for (c = 0; c < dim; ++c) + src[c] = crd[c]; + } // texture bld.insert(tex = cloneForward(func, i)); for (c = 0; c < dim; ++c) - tex->setSrc(c, crd[c]); + tex->setSrc(c, src[c]); // save results for (c = 0; i->defExists(c); ++c) { Instruction *mov; @@ -1174,7 +1207,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) bld.mkLoad(TYPE_F32, def, bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx), off); break; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index d0936d88d60..e8f8e30918b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -600,7 +600,7 @@ NVC0LoweringPass::visit(BasicBlock *bb) inline Value * NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) { - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; uint32_t off = prog->driver->io.texBindBase + slot * 4; return bld. mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); @@ -615,6 +615,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); const int chipset = prog->getTarget()->getChipset(); + /* Only normalize in the non-explicit derivatives case. For explicit + * derivatives, this is handled in handleManualTXD. + */ + if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) { + Value *src[3], *val; + int c; + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c)); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) { + i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), + i->getSrc(c), val)); + } + } + // Arguments to the TEX instruction are a little insane. Even though the // encoding is identical between SM20 and SM30, the arguments mean // different things between Fermi and Kepler+. A lot of arguments are @@ -728,9 +746,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i) } Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; - for (int s = dim; s >= 1; --s) - i->setSrc(s, i->getSrc(s - 1)); - i->setSrc(0, arrayIndex); + if (arrayIndex) { + for (int s = dim; s >= 1; --s) + i->setSrc(s, i->getSrc(s - 1)); + i->setSrc(0, arrayIndex); + } else { + i->moveSources(0, 1); + } if (arrayIndex) { int sat = (i->op == OP_TXF) ? 1 : 0; @@ -861,6 +883,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) bld.mkOp(OP_QUADON, TYPE_NONE, NULL); for (l = 0; l < 4; ++l) { + Value *src[3], *val; // mov coordinates from lane l to all lanes for (c = 0; c < dim; ++c) bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); @@ -870,10 +893,24 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i) // add dPdy from lane l to lanes dy for (c = 0; c < dim; ++c) bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); + // normalize cube coordinates + if (i->tex.target.isCube()) { + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]); + val = bld.getScratch(); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); + bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val); + bld.mkOp1(OP_RCP, TYPE_F32, val, val); + for (c = 0; c < 3; ++c) + src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val); + } else { + for (c = 0; c < dim; ++c) + src[c] = crd[c]; + } // texture bld.insert(tex = cloneForward(func, i)); for (c = 0; c < dim; ++c) - tex->setSrc(c + array, crd[c]); + tex->setSrc(c + array, src[c]); // save results for (c = 0; i->defExists(c); ++c) { Instruction *mov; @@ -1098,6 +1135,7 @@ NVC0LoweringPass::handleSharedATOM(Instruction *atom) break; default: assert(0); + return; } Instruction *i = @@ -1204,7 +1242,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) inline Value * NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) { - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; off += prog->driver->io.suInfoBase; return bld. mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr); @@ -1213,7 +1251,7 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) inline Value * NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off) { - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; off += prog->driver->io.suInfoBase; if (ptr) @@ -1226,7 +1264,7 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off) inline Value * NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off) { - uint8_t b = prog->driver->io.resInfoCBSlot; + uint8_t b = prog->driver->io.auxCBSlot; off += prog->driver->io.suInfoBase; if (ptr) @@ -1540,7 +1578,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) call->indirect = 1; call->absolute = 1; call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST, - prog->driver->io.resInfoCBSlot, TYPE_U32, + prog->driver->io.auxCBSlot, TYPE_U32, prog->driver->io.suInfoBase + base)); call->setSrc(1, r[2]); call->setSrc(2, r[4]); @@ -1698,7 +1736,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i) } addr += prog->driver->prop.cp.gridInfoBase; bld.mkLoad(TYPE_U32, i->getDef(0), - bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); + bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, addr), NULL); break; case SV_SAMPLE_INDEX: // TODO: Properly pass source as an address in the PIX address space @@ -1715,7 +1754,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i) bld.mkLoad(TYPE_F32, i->getDef(0), bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, TYPE_U32, prog->driver->io.sampleInfoBase + 4 * sym->reg.data.sv.index), off); @@ -1780,7 +1819,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i) { if (i->dType == TYPE_F64) { Value *pred = bld.getSSA(1, FILE_PREDICATE); - Value *zero = bld.loadImm(NULL, 0.0d); + Value *zero = bld.loadImm(NULL, 0.0); Value *dst = bld.getSSA(8); bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0)); bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index cfa85ec123c..066faa367d2 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -204,6 +204,11 @@ static const char *ldstSubOpStr[] = "", "lock", "unlock" }; +static const char *subfmOpStr[] = +{ + "", "3d" +}; + static const char *DataTypeStr[] = { "-", @@ -548,6 +553,10 @@ void Instruction::print() const if (subOp < Elements(ldstSubOpStr)) PRINT("%s ", ldstSubOpStr[subOp]); break; + case OP_SUBFM: + if (subOp < Elements(subfmOpStr)) + PRINT("%s ", subfmOpStr[subOp]); + break; default: if (subOp) PRINT("(SUBOP:%u) ", subOp); diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c index cd44aa1e1d9..ca73fd17a43 100644 --- a/src/gallium/drivers/nouveau/nouveau_compiler.c +++ b/src/gallium/drivers/nouveau/nouveau_compiler.c @@ -114,8 +114,6 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[], info.io.auxCBSlot = 15; info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET; - - info.io.resInfoCBSlot = 15; info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info.io.msInfoCBSlot = 15; info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 04488d6d0a6..d781f6fd7d4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -67,122 +67,94 @@ nv50_screen_compute_setup(struct nv50_screen *screen, if (ret) return ret; - BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->handle); - BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1); + BEGIN_NV04(push, NV50_CP(UNK02A0), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1); + BEGIN_NV04(push, NV50_CP(DMA_STACK), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2); + BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->stack_bo->offset); PUSH_DATA (push, screen->stack_bo->offset); - BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1); + BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1); PUSH_DATA (push, 4); - BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1); + BEGIN_NV04(push, NV50_CP(UNK0290), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1); + BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1); + BEGIN_NV04(push, NV50_CP(REG_MODE), 1); PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED); - BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1); + BEGIN_NV04(push, NV50_CP(UNK0384), 1); PUSH_DATA (push, 0x100); - BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1); + BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1); PUSH_DATA (push, fifo->vram); for (i = 0; i < 15; i++) { - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2); + BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); } - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2); + BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2); PUSH_DATA (push, 0); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1); PUSH_DATA (push, ~0); - BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1); + BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1); PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1); PUSH_DATA (push, 7); - BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1); + BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1); + BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1); + BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1); PUSH_DATA (push, 0x54); - BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1); + BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1); + BEGIN_NV04(push, NV50_CP(DMA_TIC), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3); + BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1); + BEGIN_NV04(push, NV50_CP(DMA_TSC), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3); + BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1); - BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1); + BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1); + BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1); PUSH_DATA (push, fifo->vram); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2); + BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls_bo->offset + 65536); PUSH_DATA (push, screen->tls_bo->offset + 65536); - BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1); + BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1); PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2)); return 0; } -static bool -nv50_compute_validate_program(struct nv50_context *nv50) -{ - struct nv50_program *prog = nv50->compprog; - - if (prog->mem) - return true; - - if (!prog->translated) { - prog->translated = nv50_program_translate( - prog, nv50->screen->base.device->chipset, &nv50->base.debug); - if (!prog->translated) - return false; - } - if (unlikely(!prog->code_size)) - return false; - - if (likely(prog->code_size)) { - if (nv50_program_upload_code(nv50, prog)) { - struct nouveau_pushbuf *push = nv50->base.pushbuf; - BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1); - PUSH_DATA (push, 0); - return true; - } - } - return false; -} - static void nv50_compute_validate_globals(struct nv50_context *nv50) { @@ -198,26 +170,25 @@ nv50_compute_validate_globals(struct nv50_context *nv50) } } +static struct nv50_state_validate +validate_list_cp[] = { + { nv50_compprog_validate, NV50_NEW_CP_PROGRAM }, + { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS }, +}; + static bool -nv50_compute_state_validate(struct nv50_context *nv50) +nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask) { - if (!nv50_compute_validate_program(nv50)) - return false; - - if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS) - nv50_compute_validate_globals(nv50); + bool ret; /* TODO: validate textures, samplers, surfaces */ + ret = nv50_state_validate(nv50, mask, validate_list_cp, + ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp, + nv50->bufctx_cp); - nv50_bufctx_fence(nv50->bufctx_cp, false); - - nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp); - if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf))) - return false; if (unlikely(nv50->state.flushed)) nv50_bufctx_fence(nv50->bufctx_cp, true); - - return true; + return ret; } static void @@ -227,7 +198,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) struct nouveau_pushbuf *push = screen->base.pushbuf; unsigned size = align(nv50->compprog->parm_size, 0x4); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1); + BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1); PUSH_DATA (push, (size / 4) << 8); if (size) { @@ -245,7 +216,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input) nouveau_pushbuf_bufctx(push, nv50->bufctx); nouveau_pushbuf_validate(push); - BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4); + BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4); nouveau_pushbuf_data(push, bo, offset, size); nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm); @@ -278,7 +249,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) struct nv50_program *cp = nv50->compprog; bool ret; - ret = !nv50_compute_state_validate(nv50); + ret = !nv50_state_validate_cp(nv50, ~0); if (ret) { NOUVEAU_ERR("Failed to launch grid !\n"); return; @@ -286,33 +257,33 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) nv50_compute_upload_input(nv50, info->input); - BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); + BEGIN_NV04(push, NV50_CP(CP_START_ID), 1); PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); - BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); + BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40)); - BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1); + BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1); PUSH_DATA (push, cp->max_gpr); /* grid/block setup */ - BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); + BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2); PUSH_DATA (push, info->block[1] << 16 | info->block[0]); PUSH_DATA (push, info->block[2]); - BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); + BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1); PUSH_DATA (push, 1 << 16 | block_size); - BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); + BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1); PUSH_DATA (push, 1); - BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); + BEGIN_NV04(push, NV50_CP(GRIDDIM), 1); PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]); - BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); + BEGIN_NV04(push, NV50_CP(GRIDID), 1); PUSH_DATA (push, 1); /* kernel launching */ - BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1); + BEGIN_NV04(push, NV50_CP(LAUNCH), 1); PUSH_DATA (push, 0); - BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); /* bind a compute shader clobbers fragment shader state */ - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c index 4874b77b1e1..61a52c4b366 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c @@ -176,8 +176,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) { if (nv50->framebuffer.cbufs[i] && nv50->framebuffer.cbufs[i]->texture == res) { - nv50->dirty |= NV50_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); if (!--ref) return ref; } @@ -186,8 +186,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, if (bind & PIPE_BIND_DEPTH_STENCIL) { if (nv50->framebuffer.zsbuf && nv50->framebuffer.zsbuf->texture == res) { - nv50->dirty |= NV50_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); if (!--ref) return ref; } @@ -202,8 +202,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS); for (i = 0; i < nv50->num_vtxbufs; ++i) { if (nv50->vtxbuf[i].buffer == res) { - nv50->dirty |= NV50_NEW_ARRAYS; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX); if (!--ref) return ref; } @@ -211,8 +211,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, if (nv50->idxbuf.buffer == res) { /* Just rebind to the bufctx as there is no separate dirty bit */ - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); - BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX); + BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD); if (!--ref) return ref; } @@ -222,8 +222,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nv50->num_textures[s]; ++i) { if (nv50->textures[s][i] && nv50->textures[s][i]->texture == res) { - nv50->dirty |= NV50_NEW_TEXTURES; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); if (!--ref) return ref; } @@ -236,9 +236,9 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx, continue; if (!nv50->constbuf[s][i].user && nv50->constbuf[s][i].u.buf == res) { - nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; nv50->constbuf_dirty[s] |= 1 << i; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); if (!--ref) return ref; } @@ -345,10 +345,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD; - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc); - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo); if (screen->compute) { BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code); BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc); @@ -357,7 +357,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; - BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo); if (screen->compute) BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2620d03b999..2317fa2ccf8 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -26,43 +26,43 @@ #include "nv50/nv50_3d.xml.h" #include "nv50/nv50_2d.xml.h" -#define NV50_NEW_BLEND (1 << 0) -#define NV50_NEW_RASTERIZER (1 << 1) -#define NV50_NEW_ZSA (1 << 2) -#define NV50_NEW_VERTPROG (1 << 3) -#define NV50_NEW_GMTYPROG (1 << 6) -#define NV50_NEW_FRAGPROG (1 << 7) -#define NV50_NEW_BLEND_COLOUR (1 << 8) -#define NV50_NEW_STENCIL_REF (1 << 9) -#define NV50_NEW_CLIP (1 << 10) -#define NV50_NEW_SAMPLE_MASK (1 << 11) -#define NV50_NEW_FRAMEBUFFER (1 << 12) -#define NV50_NEW_STIPPLE (1 << 13) -#define NV50_NEW_SCISSOR (1 << 14) -#define NV50_NEW_VIEWPORT (1 << 15) -#define NV50_NEW_ARRAYS (1 << 16) -#define NV50_NEW_VERTEX (1 << 17) -#define NV50_NEW_CONSTBUF (1 << 18) -#define NV50_NEW_TEXTURES (1 << 19) -#define NV50_NEW_SAMPLERS (1 << 20) -#define NV50_NEW_STRMOUT (1 << 21) -#define NV50_NEW_MIN_SAMPLES (1 << 22) -#define NV50_NEW_CONTEXT (1 << 31) +#define NV50_NEW_3D_BLEND (1 << 0) +#define NV50_NEW_3D_RASTERIZER (1 << 1) +#define NV50_NEW_3D_ZSA (1 << 2) +#define NV50_NEW_3D_VERTPROG (1 << 3) +#define NV50_NEW_3D_GMTYPROG (1 << 6) +#define NV50_NEW_3D_FRAGPROG (1 << 7) +#define NV50_NEW_3D_BLEND_COLOUR (1 << 8) +#define NV50_NEW_3D_STENCIL_REF (1 << 9) +#define NV50_NEW_3D_CLIP (1 << 10) +#define NV50_NEW_3D_SAMPLE_MASK (1 << 11) +#define NV50_NEW_3D_FRAMEBUFFER (1 << 12) +#define NV50_NEW_3D_STIPPLE (1 << 13) +#define NV50_NEW_3D_SCISSOR (1 << 14) +#define NV50_NEW_3D_VIEWPORT (1 << 15) +#define NV50_NEW_3D_ARRAYS (1 << 16) +#define NV50_NEW_3D_VERTEX (1 << 17) +#define NV50_NEW_3D_CONSTBUF (1 << 18) +#define NV50_NEW_3D_TEXTURES (1 << 19) +#define NV50_NEW_3D_SAMPLERS (1 << 20) +#define NV50_NEW_3D_STRMOUT (1 << 21) +#define NV50_NEW_3D_MIN_SAMPLES (1 << 22) +#define NV50_NEW_3D_CONTEXT (1 << 31) #define NV50_NEW_CP_PROGRAM (1 << 0) #define NV50_NEW_CP_GLOBALS (1 << 1) /* 3d bufctx (during draw_vbo, blit_3d) */ -#define NV50_BIND_FB 0 -#define NV50_BIND_VERTEX 1 -#define NV50_BIND_VERTEX_TMP 2 -#define NV50_BIND_INDEX 3 -#define NV50_BIND_TEXTURES 4 -#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) -#define NV50_BIND_SO 53 -#define NV50_BIND_SCREEN 54 -#define NV50_BIND_TLS 55 -#define NV50_BIND_3D_COUNT 56 +#define NV50_BIND_3D_FB 0 +#define NV50_BIND_3D_VERTEX 1 +#define NV50_BIND_3D_VERTEX_TMP 2 +#define NV50_BIND_3D_INDEX 3 +#define NV50_BIND_3D_TEXTURES 4 +#define NV50_BIND_3D_CB(s, i) (5 + 16 * (s) + (i)) +#define NV50_BIND_3D_SO 53 +#define NV50_BIND_3D_SCREEN 54 +#define NV50_BIND_3D_TLS 55 +#define NV50_BIND_3D_COUNT 56 /* compute bufctx (during launch_grid) */ #define NV50_BIND_CP_GLOBAL 0 @@ -115,7 +115,7 @@ struct nv50_context { struct nouveau_bufctx *bufctx; struct nouveau_bufctx *bufctx_cp; - uint32_t dirty; + uint32_t dirty_3d; /* dirty flags for 3d state */ uint32_t dirty_cp; /* dirty flags for compute state */ bool cb_dirty; @@ -221,6 +221,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); void nv50_vertprog_validate(struct nv50_context *); void nv50_gmtyprog_validate(struct nv50_context *); void nv50_fragprog_validate(struct nv50_context *); +void nv50_compprog_validate(struct nv50_context *); void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); @@ -231,7 +232,15 @@ void nv50_stream_output_validate(struct nv50_context *); extern void nv50_init_state_functions(struct nv50_context *); /* nv50_state_validate.c */ -bool nv50_state_validate(struct nv50_context *, uint32_t state_mask); +struct nv50_state_validate { + void (*func)(struct nv50_context *); + uint32_t states; +}; + +bool nv50_state_validate(struct nv50_context *, uint32_t, + struct nv50_state_validate *, int, uint32_t *, + struct nouveau_bufctx *); +bool nv50_state_validate_3d(struct nv50_context *, uint32_t); /* nv50_surface.c */ extern void nv50_clear(struct pipe_context *, unsigned buffers, diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c index a67ef28abf8..3444b3110de 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_program.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c @@ -335,7 +335,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset, info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET; info->io.genUserClip = prog->vp.clpd_nr; - info->io.resInfoCBSlot = 15; info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET; info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET; info->io.msInfoCBSlot = 15; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index be19c0fdc85..0a73090d78d 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -202,10 +202,10 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq) func = nv50_hw_sm_get_func(c); /* configure and reset the counter(s) */ - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1); PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) | cfg->ctr[i].unit | cfg->ctr[i].mode); - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -240,7 +240,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) PUSH_SPACE(push, 8); for (c = 0; c < 4; c++) { if (screen->pm.mp_counter[c]) { - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1); PUSH_DATA (push, 0); } } @@ -257,7 +257,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) hq->bo); PUSH_SPACE(push, 2); - BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); pipe->bind_compute_state(pipe, screen->pm.prog); @@ -295,7 +295,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) mask |= 1 << hsq->ctr[i]; func = nv50_hw_sm_get_func(hsq->ctr[i]); - BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1); + BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(hsq->ctr[i])), 1); PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8) | cfg->ctr[i].unit | cfg->ctr[i].mode); } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index 8e4b2b42bda..3d2ebfbcc46 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -29,6 +29,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_query_hw.h" +#include "nv50/nv50_compute.xml.h" + void nv50_constbufs_validate(struct nv50_context *nv50) { @@ -94,7 +96,7 @@ nv50_constbufs_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1); PUSH_DATA (push, (b << 12) | (i << 8) | p | 1); - BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD); nv50->cb_dirty = 1; /* Force cache flush for UBO. */ } else { @@ -131,14 +133,14 @@ nv50_program_update_context_state(struct nv50_context *nv50, if (prog && prog->tls_space) { if (nv50->state.new_tls_space) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS); if (!nv50->state.tls_required || nv50->state.new_tls_space) - BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo); nv50->state.new_tls_space = false; nv50->state.tls_required |= 1 << stage; } else { if (nv50->state.tls_required == (1 << stage)) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS); nv50->state.tls_required &= ~(1 << stage); } } @@ -181,7 +183,7 @@ nv50_fragprog_validate(struct nv50_context *nv50) fp->fp.force_persample_interp = rast->force_persample_interp; } - if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES))) + if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES))) return; if (!nv50_program_validate(nv50, fp)) @@ -238,6 +240,19 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) /* GP_ENABLE is updated in linkage validation */ } +void +nv50_compprog_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_program *cp = nv50->compprog; + + if (cp && !nv50_program_validate(nv50, cp)) + return; + + BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1); + PUSH_DATA (push, 0); +} + static void nv50_sprite_coords_validate(struct nv50_context *nv50) { @@ -309,7 +324,7 @@ nv50_validate_derived_rs(struct nv50_context *nv50) PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); } - if (nv50->dirty & NV50_NEW_FRAGPROG) + if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG) return; psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN; @@ -378,9 +393,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) uint8_t map[64]; uint8_t so_map[64]; - if (!(nv50->dirty & (NV50_NEW_VERTPROG | - NV50_NEW_FRAGPROG | - NV50_NEW_GMTYPROG))) { + if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG | + NV50_NEW_3D_FRAGPROG | + NV50_NEW_3D_GMTYPROG))) { uint8_t bfc, ffc; ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK); bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK) @@ -633,8 +648,6 @@ nv50_stream_output_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1); PUSH_DATA (push, ctrl); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO); - for (i = 0; i < nv50->num_so_targets; ++i) { struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]); struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); @@ -664,7 +677,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) prims = MIN2(prims, limit); } targ->stride = so->stride[i]; - BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR); } if (prims != ~0) { BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 8504ba466cc..86e74d68b11 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -200,7 +200,7 @@ nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->blend = hwcso; - nv50->dirty |= NV50_NEW_BLEND; + nv50->dirty_3d |= NV50_NEW_3D_BLEND; } static void @@ -337,7 +337,7 @@ nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->rast = hwcso; - nv50->dirty |= NV50_NEW_RASTERIZER; + nv50->dirty_3d |= NV50_NEW_3D_RASTERIZER; } static void @@ -426,7 +426,7 @@ nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->zsa = hwcso; - nv50->dirty |= NV50_NEW_ZSA; + nv50->dirty_3d |= NV50_NEW_3D_ZSA; } static void @@ -605,7 +605,7 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s, nv50->num_samplers[s] = nr; - nv50->dirty |= NV50_NEW_SAMPLERS; + nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS; } static void @@ -698,9 +698,9 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, nv50->num_textures[s] = nr; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty |= NV50_NEW_TEXTURES; + nv50->dirty_3d |= NV50_NEW_3D_TEXTURES; } static void @@ -776,7 +776,7 @@ nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->vertprog = hwcso; - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty_3d |= NV50_NEW_3D_VERTPROG; } static void * @@ -792,7 +792,7 @@ nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->fragprog = hwcso; - nv50->dirty |= NV50_NEW_FRAGPROG; + nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG; } static void * @@ -808,7 +808,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->gmtyprog = hwcso; - nv50->dirty |= NV50_NEW_GMTYPROG; + nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG; } static void * @@ -857,7 +857,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].u.buf = NULL; else if (nv50->constbuf[s][i].u.buf) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i)); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i)); pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res); @@ -882,7 +882,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, } nv50->constbuf_dirty[s] |= 1 << i; - nv50->dirty |= NV50_NEW_CONSTBUF; + nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF; } /* ============================================================================= @@ -895,7 +895,7 @@ nv50_set_blend_color(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->blend_colour = *bcol; - nv50->dirty |= NV50_NEW_BLEND_COLOUR; + nv50->dirty_3d |= NV50_NEW_3D_BLEND_COLOUR; } static void @@ -905,7 +905,7 @@ nv50_set_stencil_ref(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->stencil_ref = *sr; - nv50->dirty |= NV50_NEW_STENCIL_REF; + nv50->dirty_3d |= NV50_NEW_3D_STENCIL_REF; } static void @@ -916,7 +916,7 @@ nv50_set_clip_state(struct pipe_context *pipe, memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp)); - nv50->dirty |= NV50_NEW_CLIP; + nv50->dirty_3d |= NV50_NEW_3D_CLIP; } static void @@ -925,7 +925,7 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) struct nv50_context *nv50 = nv50_context(pipe); nv50->sample_mask = sample_mask; - nv50->dirty |= NV50_NEW_SAMPLE_MASK; + nv50->dirty_3d |= NV50_NEW_3D_SAMPLE_MASK; } static void @@ -935,7 +935,7 @@ nv50_set_min_samples(struct pipe_context *pipe, unsigned min_samples) if (nv50->min_samples != min_samples) { nv50->min_samples = min_samples; - nv50->dirty |= NV50_NEW_MIN_SAMPLES; + nv50->dirty_3d |= NV50_NEW_3D_MIN_SAMPLES; } } @@ -945,11 +945,11 @@ nv50_set_framebuffer_state(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); util_copy_framebuffer_state(&nv50->framebuffer, fb); - nv50->dirty |= NV50_NEW_FRAMEBUFFER; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER; } static void @@ -959,7 +959,7 @@ nv50_set_polygon_stipple(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); nv50->stipple = *stipple; - nv50->dirty |= NV50_NEW_STIPPLE; + nv50->dirty_3d |= NV50_NEW_3D_STIPPLE; } static void @@ -977,7 +977,7 @@ nv50_set_scissor_states(struct pipe_context *pipe, continue; nv50->scissors[start_slot + i] = scissor[i]; nv50->scissors_dirty |= 1 << (start_slot + i); - nv50->dirty |= NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_SCISSOR; } } @@ -996,7 +996,7 @@ nv50_set_viewport_states(struct pipe_context *pipe, continue; nv50->viewports[start_slot + i] = vpt[i]; nv50->viewports_dirty |= 1 << (start_slot + i); - nv50->dirty |= NV50_NEW_VIEWPORT; + nv50->dirty_3d |= NV50_NEW_3D_VIEWPORT; } } @@ -1008,8 +1008,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); unsigned i; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX); - nv50->dirty |= NV50_NEW_ARRAYS; + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX); + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb, start_slot, count); @@ -1051,14 +1051,14 @@ nv50_set_index_buffer(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); if (nv50->idxbuf.buffer) - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX); if (ib) { pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer); nv50->idxbuf.index_size = ib->index_size; if (ib->buffer) { nv50->idxbuf.offset = ib->offset; - BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD); + BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(ib->buffer), RD); } else { nv50->idxbuf.user_buffer = ib->user_buffer; } @@ -1073,7 +1073,7 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) struct nv50_context *nv50 = nv50_context(pipe); nv50->vertex = hwcso; - nv50->dirty |= NV50_NEW_VERTEX; + nv50->dirty_3d |= NV50_NEW_3D_VERTEX; } static struct pipe_stream_output_target * @@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe, } nv50->num_so_targets = num_targets; - if (nv50->so_targets_dirty) - nv50->dirty |= NV50_NEW_STRMOUT; + if (nv50->so_targets_dirty) { + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_SO); + nv50->dirty_3d |= NV50_NEW_3D_STRMOUT; + } } static void diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 55369781606..51204930031 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -25,7 +25,7 @@ nv50_validate_fb(struct nv50_context *nv50) unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1; uint32_t array_size = 0xffff, array_mode = 0; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); @@ -90,7 +90,7 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* only register for writing, otherwise we'd always serialize here */ - BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR); } if (fb->zsbuf) { @@ -118,7 +118,7 @@ nv50_validate_fb(struct nv50_context *nv50) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR); } else { BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -187,8 +187,8 @@ nv50_validate_scissor(struct nv50_context *nv50) #ifdef NV50_SCISSORS_CLIPPING int minx, maxx, miny, maxy, i; - if (!(nv50->dirty & - (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) && + if (!(nv50->dirty_3d & + (NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | NV50_NEW_3D_FRAMEBUFFER)) && nv50->state.scissor == nv50->rast->pipe.scissor) return; @@ -197,7 +197,7 @@ nv50_validate_scissor(struct nv50_context *nv50) nv50->state.scissor = nv50->rast->pipe.scissor; - if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor) + if ((nv50->dirty_3d & NV50_NEW_3D_FRAMEBUFFER) && !nv50->state.scissor) nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1; for (i = 0; i < NV50_MAX_VIEWPORTS; i++) { @@ -290,10 +290,10 @@ nv50_check_program_ucps(struct nv50_context *nv50, vp->vp.clpd_nr = n; if (likely(vp == nv50->vertprog)) { - nv50->dirty |= NV50_NEW_VERTPROG; + nv50->dirty_3d |= NV50_NEW_3D_VERTPROG; nv50_vertprog_validate(nv50); } else { - nv50->dirty |= NV50_NEW_GMTYPROG; + nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG; nv50_gmtyprog_validate(nv50); } nv50_fp_linkage_validate(nv50); @@ -342,7 +342,7 @@ nv50_validate_clip(struct nv50_context *nv50) struct nv50_program *vp; uint8_t clip_enable; - if (nv50->dirty & NV50_NEW_CLIP) { + if (nv50->dirty_3d & NV50_NEW_3D_CLIP) { BEGIN_NV04(push, NV50_3D(CB_ADDR), 1); PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX); BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4); @@ -436,7 +436,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) else ctx_to->state = ctx_to->screen->save_state; - ctx_to->dirty = ~0; + ctx_to->dirty_3d = ~0; + ctx_to->dirty_cp = ~0; ctx_to->viewports_dirty = ~0; ctx_to->scissors_dirty = ~0; @@ -445,71 +446,71 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to) ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1; if (!ctx_to->vertex) - ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS); + ctx_to->dirty_3d &= ~(NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS); if (!ctx_to->vertprog) - ctx_to->dirty &= ~NV50_NEW_VERTPROG; + ctx_to->dirty_3d &= ~NV50_NEW_3D_VERTPROG; if (!ctx_to->fragprog) - ctx_to->dirty &= ~NV50_NEW_FRAGPROG; + ctx_to->dirty_3d &= ~NV50_NEW_3D_FRAGPROG; if (!ctx_to->blend) - ctx_to->dirty &= ~NV50_NEW_BLEND; + ctx_to->dirty_3d &= ~NV50_NEW_3D_BLEND; if (!ctx_to->rast) #ifdef NV50_SCISSORS_CLIPPING - ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR); + ctx_to->dirty_3d &= ~(NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_SCISSOR); #else - ctx_to->dirty &= ~NV50_NEW_RASTERIZER; + ctx_to->dirty_3d &= ~NV50_NEW_3D_RASTERIZER; #endif if (!ctx_to->zsa) - ctx_to->dirty &= ~NV50_NEW_ZSA; + ctx_to->dirty_3d &= ~NV50_NEW_3D_ZSA; ctx_to->screen->cur_ctx = ctx_to; } -static struct state_validate { - void (*func)(struct nv50_context *); - uint32_t states; -} validate_list[] = { - { nv50_validate_fb, NV50_NEW_FRAMEBUFFER }, - { nv50_validate_blend, NV50_NEW_BLEND }, - { nv50_validate_zsa, NV50_NEW_ZSA }, - { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK }, - { nv50_validate_rasterizer, NV50_NEW_RASTERIZER }, - { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR }, - { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF }, - { nv50_validate_stipple, NV50_NEW_STIPPLE }, +static struct nv50_state_validate +validate_list_3d[] = { + { nv50_validate_fb, NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_blend, NV50_NEW_3D_BLEND }, + { nv50_validate_zsa, NV50_NEW_3D_ZSA }, + { nv50_validate_sample_mask, NV50_NEW_3D_SAMPLE_MASK }, + { nv50_validate_rasterizer, NV50_NEW_3D_RASTERIZER }, + { nv50_validate_blend_colour, NV50_NEW_3D_BLEND_COLOUR }, + { nv50_validate_stencil_ref, NV50_NEW_3D_STENCIL_REF }, + { nv50_validate_stipple, NV50_NEW_3D_STIPPLE }, #ifdef NV50_SCISSORS_CLIPPING - { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | - NV50_NEW_RASTERIZER | - NV50_NEW_FRAMEBUFFER }, + { nv50_validate_scissor, NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | + NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_FRAMEBUFFER }, #else - { nv50_validate_scissor, NV50_NEW_SCISSOR }, + { nv50_validate_scissor, NV50_NEW_3D_SCISSOR }, #endif - { nv50_validate_viewport, NV50_NEW_VIEWPORT }, - { nv50_vertprog_validate, NV50_NEW_VERTPROG }, - { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG }, - { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | - NV50_NEW_MIN_SAMPLES }, - { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER }, - { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG }, - { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER }, - { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER }, - { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, - { nv50_validate_textures, NV50_NEW_TEXTURES }, - { nv50_validate_samplers, NV50_NEW_SAMPLERS }, - { nv50_stream_output_validate, NV50_NEW_STRMOUT | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, - { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }, - { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES }, + { nv50_validate_viewport, NV50_NEW_3D_VIEWPORT }, + { nv50_vertprog_validate, NV50_NEW_3D_VERTPROG }, + { nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG }, + { nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_MIN_SAMPLES }, + { nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG | + NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER }, + { nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG }, + { nv50_validate_derived_rs, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_validate_derived_2, NV50_NEW_3D_ZSA | NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_derived_3, NV50_NEW_3D_BLEND | NV50_NEW_3D_FRAMEBUFFER }, + { nv50_validate_clip, NV50_NEW_3D_CLIP | NV50_NEW_3D_RASTERIZER | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_constbufs_validate, NV50_NEW_3D_CONSTBUF }, + { nv50_validate_textures, NV50_NEW_3D_TEXTURES }, + { nv50_validate_samplers, NV50_NEW_3D_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_3D_STRMOUT | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG }, + { nv50_vertex_arrays_validate, NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS }, + { nv50_validate_min_samples, NV50_NEW_3D_MIN_SAMPLES }, }; bool -nv50_state_validate(struct nv50_context *nv50, uint32_t mask) +nv50_state_validate(struct nv50_context *nv50, uint32_t mask, + struct nv50_state_validate *validate_list, int size, + uint32_t *dirty, struct nouveau_bufctx *bufctx) { uint32_t state_mask; int ret; @@ -518,16 +519,16 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask) if (nv50->screen->cur_ctx != nv50) nv50_switch_pipe_context(nv50); - state_mask = nv50->dirty & mask; + state_mask = *dirty & mask; if (state_mask) { - for (i = 0; i < ARRAY_SIZE(validate_list); ++i) { - struct state_validate *validate = &validate_list[i]; + for (i = 0; i < size; i++) { + struct nv50_state_validate *validate = &validate_list[i]; if (state_mask & validate->states) validate->func(nv50); } - nv50->dirty &= ~state_mask; + *dirty &= ~state_mask; if (nv50->state.rt_serialize) { nv50->state.rt_serialize = false; @@ -535,14 +536,26 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask) PUSH_DATA (nv50->base.pushbuf, 0); } - nv50_bufctx_fence(nv50->bufctx_3d, false); + nv50_bufctx_fence(bufctx, false); } - nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d); + nouveau_pushbuf_bufctx(nv50->base.pushbuf, bufctx); ret = nouveau_pushbuf_validate(nv50->base.pushbuf); + return !ret; +} + +bool +nv50_state_validate_3d(struct nv50_context *nv50, uint32_t mask) +{ + bool ret; + + ret = nv50_state_validate(nv50, mask, validate_list_3d, + ARRAY_SIZE(validate_list_3d), &nv50->dirty_3d, + nv50->bufctx_3d); + if (unlikely(nv50->state.flushed)) { nv50->state.flushed = false; nv50_bufctx_fence(nv50->bufctx_3d, true); } - return !ret; + return ret; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 84646f6adb1..68b0e18ef8f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -353,7 +353,7 @@ nv50_clear_render_target(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, nv50->cond_condmode); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } static void @@ -436,7 +436,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(COND_MODE), 1); PUSH_DATA (push, nv50->cond_condmode); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } void @@ -525,7 +525,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, uint32_t mode = 0; /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ - if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER)) + if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER)) return; /* We have to clear ALL of the layers, not up to the min number of layers @@ -798,7 +798,7 @@ nv50_clear_buffer(struct pipe_context *pipe, data, data_size); } - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; + nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR; } /* =============================== BLIT CODE =================================== @@ -834,7 +834,7 @@ struct nv50_blitctx struct pipe_sampler_view *texture[2]; struct nv50_tsc_entry *sampler[2]; unsigned min_samples; - uint32_t dirty; + uint32_t dirty_3d; } saved; struct nv50_rasterizer_stateobj rast; }; @@ -1253,15 +1253,15 @@ nv50_blitctx_pre_blit(struct nv50_blitctx *ctx) nv50->min_samples = 1; - ctx->saved.dirty = nv50->dirty; + ctx->saved.dirty_3d = nv50->dirty_3d; - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty = - NV50_NEW_FRAMEBUFFER | NV50_NEW_MIN_SAMPLES | - NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG | - NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS; + nv50->dirty_3d = + NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_MIN_SAMPLES | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_GMTYPROG | + NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS; } static void @@ -1302,14 +1302,14 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit) nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query, nv50->cond_cond, nv50->cond_mode); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES); - nv50->dirty = blit->saved.dirty | - (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK | - NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND | - NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS | - NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG); + nv50->dirty_3d = blit->saved.dirty_3d | + (NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR | NV50_NEW_3D_SAMPLE_MASK | + NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_ZSA | NV50_NEW_3D_BLEND | + NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS | + NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_FRAGPROG); nv50->scissors_dirty |= 1; nv50->base.pipe.set_min_samples(&nv50->base.pipe, blit->saved.min_samples); @@ -1344,7 +1344,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info) nv50_blitctx_prepare_state(blit); - nv50_state_validate(nv50, ~0); + nv50_state_validate_3d(nv50, ~0); x_range = (float)info->src.box.width / (float)info->dst.box.width; y_range = (float)info->src.box.height / (float)info->dst.box.height; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index 4b69c3bd504..414d326eeed 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -299,7 +299,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD); BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1); PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 6f60445d8d2..a11cdf847b1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -230,7 +230,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50, addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, &bo); if (addrs[b]) - BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART | + BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); } nv50->base.vbo_dirty = true; @@ -269,7 +269,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50) address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size, &bo); if (address[b]) - BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo); + BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo); } BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); @@ -286,7 +286,7 @@ static inline void nv50_release_user_vbufs(struct nv50_context *nv50) { if (nv50->vbo_user) { - nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP); + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP); nouveau_scratch_done(&nv50->base); } } @@ -394,7 +394,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) struct nv04_resource *buf = nv04_resource(vb->buffer); if (!(refd & (1 << b))) { refd |= 1 << b; - BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD); + BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD); } address = buf->address + vb->buffer_offset + ve->pipe.src_offset; limit = buf->address + buf->base.width0 - 1; @@ -779,9 +779,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->vbo_push_hint = /* the 64 is heuristic */ !(info->indexed && ((nv50->vb_elt_limit + 64) < info->count)); - if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) { + if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) { if (!!nv50->vbo_fifo != nv50->vbo_push_hint) - nv50->dirty |= NV50_NEW_ARRAYS; + nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; else if (!nv50->vbo_fifo) nv50_update_user_vbufs(nv50); @@ -790,7 +790,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; - nv50_state_validate(nv50, ~0); + nv50_state_validate_3d(nv50, ~0); push->kick_notify = nv50_draw_vbo_kick_notify; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h index 68002305d72..7056258d1bf 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h @@ -58,8 +58,8 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_M2MF(m) 5, (m) #define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n) -#define SUBC_COMPUTE(m) 6, (m) -#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n) +#define SUBC_CP(m) 6, (m) +#define NV50_CP(n) SUBC_CP(NV50_COMPUTE_##n) static inline uint32_t diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index ffbb16f79de..6aaa7ce1aaf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -153,7 +153,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) if (nvc0->constbuf[s][i].user) { struct nouveau_bo *bo = nvc0->screen->uniform_bo; - const unsigned base = s << 16; + const unsigned base = NVC0_CB_USR_INFO(s); const unsigned size = nvc0->constbuf[s][0].size; assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); @@ -207,8 +207,8 @@ nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5)); BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (15 << 8) | 1); @@ -219,15 +219,16 @@ static void nvc0_compute_validate_buffers(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; const int s = 5; int i; BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); - PUSH_DATA (push, 512); + PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { if (nvc0->buffers[s][i].buffer) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 54afe887ebd..31e1272aeed 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -98,6 +98,31 @@ #define NVC0_BIND_M2MF 0 #define NVC0_BIND_FENCE 1 +/* 6 user uniform buffers, at 64K each */ +#define NVC0_CB_USR_INFO(s) (s << 16) +#define NVC0_CB_USR_SIZE (6 << 16) +/* 6 driver constbuts, at 1K each */ +#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 10) +#define NVC0_CB_AUX_SIZE (6 << 10) +/* XXX: Figure out what this UNK data is. */ +#define NVC0_CB_AUX_UNK_INFO 0x000 +#define NVC0_CB_AUX_UNK_SIZE (8 * 4) +/* 32 textures handles, at 1 32-bits integer each */ +#define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4 +#define NVC0_CB_AUX_TEX_SIZE (32 * 4) +/* 8 user clip planes, at 4 32-bits floats each */ +#define NVC0_CB_AUX_UCP_INFO 0x100 +#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4) +/* 8 sets of 32-bits integer pairs sample offsets */ +#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */ +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2) +/* draw parameters (index bais, base instance, drawid) */ +#define NVC0_CB_AUX_DRAW_INFO 0x180 /* VP */ +/* 32 user buffers, at 4 32-bits integers each */ +#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4 +#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4) +/* 4 32-bits floats for the vertex runout, put at the end */ +#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE struct nvc0_blitctx; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index bc884d6c08f..b7c6faf9cde 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -535,29 +535,27 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.genUserClip = prog->vp.num_ucps; info->io.auxCBSlot = 15; - info->io.ucpBase = 256; - info->io.drawInfoBase = 256 + 128; + info->io.ucpBase = NVC0_CB_AUX_UCP_INFO; + info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO; if (prog->type == PIPE_SHADER_COMPUTE) { if (chipset >= NVISA_GK104_CHIPSET) { - info->io.resInfoCBSlot = 0; + info->io.auxCBSlot = 0; info->io.texBindBase = NVE4_CP_INPUT_TEX(0); info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); } else { - info->io.resInfoCBSlot = 15; - info->io.suInfoBase = 512; + info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0); } info->io.msInfoCBSlot = 0; info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; } else { if (chipset >= NVISA_GK104_CHIPSET) { - info->io.texBindBase = 0x20; + info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0); info->io.suInfoBase = 0; /* TODO */ } - info->io.resInfoCBSlot = 15; - info->io.sampleInfoBase = 256 + 128; - info->io.suInfoBase = 512; + info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO; + info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0); info->io.msInfoCBSlot = 15; info->io.msInfoBase = 0; /* TODO */ } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 3c5b1da2063..553c001cd2b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -922,14 +922,14 @@ nvc0_screen_create(struct nouveau_device *dev) /* auxiliary constants (6 user clip planes, base instance id) */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { unsigned j; BEGIN_1IC0(push, NVC0_3D(CB_POS), 9); - PUSH_DATA (push, 0); + PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO); for (j = 0; j < 8; ++j) PUSH_DATA(push, j); } else { @@ -943,8 +943,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 256); - PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); PUSH_DATA (push, 0); PUSH_DATAf(push, 0.0f); @@ -952,8 +952,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO); if (screen->base.drm->version >= 0x01000101) { ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 8487abcf999..46b692df2e3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -66,7 +66,7 @@ struct nvc0_screen { struct nouveau_bo *text; struct nouveau_bo *parm; /* for COMPUTE */ - struct nouveau_bo *uniform_bo; /* for 3D */ + struct nouveau_bo *uniform_bo; struct nouveau_bo *tls; struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */ struct nouveau_bo *poly_cache; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 090a0395432..a100fc4c478 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -413,7 +413,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { unsigned s, i; - for (s = 0; s < 5; ++s) + for (s = 0; s < 6; ++s) for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i) if (nvc0_context(pipe)->samplers[s][i] == hwcso) nvc0_context(pipe)->samplers[s][i] = NULL; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index c0ed5c0043d..9c64482f2e2 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -72,6 +72,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; + struct nvc0_screen *screen = nvc0->screen; unsigned i, ms; unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; bool serialize = false; @@ -183,10 +184,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0) ms = 1 << ms_mode; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); - PUSH_DATA (push, 256 + 128); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); for (i = 0; i < ms; i++) { float xy[2]; nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); @@ -313,14 +314,14 @@ static inline void nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nouveau_bo *bo = nvc0->screen->uniform_bo; + struct nvc0_screen *screen = nvc0->screen; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10)); - PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); - PUSH_DATA (push, 256); + PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO); PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); } @@ -424,7 +425,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) if (nvc0->constbuf[s][i].user) { struct nouveau_bo *bo = nvc0->screen->uniform_bo; - const unsigned base = s << 16; + const unsigned base = NVC0_CB_USR_INFO(s); const unsigned size = nvc0->constbuf[s][0].size; assert(i == 0); /* we really only want OpenGL uniforms here */ assert(nvc0->constbuf[s][0].u.data); @@ -478,15 +479,16 @@ static void nvc0_validate_buffers(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; int i, s; for (s = 0; s < 5; s++) { BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); - PUSH_DATA (push, 512); + PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0)); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { if (nvc0->buffers[s][i].buffer) { struct nv04_resource *res = @@ -550,8 +552,8 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0) for (i = 0; i < 5; ++i) { BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i)); BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 53332400a4f..ce6a6dce39c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -707,21 +707,20 @@ void nve4_set_tex_handles(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - uint64_t address; + struct nvc0_screen *screen = nvc0->screen; unsigned s; if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) return; - address = nvc0->screen->uniform_bo->offset + (6 << 16); - for (s = 0; s < 5; ++s, address += (1 << 10)) { + for (s = 0; s < 5; ++s) { uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; if (!dirty) continue; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, address); - PUSH_DATA (push, address); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s)); do { int i = ffs(dirty) - 1; dirty &= ~(1 << i); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index e0e0ad2a0f7..4d9cd5752b5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -820,6 +820,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) struct nv04_resource *buf_count = nv04_resource(info->indirect_params); unsigned size, macro, count = info->indirect_count, drawid = info->drawid; uint32_t offset = buf->offset + info->indirect_offset; + struct nvc0_screen *screen = nvc0->screen; PUSH_SPACE(push, 7); @@ -833,10 +834,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) /* Queue things up to let the macros write params to the driver constbuf */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); BEGIN_NVC0(push, NVC0_3D(CB_POS), 1); - PUSH_DATA (push, 256 + 128); + PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO); if (info->indexed) { assert(nvc0->idxbuf.buffer); @@ -934,6 +935,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ @@ -975,11 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_SPACE(push, 9); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0)); if (!info->indirect) { BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3); - PUSH_DATA (push, 256 + 128); + PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO); PUSH_DATA (push, info->index_bias); PUSH_DATA (push, info->start_instance); PUSH_DATA (push, info->drawid); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 6fa892089ec..d100a9df55b 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -385,7 +385,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, if (!r300->ctx) goto fail; - r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL); + r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300); if (r300->cs == NULL) goto fail; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 7a75b43a53e..63182cba2b2 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -53,7 +53,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags, } r300->flush_counter++; - r300->rws->cs_flush(r300->cs, flags, fence, 0); + r300->rws->cs_flush(r300->cs, flags, fence); r300->dirty_hw = 0; /* New kitchen sink, baby. */ @@ -88,11 +88,11 @@ void r300_flush(struct pipe_context *pipe, * and we cannot emit an empty CS. Let's write to some reg. */ CS_LOCALS(r300); OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0); - r300->rws->cs_flush(r300->cs, flags, fence, 0); + r300->rws->cs_flush(r300->cs, flags, fence); } else { /* Even if hw is not dirty, we should at least reset the CS in case * the space checking failed for the first draw operation. */ - r300->rws->cs_flush(r300->cs, flags, NULL, 0); + r300->rws->cs_flush(r300->cs, flags, NULL); } } diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 57456c6d867..709345a492e 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -981,8 +981,8 @@ boolean r300_resource_get_handle(struct pipe_screen* screen, return FALSE; } - return rws->buffer_get_handle(tex->buf, - tex->tex.stride_in_bytes[0], whandle); + return rws->buffer_get_handle(tex->buf, tex->tex.stride_in_bytes[0], + 0, 0, whandle); } static const struct u_resource_vtbl r300_texture_vtbl = @@ -1116,7 +1116,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen, return NULL; } - buffer = rws->buffer_from_handle(rws, whandle, &stride); + buffer = rws->buffer_from_handle(rws, whandle, &stride, NULL); if (!buffer) return NULL; diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index 8317da727a2..f3bb03e54be 100644 --- a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -21,14 +21,6 @@ AM_CFLAGS += \ $(LLVM_CFLAGS) \ -I$(top_srcdir)/src/gallium/drivers/radeon/ -libr600_la_SOURCES += \ - $(LLVM_C_SOURCES) - -endif - -if USE_R600_LLVM_COMPILER -AM_CFLAGS += \ - -DR600_USE_LLVM endif if HAVE_GALLIUM_COMPUTE diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index 024dea3a002..8bf8083bbab 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -64,7 +64,3 @@ CXX_SOURCES = \ sb/sb_shader.h \ sb/sb_ssa_builder.cpp \ sb/sb_valtable.cpp - -LLVM_C_SOURCES = \ - r600_llvm.c \ - r600_llvm.h diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 2a1b2519ec7..f4b669000dc 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -192,6 +192,69 @@ static const struct u_resource_vtbl r600_global_buffer_vtbl = r600_compute_global_transfer_inline_write /* transfer_inline_write */ }; +/* We need to define these R600 registers here, because we can't include + * evergreend.h and r600d.h. + */ +#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 +#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 + +#ifdef HAVE_OPENCL + +static void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, + struct r600_bytecode *bc, + uint64_t symbol_offset, + boolean *use_kill) +{ + unsigned i; + const unsigned char *config = + radeon_shader_binary_config_start(binary, symbol_offset); + + for (i = 0; i < binary->config_size_per_symbol; i+= 8) { + unsigned reg = + util_le32_to_cpu(*(uint32_t*)(config + i)); + unsigned value = + util_le32_to_cpu(*(uint32_t*)(config + i + 4)); + switch (reg) { + /* R600 / R700 */ + case R_028850_SQ_PGM_RESOURCES_PS: + case R_028868_SQ_PGM_RESOURCES_VS: + /* Evergreen / Northern Islands */ + case R_028844_SQ_PGM_RESOURCES_PS: + case R_028860_SQ_PGM_RESOURCES_VS: + case R_0288D4_SQ_PGM_RESOURCES_LS: + bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value)); + bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value)); + break; + case R_02880C_DB_SHADER_CONTROL: + *use_kill = G_02880C_KILL_ENABLE(value); + break; + case R_0288E8_SQ_LDS_ALLOC: + bc->nlds_dw = value; + break; + } + } +} + +static unsigned r600_create_shader(struct r600_bytecode *bc, + const struct radeon_shader_binary *binary, + boolean *use_kill) + +{ + assert(binary->code_size % 4 == 0); + bc->bytecode = CALLOC(1, binary->code_size); + memcpy(bc->bytecode, binary->code, binary->code_size); + bc->ndw = binary->code_size / 4; + + r600_shader_binary_read_config(binary, bc, 0, use_kill); + return 0; +} + +#endif + +static void r600_destroy_shader(struct r600_bytecode *bc) +{ + FREE(bc->bytecode); +} void *evergreen_create_compute_state( struct pipe_context *ctx_, @@ -236,13 +299,11 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) if (!shader) return; -#ifdef HAVE_OPENCL radeon_shader_binary_clean(&shader->binary); r600_destroy_shader(&shader->bc); /* TODO destroy shader->code_bo, shader->const_bo * we'll need something like r600_buffer_free */ -#endif FREE(shader); } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index c8998d00f5a..e6ff7609aea 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -26,6 +26,10 @@ #define EVERGREEN_COMPUTE_INTERNAL_H #include "r600_asm.h" +#ifdef HAVE_OPENCL +#include "radeon/radeon_llvm.h" +#include <llvm-c/Core.h> +#endif struct r600_pipe_compute { struct r600_context *ctx; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 4951297df42..7a6f957945b 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -57,18 +57,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, /* The number of dwords all the dirty states would take. */ mask = ctx->dirty_atoms; - while (mask != 0) { + while (mask != 0) num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw; - if (ctx->screen->b.trace_bo) { - num_dw += R600_TRACE_CS_DWORDS; - } - } /* The upper-bound of how much space a draw command would take. */ num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS; - if (ctx->screen->b.trace_bo) { - num_dw += R600_TRACE_CS_DWORDS; - } } /* Count in queries_suspend. */ @@ -273,7 +266,7 @@ void r600_context_gfx_flush(void *context, unsigned flags, flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; /* Flush the CS. */ - ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++); + ctx->b.ws->cs_flush(cs, flags, fence); r600_begin_new_cs(ctx); } diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c deleted file mode 100644 index 7eab29c6eb4..00000000000 --- a/src/gallium/drivers/r600/r600_llvm.c +++ /dev/null @@ -1,943 +0,0 @@ -#include "r600_llvm.h" - -#include "gallivm/lp_bld_const.h" -#include "gallivm/lp_bld_intr.h" -#include "gallivm/lp_bld_gather.h" -#include "tgsi/tgsi_parse.h" -#include "util/list.h" -#include "util/u_memory.h" - -#include "evergreend.h" -#include "r600_asm.h" -#include "r600_sq.h" -#include "r600_opcodes.h" -#include "r600_shader.h" -#include "r600_pipe.h" -#include "radeon_llvm.h" -#include "radeon_llvm_emit.h" -#include "radeon_elf_util.h" - -#include <stdio.h> - -#if defined R600_USE_LLVM || defined HAVE_OPENCL - -#define CONSTANT_BUFFER_0_ADDR_SPACE 8 -#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER) -#define LLVM_R600_BUFFER_INFO_CONST_BUFFER \ - (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER) - -static LLVMValueRef llvm_load_const_buffer( - struct lp_build_tgsi_context * bld_base, - LLVMValueRef OffsetValue, - unsigned ConstantAddressSpace) -{ - LLVMValueRef offset[2] = { - LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false), - OffsetValue - }; - - LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024), - ConstantAddressSpace); - LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, ""); - LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, ""); - return LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, ""); -} - -static LLVMValueRef llvm_fetch_const( - struct lp_build_tgsi_context * bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle) -{ - LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg->Register.Index); - if (reg->Register.Indirect) { - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.Swizzle], ""); - offset = LLVMBuildAdd(bld_base->base.gallivm->builder, offset, index, ""); - } - unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ; - if (reg->Register.Dimension) { - ConstantAddressSpace += reg->Dimension.Index; - } - LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, ConstantAddressSpace); - LLVMValueRef cval = LLVMBuildExtractElement(bld_base->base.gallivm->builder, cvecval, lp_build_const_int32(bld_base->base.gallivm, swizzle), ""); - return bitcast(bld_base, type, cval); -} - -static void llvm_load_system_value( - struct radeon_llvm_context * ctx, - unsigned index, - const struct tgsi_full_declaration *decl) -{ - unsigned chan; - - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_INSTANCEID: chan = 3; break; - case TGSI_SEMANTIC_VERTEXID: chan = 0; break; - default: assert(!"unknown system value"); - } - - ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder, - LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan), - ""); -} - -static LLVMValueRef -llvm_load_input_vector( - struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs, - boolean interp) -{ - LLVMTypeRef VecType; - LLVMValueRef Args[3] = { - lp_build_const_int32(&(ctx->gallivm), location) - }; - unsigned ArgCount = 1; - if (interp) { - VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2); - LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2); - Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex, - lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), ""); - Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex, - lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), ""); - LLVMValueRef HalfVec[2] = { - lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy", - VecType, Args, ArgCount, LLVMReadNoneAttribute), - lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw", - VecType, Args, ArgCount, LLVMReadNoneAttribute) - }; - LLVMValueRef MaskInputs[4] = { - lp_build_const_int32(&(ctx->gallivm), 0), - lp_build_const_int32(&(ctx->gallivm), 1), - lp_build_const_int32(&(ctx->gallivm), 2), - lp_build_const_int32(&(ctx->gallivm), 3) - }; - LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4); - return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1], - Mask, ""); - } else { - VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4); - return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const", - VecType, Args, ArgCount, LLVMReadNoneAttribute); - } -} - -static LLVMValueRef -llvm_face_select_helper( - struct radeon_llvm_context * ctx, - LLVMValueRef face, LLVMValueRef front_color, LLVMValueRef back_color) -{ - const struct lp_build_context * bb = &ctx->soa.bld_base.base; - LLVMValueRef is_front = LLVMBuildFCmp( - bb->gallivm->builder, LLVMRealUGT, face, - lp_build_const_float(bb->gallivm, 0.0f), ""); - return LLVMBuildSelect(bb->gallivm->builder, is_front, - front_color, back_color, ""); -} - -static void llvm_load_input( - struct radeon_llvm_context * ctx, - unsigned input_index, - const struct tgsi_full_declaration *decl) -{ - const struct r600_shader_io * input = &ctx->r600_inputs[input_index]; - unsigned chan; - int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR); - LLVMValueRef v; - boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN && - ctx->type == TGSI_PROCESSOR_FRAGMENT; - - if (require_interp_intrinsic && input->spi_sid) { - v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index, - (input->interpolate > 0)); - } else - v = LLVMGetParam(ctx->main_fn, input->gpr); - - if (two_side) { - struct r600_shader_io * back_input = - &ctx->r600_inputs[input->back_color_input]; - LLVMValueRef v2; - LLVMValueRef face = LLVMGetParam(ctx->main_fn, ctx->face_gpr); - face = LLVMBuildExtractElement(ctx->gallivm.builder, face, - lp_build_const_int32(&(ctx->gallivm), 0), ""); - - if (require_interp_intrinsic && back_input->spi_sid) - v2 = llvm_load_input_vector(ctx, back_input->lds_pos, - back_input->ij_index, (back_input->interpolate > 0)); - else - v2 = LLVMGetParam(ctx->main_fn, back_input->gpr); - v = llvm_face_select_helper(ctx, face, v, v2); - } - - for (chan = 0; chan < 4; chan++) { - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); - - ctx->inputs[soa_index] = LLVMBuildExtractElement(ctx->gallivm.builder, v, - lp_build_const_int32(&(ctx->gallivm), chan), ""); - - if (input->name == TGSI_SEMANTIC_POSITION && - ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) { - /* RCP for fragcoord.w */ - ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder, - lp_build_const_float(&(ctx->gallivm), 1.0f), - ctx->inputs[soa_index], ""); - } - } -} - -static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base) -{ - struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - radeon_llvm_shader_type(ctx->main_fn, ctx->type); - -} - -static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) -{ - struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - struct lp_build_context * base = &bld_base->base; - struct pipe_stream_output_info * so = ctx->stream_outputs; - unsigned i; - unsigned next_pos = 60; - unsigned next_param = 0; - - unsigned color_count = 0; - boolean has_color = false; - - if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) { - for (i = 0; i < so->num_outputs; i++) { - unsigned register_index = so->output[i].register_index; - unsigned start_component = so->output[i].start_component; - unsigned num_components = so->output[i].num_components; - unsigned dst_offset = so->output[i].dst_offset; - unsigned chan; - LLVMValueRef elements[4]; - if (dst_offset < start_component) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - elements[chan] = LLVMBuildLoad(base->gallivm->builder, - ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], ""); - } - start_component = 0; - } else { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - elements[chan] = LLVMBuildLoad(base->gallivm->builder, - ctx->soa.outputs[register_index][chan], ""); - } - } - LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); - LLVMValueRef args[4]; - args[0] = output; - args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component); - args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer); - args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component); - lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output", - LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0); - } - } - - /* Add the necessary export instructions */ - for (i = 0; i < ctx->output_reg_count; i++) { - unsigned chan; - LLVMValueRef elements[4]; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - elements[chan] = LLVMBuildLoad(base->gallivm->builder, - ctx->soa.outputs[i][chan], ""); - } - if (ctx->alpha_to_one && ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->r600_outputs[i].name == TGSI_SEMANTIC_COLOR) - elements[3] = lp_build_const_float(base->gallivm, 1.0f); - LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - switch (ctx->r600_outputs[i].name) { - case TGSI_SEMANTIC_POSITION: - case TGSI_SEMANTIC_PSIZE: { - LLVMValueRef args[3]; - args[0] = output; - args[1] = lp_build_const_int32(base->gallivm, next_pos++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - break; - } - case TGSI_SEMANTIC_CLIPVERTEX: { - LLVMValueRef args[3]; - unsigned reg_index; - LLVMValueRef adjusted_elements[4]; - for (reg_index = 0; reg_index < 2; reg_index ++) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan); - LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE); - args[0] = output; - args[1] = base_vector; - adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder, - "llvm.AMDGPU.dp4", bld_base->base.elem_type, - args, 2, LLVMReadNoneAttribute); - } - args[0] = lp_build_gather_values(base->gallivm, - adjusted_elements, 4); - args[1] = lp_build_const_int32(base->gallivm, next_pos++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - } - break; - } - case TGSI_SEMANTIC_CLIPDIST : { - LLVMValueRef args[3]; - args[0] = output; - args[1] = lp_build_const_int32(base->gallivm, next_pos++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - args[1] = lp_build_const_int32(base->gallivm, next_param++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - break; - } - case TGSI_SEMANTIC_FOG: { - elements[0] = LLVMBuildLoad(base->gallivm->builder, - ctx->soa.outputs[i][0], ""); - elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f); - elements[3] = lp_build_const_float(base->gallivm, 1.0f); - - LLVMValueRef args[3]; - args[0] = lp_build_gather_values(base->gallivm, elements, 4); - args[1] = lp_build_const_int32(base->gallivm, next_param++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - break; - } - default: { - LLVMValueRef args[3]; - args[0] = output; - args[1] = lp_build_const_int32(base->gallivm, next_param++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - break; - } - } - } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - switch (ctx->r600_outputs[i].name) { - case TGSI_SEMANTIC_COLOR: - has_color = true; - if ( color_count < ctx->color_buffer_count) { - LLVMValueRef args[3]; - args[0] = output; - if (ctx->fs_color_all) { - for (unsigned j = 0; j < ctx->color_buffer_count; j++) { - args[1] = lp_build_const_int32(base->gallivm, j); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - } - } else { - args[1] = lp_build_const_int32(base->gallivm, color_count++); - args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - lp_build_intrinsic( - base->gallivm->builder, - "llvm.R600.store.swizzle", - LLVMVoidTypeInContext(base->gallivm->context), - args, 3, 0); - } - } - break; - case TGSI_SEMANTIC_POSITION: - lp_build_intrinsic_unary( - base->gallivm->builder, - "llvm.R600.store.pixel.depth", - LLVMVoidTypeInContext(base->gallivm->context), - LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], "")); - break; - case TGSI_SEMANTIC_STENCIL: - lp_build_intrinsic_unary( - base->gallivm->builder, - "llvm.R600.store.pixel.stencil", - LLVMVoidTypeInContext(base->gallivm->context), - LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], "")); - break; - } - } - } - // Add dummy exports - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - if (!next_param) { - lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", - LLVMVoidTypeInContext(base->gallivm->context), - lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)); - } - if (!(next_pos-60)) { - lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", - LLVMVoidTypeInContext(base->gallivm->context), - lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS)); - } - } - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - if (!has_color) { - lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy", - LLVMVoidTypeInContext(base->gallivm->context), - lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL)); - } - } - -} - -static void llvm_emit_tex( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct gallivm_state * gallivm = bld_base->base.gallivm; - LLVMValueRef args[7]; - unsigned c, sampler_src; - struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - - if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { - switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_TXQ: { - struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - ctx->uses_tex_buffers = true; - bool isEgPlus = (ctx->chip_class >= EVERGREEN); - LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, - isEgPlus ? 0 : 1); - LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, - LLVM_R600_BUFFER_INFO_CONST_BUFFER); - if (!isEgPlus) { - LLVMValueRef maskval[4] = { - lp_build_const_int32(gallivm, 1), - lp_build_const_int32(gallivm, 2), - lp_build_const_int32(gallivm, 3), - lp_build_const_int32(gallivm, 0), - }; - LLVMValueRef mask = LLVMConstVector(maskval, 4); - cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, - mask, ""); - } - emit_data->output[0] = cvecval; - return; - } - case TGSI_OPCODE_TXF: { - args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); - args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); - emit_data->output[0] = lp_build_intrinsic(gallivm->builder, - "llvm.R600.load.texbuf", - emit_data->dst_type, args, 2, LLVMReadNoneAttribute); - if (ctx->chip_class >= EVERGREEN) - return; - ctx->uses_tex_buffers = true; - LLVMDumpValue(emit_data->output[0]); - emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, - emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), - ""); - LLVMValueRef Mask = llvm_load_const_buffer(bld_base, - lp_build_const_int32(gallivm, 0), - LLVM_R600_BUFFER_INFO_CONST_BUFFER); - Mask = LLVMBuildBitCast(gallivm->builder, Mask, - LLVMVectorType(bld_base->base.int_elem_type, 4), ""); - emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, - emit_data->output[0], - Mask); - LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, - emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); - Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), - LLVM_R600_BUFFER_INFO_CONST_BUFFER); - Mask = LLVMBuildExtractElement(gallivm->builder, Mask, - lp_build_const_int32(gallivm, 0), ""); - Mask = LLVMBuildBitCast(gallivm->builder, Mask, - bld_base->base.int_elem_type, ""); - WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, - WComponent, Mask); - emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, - emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); - emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, - emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); - } - return; - default: - break; - } - } - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || - emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { - LLVMValueRef Vector[4] = { - LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], - lp_build_const_int32(gallivm, 0), ""), - LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], - lp_build_const_int32(gallivm, 1), ""), - LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], - lp_build_const_int32(gallivm, 2), ""), - LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], - lp_build_const_int32(gallivm, 3), ""), - }; - switch (emit_data->inst->Texture.Texture) { - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); - break; - case TGSI_TEXTURE_1D: - Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); - break; - default: - break; - } - args[0] = lp_build_gather_values(gallivm, Vector, 4); - } else { - args[0] = emit_data->args[0]; - } - - assert(emit_data->arg_count + 2 <= Elements(args)); - - for (c = 1; c < emit_data->arg_count; ++c) - args[c] = emit_data->args[c]; - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { - args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); - args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); - args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); - } - - sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; - - args[c++] = lp_build_const_int32(gallivm, - emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); - args[c++] = lp_build_const_int32(gallivm, - emit_data->inst->Src[sampler_src].Register.Index); - args[c++] = lp_build_const_int32(gallivm, - emit_data->inst->Texture.Texture); - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && - (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || - emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { - - switch (emit_data->inst->Texture.Texture) { - case TGSI_TEXTURE_2D_MSAA: - args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); - break; - case TGSI_TEXTURE_2D_ARRAY_MSAA: - args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); - break; - default: - break; - } - - if (ctx->has_compressed_msaa_texturing) { - LLVMValueRef ldptr_args[10] = { - args[0], // Coord - args[1], // Offset X - args[2], // Offset Y - args[3], // Offset Z - args[4], - args[5], - lp_build_const_int32(gallivm, 1), - lp_build_const_int32(gallivm, 1), - lp_build_const_int32(gallivm, 1), - lp_build_const_int32(gallivm, 1) - }; - LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder, - "llvm.R600.ldptr", - emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); - LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], - lp_build_const_int32(gallivm, 3), ""); - Tmp = LLVMBuildMul(gallivm->builder, Tmp, - lp_build_const_int32(gallivm, 4), ""); - LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, - lp_build_const_int32(gallivm, 0), ""); - ResX = LLVMBuildBitCast(gallivm->builder, ResX, - bld_base->base.int_elem_type, ""); - Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); - Tmp = LLVMBuildAnd(gallivm->builder, Tmp, - lp_build_const_int32(gallivm, 0xF), ""); - args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, - lp_build_const_int32(gallivm, 3), ""); - args[c++] = lp_build_const_int32(gallivm, - emit_data->inst->Texture.Texture); - } - } - - emit_data->output[0] = lp_build_intrinsic(gallivm->builder, - action->intr_name, - emit_data->dst_type, args, c, LLVMReadNoneAttribute); - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && - ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || - emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) - if (emit_data->inst->Dst[0].Register.WriteMask & 4) { - LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); - LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, - llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER), - lp_build_const_int32(gallivm, 0), ""); - - emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); - struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - ctx->has_txq_cube_array_z_comp = true; - } -} - -static void emit_cndlt( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef float_zero = lp_build_const_float( - bld_base->base.gallivm, 0.0f); - LLVMValueRef cmp = LLVMBuildFCmp( - builder, LLVMRealULT, emit_data->args[0], float_zero, ""); - emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, - cmp, emit_data->args[1], emit_data->args[2], ""); -} - -static void dp_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct lp_build_context * base = &bld_base->base; - unsigned chan; - LLVMValueRef elements[2][4]; - unsigned opcode = emit_data->inst->Instruction.Opcode; - unsigned dp_components = (opcode == TGSI_OPCODE_DP2 ? 2 : - (opcode == TGSI_OPCODE_DP3 ? 3 : 4)); - for (chan = 0 ; chan < dp_components; chan++) { - elements[0][chan] = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, chan); - elements[1][chan] = lp_build_emit_fetch(bld_base, - emit_data->inst, 1, chan); - } - - for ( ; chan < 4; chan++) { - elements[0][chan] = base->zero; - elements[1][chan] = base->zero; - } - - /* Fix up for DPH */ - if (opcode == TGSI_OPCODE_DPH) { - elements[0][TGSI_CHAN_W] = base->one; - } - - emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, - elements[0], 4); - emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm, - elements[1], 4); - emit_data->arg_count = 2; - - emit_data->dst_type = base->elem_type; -} - -static struct lp_build_tgsi_action dot_action = { - .fetch_args = dp_fetch_args, - .emit = build_tgsi_intrinsic_nomem, - .intr_name = "llvm.AMDGPU.dp4" -}; - -static void txd_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - const struct tgsi_full_instruction * inst = emit_data->inst; - - LLVMValueRef coords[4]; - unsigned chan, src; - for (src = 0; src < 3; src++) { - for (chan = 0; chan < 4; chan++) - coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan); - - emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm, - coords, 4); - } - emit_data->arg_count = 3; - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); -} - - -static void txp_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - const struct tgsi_full_instruction * inst = emit_data->inst; - LLVMValueRef src_w; - unsigned chan; - LLVMValueRef coords[5]; - - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); - src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); - - for (chan = 0; chan < 3; chan++ ) { - LLVMValueRef arg = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, chan); - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, arg, src_w); - } - coords[3] = bld_base->base.one; - - if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || - inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && - inst->Instruction.Opcode != TGSI_OPCODE_TXQ && - inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { - radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); - } - - emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, - coords, 4); - emit_data->arg_count = 1; -} - -static void tex_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - const struct tgsi_full_instruction * inst = emit_data->inst; - - LLVMValueRef coords[5]; - unsigned chan; - for (chan = 0; chan < 4; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || - inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || - inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { - /* These instructions have additional operand that should be packed - * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords. - * That operand should be passed as a float value in the args array - * right after the coord vector. After packing it's not used anymore, - * that's why arg_count is not increased */ - coords[4] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - } - - if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || - inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && - inst->Instruction.Opcode != TGSI_OPCODE_TXQ && - inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { - radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); - } - - emit_data->arg_count = 1; - emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, - coords, 4); - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); -} - -static void txf_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - const struct tgsi_full_instruction * inst = emit_data->inst; - struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - const struct tgsi_texture_offset * off = inst->TexOffsets; - LLVMTypeRef offset_type = bld_base->int_bld.elem_type; - - /* fetch tex coords */ - tex_fetch_args(bld_base, emit_data); - - /* fetch tex offsets */ - if (inst->Texture.NumOffsets) { - assert(inst->Texture.NumOffsets == 1); - - emit_data->args[1] = LLVMConstBitCast( - bld->immediates[off->Index][off->SwizzleX], - offset_type); - emit_data->args[2] = LLVMConstBitCast( - bld->immediates[off->Index][off->SwizzleY], - offset_type); - emit_data->args[3] = LLVMConstBitCast( - bld->immediates[off->Index][off->SwizzleZ], - offset_type); - } else { - emit_data->args[1] = bld_base->int_bld.zero; - emit_data->args[2] = bld_base->int_bld.zero; - emit_data->args[3] = bld_base->int_bld.zero; - } - - emit_data->arg_count = 4; -} - -LLVMModuleRef r600_tgsi_llvm( - struct radeon_llvm_context * ctx, - const struct tgsi_token * tokens) -{ - struct tgsi_shader_info shader_info; - struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; - radeon_llvm_context_init(ctx, "r600--"); - LLVMTypeRef Arguments[32]; - unsigned ArgumentsCount = 0; - for (unsigned i = 0; i < ctx->inputs_count; i++) - Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4); - radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount); - for (unsigned i = 0; i < ctx->inputs_count; i++) { - LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); - LLVMAddAttribute(P, LLVMInRegAttribute); - } - tgsi_scan_shader(tokens, &shader_info); - - bld_base->info = &shader_info; - bld_base->userdata = ctx; - bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const; - bld_base->emit_prologue = llvm_emit_prologue; - bld_base->emit_epilogue = llvm_emit_epilogue; - ctx->load_input = llvm_load_input; - ctx->load_system_value = llvm_load_system_value; - - bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action; - bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action; - bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action; - bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action; - bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; - bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; - bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; - bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex"; - bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb"; - bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb"; - bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; - bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; - bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; - bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl"; - bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; - bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; - bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt; - - lp_build_tgsi_llvm(bld_base, tokens); - - LLVMBuildRetVoid(bld_base->base.gallivm->builder); - radeon_llvm_finalize_module(ctx); - - return ctx->gallivm.module; -} - -/* We need to define these R600 registers here, because we can't include - * evergreend.h and r600d.h. - */ -#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 -#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 - -void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, - struct r600_bytecode *bc, - uint64_t symbol_offset, - boolean *use_kill) -{ - unsigned i; - const unsigned char *config = - radeon_shader_binary_config_start(binary, symbol_offset); - - for (i = 0; i < binary->config_size_per_symbol; i+= 8) { - unsigned reg = - util_le32_to_cpu(*(uint32_t*)(config + i)); - unsigned value = - util_le32_to_cpu(*(uint32_t*)(config + i + 4)); - switch (reg) { - /* R600 / R700 */ - case R_028850_SQ_PGM_RESOURCES_PS: - case R_028868_SQ_PGM_RESOURCES_VS: - /* Evergreen / Northern Islands */ - case R_028844_SQ_PGM_RESOURCES_PS: - case R_028860_SQ_PGM_RESOURCES_VS: - case R_0288D4_SQ_PGM_RESOURCES_LS: - bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value)); - bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value)); - break; - case R_02880C_DB_SHADER_CONTROL: - *use_kill = G_02880C_KILL_ENABLE(value); - break; - case R_0288E8_SQ_LDS_ALLOC: - bc->nlds_dw = value; - break; - } - } - -} - -unsigned r600_create_shader(struct r600_bytecode *bc, - const struct radeon_shader_binary *binary, - boolean *use_kill) - -{ - assert(binary->code_size % 4 == 0); - bc->bytecode = CALLOC(1, binary->code_size); - memcpy(bc->bytecode, binary->code, binary->code_size); - bc->ndw = binary->code_size / 4; - - r600_shader_binary_read_config(binary, bc, 0, use_kill); - - return 0; -} - -void r600_destroy_shader(struct r600_bytecode *bc) -{ - FREE(bc->bytecode); -} - -unsigned r600_llvm_compile( - LLVMModuleRef mod, - enum radeon_family family, - struct r600_bytecode *bc, - boolean *use_kill, - unsigned dump, - struct pipe_debug_callback *debug) -{ - unsigned r; - struct radeon_shader_binary binary; - const char * gpu_family = r600_get_llvm_processor_name(family); - - radeon_shader_binary_init(&binary); - if (dump) - LLVMDumpModule(mod); - r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug); - - r = r600_create_shader(bc, &binary, use_kill); - - radeon_shader_binary_clean(&binary); - - return r; -} - -#endif diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h deleted file mode 100644 index 3f7fc4bef7e..00000000000 --- a/src/gallium/drivers/r600/r600_llvm.h +++ /dev/null @@ -1,42 +0,0 @@ - -#ifndef R600_LLVM_H -#define R600_LLVM_H - -#if defined R600_USE_LLVM || defined HAVE_OPENCL - -#include "radeon/radeon_llvm.h" -#include <llvm-c/Core.h> - -struct pipe_debug_callback; -struct r600_bytecode; -struct r600_shader_ctx; -struct radeon_llvm_context; -struct radeon_shader_binary; -enum radeon_family; - -LLVMModuleRef r600_tgsi_llvm( - struct radeon_llvm_context * ctx, - const struct tgsi_token * tokens); - -unsigned r600_llvm_compile( - LLVMModuleRef mod, - enum radeon_family family, - struct r600_bytecode *bc, - boolean *use_kill, - unsigned dump, - struct pipe_debug_callback *debug); - -unsigned r600_create_shader(struct r600_bytecode *bc, - const struct radeon_shader_binary *binary, - boolean *use_kill); - -void r600_destroy_shader(struct r600_bytecode *bc); - -void r600_shader_binary_read_config(const struct radeon_shader_binary *binary, - struct r600_bytecode *bc, - uint64_t symbol_offset, - boolean *use_kill); - -#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */ - -#endif /* R600_LLVM_H */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 7018088d204..b8011917907 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -43,9 +43,6 @@ static const struct debug_named_value r600_debug_options[] = { /* features */ -#if defined(R600_USE_LLVM) - { "llvm", DBG_LLVM, "Enable the LLVM shader compiler" }, -#endif { "nocpdma", DBG_NO_CP_DMA, "Disable CP DMA" }, /* shader backend */ @@ -187,9 +184,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, } rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, - r600_context_gfx_flush, rctx, - rscreen->b.trace_bo ? - rscreen->b.trace_bo->buf : NULL); + r600_context_gfx_flush, rctx); rctx->b.gfx.flush = r600_context_gfx_flush; rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256, @@ -622,8 +617,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws) rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS | DBG_TCS | DBG_TES; if (!debug_get_bool_option("R600_HYPERZ", TRUE)) rscreen->b.debug_flags |= DBG_NO_HYPERZ; - if (debug_get_bool_option("R600_LLVM", FALSE)) - rscreen->b.debug_flags |= DBG_LLVM; if (rscreen->b.family == CHIP_UNKNOWN) { fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index f8a20398355..cd0052a519f 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -28,8 +28,6 @@ #include "radeon/r600_pipe_common.h" #include "radeon/r600_cs.h" - -#include "r600_llvm.h" #include "r600_public.h" #include "util/u_suballoc.h" @@ -60,7 +58,6 @@ /* the number of CS dwords for flushing and drawing */ #define R600_MAX_FLUSH_CS_DWORDS 16 #define R600_MAX_DRAW_CS_DWORDS 58 -#define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 #define R600_MAX_DRIVER_CONST_BUFFERS 3 @@ -244,7 +241,6 @@ struct r600_gs_rings_state { /* This must start from 16. */ /* features */ -#define DBG_LLVM (1 << 29) #define DBG_NO_CP_DMA (1 << 30) /* shader backend */ #define DBG_NO_SB (1 << 21) @@ -571,15 +567,10 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx, r600_set_atom_dirty(rctx, atom, true); } -void r600_trace_emit(struct r600_context *rctx); - static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) { atom->emit(&rctx->b, atom); r600_set_atom_dirty(rctx, atom, false); - if (rctx->screen->b.trace_bo) { - r600_trace_emit(rctx); - } } static inline void r600_set_cso_state(struct r600_context *rctx, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index df40f94bdcf..77658f53551 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -21,7 +21,6 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r600_sq.h" -#include "r600_llvm.h" #include "r600_formats.h" #include "r600_opcodes.h" #include "r600_shader.h" @@ -194,10 +193,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, /* disable SB for shaders using doubles */ use_sb &= !shader->shader.uses_doubles; - /* Check if the bytecode has already been built. When using the llvm - * backend, r600_shader_from_tgsi() will take care of building the - * bytecode. - */ + /* Check if the bytecode has already been built. */ if (!shader->shader.bc.bytecode) { r = r600_bytecode_build(&shader->shader.bc); if (r) { @@ -332,7 +328,6 @@ struct r600_shader_ctx { uint32_t *literals; uint32_t nliterals; uint32_t max_driver_temp_used; - boolean use_llvm; /* needed for evergreen interpolation */ struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid /* evergreen/cayman also store sample mask in face register */ @@ -661,11 +656,9 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) ctx->shader->input[index].lds_pos = ctx->shader->nlds++; if (ctx->shader->input[index].interpolate > 0) { evergreen_interp_assign_ij_index(ctx, index); - if (!ctx->use_llvm) - r = evergreen_interp_alu(ctx, index); + r = evergreen_interp_alu(ctx, index); } else { - if (!ctx->use_llvm) - r = evergreen_interp_flat(ctx, index); + r = evergreen_interp_flat(ctx, index); } } return r; @@ -2936,22 +2929,16 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, int i, j, k, r = 0; int next_param_base = 0, next_clip_base; int max_color_exports = MAX2(key.ps.nr_cbufs, 1); - /* Declarations used by llvm code */ - bool use_llvm = false; bool indirect_gprs; bool ring_outputs = false; bool lds_outputs = false; bool lds_inputs = false; bool pos_emitted = false; -#ifdef R600_USE_LLVM - use_llvm = rscreen->b.debug_flags & DBG_LLVM; -#endif ctx.bc = &shader->bc; ctx.shader = shader; ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); ctx.tokens = tokens; @@ -3043,19 +3030,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.file_offset[i] = 0; } -#ifdef R600_USE_LLVM - if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { - fprintf(stderr, "Warning: R600 LLVM backend does not support " - "indirect adressing. Falling back to TGSI " - "backend.\n"); - use_llvm = 0; - } -#endif if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (!use_llvm) { - r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); - } + r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); } if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { if (ctx.bc->chip_class >= EVERGREEN) @@ -3085,16 +3062,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, if (add_tess_inout) ctx.file_offset[TGSI_FILE_INPUT]+=2; } - ctx.use_llvm = use_llvm; - if (use_llvm) { - ctx.file_offset[TGSI_FILE_OUTPUT] = - ctx.file_offset[TGSI_FILE_INPUT]; - } else { - ctx.file_offset[TGSI_FILE_OUTPUT] = + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_max[TGSI_FILE_INPUT] + 1; - } ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; @@ -3234,71 +3205,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } } -/* LLVM backend setup */ -#ifdef R600_USE_LLVM - if (use_llvm) { - struct radeon_llvm_context radeon_llvm_ctx; - LLVMModuleRef mod; - bool dump = r600_can_dump_shader(&rscreen->b, - tgsi_get_processor_type(tokens)); - boolean use_kill = false; - - memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); - radeon_llvm_ctx.type = ctx.type; - radeon_llvm_ctx.two_side = shader->two_side; - radeon_llvm_ctx.face_gpr = ctx.face_gpr; - radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; - radeon_llvm_ctx.r600_inputs = ctx.shader->input; - radeon_llvm_ctx.r600_outputs = ctx.shader->output; - radeon_llvm_ctx.color_buffer_count = max_color_exports; - radeon_llvm_ctx.chip_class = ctx.bc->chip_class; - radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); - radeon_llvm_ctx.stream_outputs = &so; - radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; - radeon_llvm_ctx.has_compressed_msaa_texturing = - ctx.bc->has_compressed_msaa_texturing; - mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); - ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; - ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; - - if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, - dump, &rctx->b.debug)) { - radeon_llvm_dispose(&radeon_llvm_ctx); - use_llvm = 0; - fprintf(stderr, "R600 LLVM backend failed to compile " - "shader. Falling back to TGSI\n"); - } else { - ctx.file_offset[TGSI_FILE_OUTPUT] = - ctx.file_offset[TGSI_FILE_INPUT]; - } - if (use_kill) - ctx.shader->uses_kill = use_kill; - radeon_llvm_dispose(&radeon_llvm_ctx); - } -#endif -/* End of LLVM backend setup */ - if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) shader->nr_ps_max_color_exports = 8; - if (!use_llvm) { - if (ctx.fragcoord_input >= 0) { - if (ctx.bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_RECIP_IEEE; - alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; - alu.src[0].chan = 3; - - alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = j; - alu.dst.write = (j == 3); - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) - return r; - } - } else { + if (ctx.fragcoord_input >= 0) { + if (ctx.bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_RECIP_IEEE; @@ -3306,87 +3218,100 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, alu.src[0].chan = 3; alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = 3; - alu.dst.write = 1; + alu.dst.chan = j; + alu.dst.write = (j == 3); alu.last = 1; if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) return r; } - } - - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + } else { struct r600_bytecode_alu alu; - int r; - - /* GS thread with no output workaround - emit a cut at start of GS */ - if (ctx.bc->chip_class == R600) - r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_RECIP_IEEE; + alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; + alu.src[0].chan = 3; - for (j = 0; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_MOV; - alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[0].value = 0; - alu.dst.sel = ctx.gs_export_gpr_tregs[j]; - alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx.bc, &alu); - if (r) - return r; - } + alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; } + } + + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + struct r600_bytecode_alu alu; + int r; - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) - r600_fetch_tess_io_info(&ctx); + /* GS thread with no output workaround - emit a cut at start of GS */ + if (ctx.bc->chip_class == R600) + r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); - if (shader->two_side && ctx.colors_used) { - if ((r = process_twoside_color_inputs(&ctx))) + for (j = 0; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[0].value = 0; + alu.dst.sel = ctx.gs_export_gpr_tregs[j]; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx.bc, &alu); + if (r) return r; } + } - tgsi_parse_init(&ctx.parse, tokens); - while (!tgsi_parse_end_of_tokens(&ctx.parse)) { - tgsi_parse_token(&ctx.parse); - switch (ctx.parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - r = tgsi_is_supported(&ctx); - if (r) - goto out_err; - ctx.max_driver_temp_used = 0; - /* reserve first tmp for everyone */ - r600_get_temp(&ctx); + if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) + r600_fetch_tess_io_info(&ctx); + + if (shader->two_side && ctx.colors_used) { + if ((r = process_twoside_color_inputs(&ctx))) + return r; + } - opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if ((r = tgsi_split_constant(&ctx))) + tgsi_parse_init(&ctx.parse, tokens); + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); + + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + if ((r = tgsi_split_gs_inputs(&ctx))) goto out_err; - if ((r = tgsi_split_literal_constant(&ctx))) + } else if (lds_inputs) { + if ((r = tgsi_split_lds_inputs(&ctx))) goto out_err; - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { - if ((r = tgsi_split_gs_inputs(&ctx))) - goto out_err; - } else if (lds_inputs) { - if ((r = tgsi_split_lds_inputs(&ctx))) - goto out_err; - } - if (ctx.bc->chip_class == CAYMAN) - ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chip_class >= EVERGREEN) - ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; - else - ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; - r = ctx.inst_info->process(&ctx); + } + if (ctx.bc->chip_class == CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chip_class >= EVERGREEN) + ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; + else + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + + if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { + r = r600_store_tcs_output(&ctx); if (r) goto out_err; - - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { - r = r600_store_tcs_output(&ctx); - if (r) - goto out_err; - } - break; - default: - break; } + break; + default: + break; } } @@ -3437,8 +3362,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, alu.dst.write = (j == ochan); if (j == 3) alu.last = 1; - if (!use_llvm) - r = r600_bytecode_add_alu(ctx.bc, &alu); + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -3446,7 +3370,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* Add stream outputs. */ - if (!use_llvm && so.num_outputs) { + if (so.num_outputs) { bool emit = false; if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX) emit = true; @@ -3709,31 +3633,27 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } } /* add output to bytecode */ - if (!use_llvm) { - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; - } + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; } } /* add program end */ - if (!use_llvm) { - if (ctx.bc->chip_class == CAYMAN) - cm_bytecode_add_cf_end(ctx.bc); - else { - const struct cf_op_info *last = NULL; + if (ctx.bc->chip_class == CAYMAN) + cm_bytecode_add_cf_end(ctx.bc); + else { + const struct cf_op_info *last = NULL; - if (ctx.bc->cf_last) - last = r600_isa_cf(ctx.bc->cf_last->op); + if (ctx.bc->cf_last) + last = r600_isa_cf(ctx.bc->cf_last->op); - /* alu clause instructions don't have EOP bit, so add NOP */ - if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) - r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); + /* alu clause instructions don't have EOP bit, so add NOP */ + if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) + r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); - ctx.bc->cf_last->end_of_program = 1; - } + ctx.bc->cf_last->end_of_program = 1; } /* check GPR limit - we have 124 = 128 - 4 diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 2211e07ceba..df41d3f028d 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2029,10 +2029,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT); } - if (rctx->screen->b.trace_bo) { - r600_trace_emit(rctx); - } - /* Set the depth buffer as dirty. */ if (rctx->framebuffer.state.zsbuf) { struct pipe_surface *surf = rctx->framebuffer.state.zsbuf; @@ -2927,22 +2923,3 @@ void r600_init_common_state_functions(struct r600_context *rctx) rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state; rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space; } - -void r600_trace_emit(struct r600_context *rctx) -{ - struct r600_screen *rscreen = rctx->screen; - struct radeon_winsys_cs *cs = rctx->b.gfx.cs; - uint64_t va; - uint32_t reloc; - - va = rscreen->b.trace_bo->gpu_address; - reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo, - RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); - radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFUL); - radeon_emit(cs, cs->cdw); - radeon_emit(cs, rscreen->b.cs_count); - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, reloc); -} diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp index 556a05da395..3dd3a4815ba 100644 --- a/src/gallium/drivers/r600/sb/sb_expr.cpp +++ b/src/gallium/drivers/r600/sb/sb_expr.cpp @@ -598,9 +598,13 @@ bool expr_handler::fold_assoc(alu_node *n) { unsigned op = n->bc.op; bool allow_neg = false, cur_neg = false; + bool distribute_neg = false; switch(op) { case ALU_OP2_ADD: + distribute_neg = true; + allow_neg = true; + break; case ALU_OP2_MUL: case ALU_OP2_MUL_IEEE: allow_neg = true; @@ -632,7 +636,7 @@ bool expr_handler::fold_assoc(alu_node *n) { if (v1->is_const()) { literal arg = v1->get_const_value(); apply_alu_src_mod(a->bc, 1, arg); - if (cur_neg) + if (cur_neg && distribute_neg) arg.f = -arg.f; if (a == n) @@ -660,7 +664,7 @@ bool expr_handler::fold_assoc(alu_node *n) { if (v0->is_const()) { literal arg = v0->get_const_value(); apply_alu_src_mod(a->bc, 0, arg); - if (cur_neg) + if (cur_neg && distribute_neg) arg.f = -arg.f; if (last_arg == 0) { diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index ea028272ccd..eed9d83ee49 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -229,7 +229,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, struct radeon_winsys_cs *cs = rctx->dma.cs; if (cs->cdw) - rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0); + rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); if (fence) rctx->ws->fence_reference(fence, rctx->last_sdma_fence); } @@ -318,7 +318,7 @@ bool r600_common_context_init(struct r600_common_context *rctx, if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, - rctx, NULL); + rctx); rctx->dma.flush = r600_flush_dma_ring; } @@ -379,7 +379,6 @@ static const struct debug_named_value common_debug_options[] = { { "tex", DBG_TEX, "Print texture info" }, { "compute", DBG_COMPUTE, "Print compute info" }, { "vm", DBG_VM, "Print virtual addresses when creating resources" }, - { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" }, { "info", DBG_INFO, "Print driver information" }, /* shaders */ @@ -893,19 +892,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, pipe_mutex_init(rscreen->aux_context_lock); pipe_mutex_init(rscreen->gpu_load_mutex); - if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) || - rscreen->info.drm_major == 3) && - (rscreen->debug_flags & DBG_TRACE_CS)) { - rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b, - PIPE_BIND_CUSTOM, - PIPE_USAGE_STAGING, - 4096); - if (rscreen->trace_bo) { - rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->buf, NULL, - PIPE_TRANSFER_UNSYNCHRONIZED); - } - } - if (rscreen->debug_flags & DBG_INFO) { printf("pci_id = 0x%x\n", rscreen->info.pci_id); printf("family = %i (%s)\n", rscreen->info.family, @@ -951,9 +937,6 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen) pipe_mutex_destroy(rscreen->aux_context_lock); rscreen->aux_context->destroy(rscreen->aux_context); - if (rscreen->trace_bo) - pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL); - rscreen->ws->destroy(rscreen->ws); FREE(rscreen); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index cf8dcf7ea88..381ad21a4e3 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -61,7 +61,7 @@ /* gap - reuse */ #define DBG_COMPUTE (1 << 2) #define DBG_VM (1 << 3) -#define DBG_TRACE_CS (1 << 4) +/* gap - reuse */ /* shader logging */ #define DBG_FS (1 << 5) #define DBG_VS (1 << 6) @@ -303,10 +303,6 @@ struct r600_common_screen { struct pipe_context *aux_context; pipe_mutex aux_context_lock; - struct r600_resource *trace_bo; - uint32_t *trace_ptr; - unsigned cs_count; - /* This must be in the screen, because UE4 uses one context for * compilation and another one for rendering. */ @@ -610,6 +606,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct r600_atom *fb_state, unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color); +void r600_texture_disable_dcc(struct r600_common_screen *rscreen, + struct r600_texture *rtex); void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); void r600_init_context_texture_functions(struct r600_common_context *rctx); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 115c7289c4c..7322f3ee985 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -201,9 +201,11 @@ static int r600_init_surface(struct r600_common_screen *rscreen, static int r600_setup_surface(struct pipe_screen *screen, struct r600_texture *rtex, - unsigned pitch_in_bytes_override) + unsigned pitch_in_bytes_override, + unsigned offset) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + unsigned i; int r; r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface); @@ -225,6 +227,11 @@ static int r600_setup_surface(struct pipe_screen *screen, rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size; } } + + if (offset) { + for (i = 0; i < Elements(rtex->surface.level); ++i) + rtex->surface.level[i].offset += offset; + } return 0; } @@ -290,8 +297,8 @@ static void r600_texture_disable_cmask(struct r600_common_screen *rscreen, p_atomic_inc(&rscreen->compressed_colortex_counter); } -static void r600_texture_disable_dcc(struct r600_common_screen *rscreen, - struct r600_texture *rtex) +void r600_texture_disable_dcc(struct r600_common_screen *rscreen, + struct r600_texture *rtex) { struct r600_common_context *rctx = (struct r600_common_context *)rscreen->aux_context; @@ -366,6 +373,8 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, return rscreen->ws->buffer_get_handle(res->buf, rtex->surface.level[0].pitch_bytes, + rtex->surface.level[0].offset, + rtex->surface.level[0].slice_size, whandle); } @@ -629,8 +638,14 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen, rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) return 0; - /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */ - if (rscreen->family == CHIP_STONEY) + /* Overalign HTILE on P2 configs to work around GPU hangs in + * piglit/depthstencil-render-miplevels 585. + * + * This has been confirmed to help Kabini & Stoney, where the hangs + * are always reproducible. I think I have seen the test hang + * on Carrizo too, though it was very rare there. + */ + if (rscreen->chip_class >= CIK && num_pipes < 4) num_pipes = 4; switch (num_pipes) { @@ -791,6 +806,7 @@ static struct r600_texture * r600_texture_create_object(struct pipe_screen *screen, const struct pipe_resource *base, unsigned pitch_in_bytes_override, + unsigned offset, struct pb_buffer *buf, struct radeon_surf *surface) { @@ -812,7 +828,7 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format)); rtex->surface = *surface; - if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) { + if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) { FREE(rtex); return NULL; } @@ -979,7 +995,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, if (r) { return NULL; } - return (struct pipe_resource *)r600_texture_create_object(screen, templ, + return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0, 0, NULL, &surface); } @@ -990,7 +1006,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct pb_buffer *buf = NULL; - unsigned stride = 0; + unsigned stride = 0, offset = 0; unsigned array_mode; struct radeon_surf surface; int r; @@ -1002,7 +1018,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen templ->depth0 != 1 || templ->last_level != 0) return NULL; - buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride); + buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset); if (!buf) return NULL; @@ -1029,8 +1045,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen if (metadata.scanout) surface.flags |= RADEON_SURF_SCANOUT; - rtex = r600_texture_create_object(screen, templ, - stride, buf, &surface); + rtex = r600_texture_create_object(screen, templ, stride, + offset, buf, &surface); if (!rtex) return NULL; diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index bdee2f8020a..0a164bba307 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -51,24 +51,8 @@ struct radeon_llvm_loop { }; struct radeon_llvm_context { - struct lp_build_tgsi_soa_context soa; - unsigned chip_class; - unsigned type; - unsigned face_gpr; - unsigned two_side; - unsigned inputs_count; - struct r600_shader_io * r600_inputs; - struct r600_shader_io * r600_outputs; - struct pipe_stream_output_info *stream_outputs; - unsigned color_buffer_count; - unsigned fs_color_all; - unsigned alpha_to_one; - unsigned has_txq_cube_array_z_comp; - unsigned uses_tex_buffers; - unsigned has_compressed_msaa_texturing; - /*=== Front end configuration ===*/ /* Instructions that are not described by any of the TGSI opcodes. */ @@ -90,7 +74,6 @@ struct radeon_llvm_context { */ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS]; LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; - unsigned output_reg_count; /** This pointer is used to contain the temporary values. * The amount of temporary used in tgsi can't be bound to a max value and diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index c74397fb5c9..fb883cb585e 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -363,9 +363,6 @@ static void emit_declaration( ctx->soa.bld_base.base.elem_type, ""); } } - - ctx->output_reg_count = MAX2(ctx->output_reg_count, - decl->Range.Last + 1); break; } diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index b8efc58eaab..233f46091a4 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -92,7 +92,7 @@ struct ruvd_decoder { /* flush IB to the hardware */ static void flush(struct ruvd_decoder *dec) { - dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0); + dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL); } /* add a new set register command to the IB */ @@ -1142,7 +1142,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, dec->stream_handle = rvid_alloc_stream_handle(); dec->screen = context->screen; dec->ws = ws; - dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL); + dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL); if (!dec->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 087d9422c04..2ab74e9eb6c 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -56,7 +56,7 @@ */ static void flush(struct rvce_encoder *enc) { - enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0); + enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL); enc->task_info_idx = 0; enc->bs_idx = 0; } @@ -429,7 +429,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, enc->screen = context->screen; enc->ws = ws; - enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL); + enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc); if (!enc->cs) { RVID_ERR("Can't get command submission context.\n"); goto error; diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index b8a065957a7..d35e963133e 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -515,7 +515,7 @@ struct radeon_winsys { */ struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws, struct winsys_handle *whandle, - unsigned *stride); + unsigned *stride, unsigned *offset); /** * Get a winsys buffer from a user pointer. The resulting buffer can't @@ -546,7 +546,8 @@ struct radeon_winsys { * \return TRUE on success. */ boolean (*buffer_get_handle)(struct pb_buffer *buf, - unsigned stride, + unsigned stride, unsigned offset, + unsigned slice_size, struct winsys_handle *whandle); /** @@ -592,14 +593,12 @@ struct radeon_winsys { * \param ring_type The ring type (GFX, DMA, UVD) * \param flush Flush callback function associated with the command stream. * \param user User pointer that will be passed to the flush callback. - * \param trace_buf Trace buffer when tracing is enabled */ struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, void (*flush)(void *ctx, unsigned flags, struct pipe_fence_handle **fence), - void *flush_ctx, - struct pb_buffer *trace_buf); + void *flush_ctx); /** * Destroy a command stream. @@ -672,12 +671,10 @@ struct radeon_winsys { * \param flags, RADEON_FLUSH_ASYNC or 0. * \param fence Pointer to a fence. If non-NULL, a fence is inserted * after the CS and is returned through this parameter. - * \param cs_trace_id A unique identifier of the cs, used for tracing. */ void (*cs_flush)(struct radeon_winsys_cs *cs, unsigned flags, - struct pipe_fence_handle **fence, - uint32_t cs_trace_id); + struct pipe_fence_handle **fence); /** * Return TRUE if a buffer is referenced by a command stream. diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f9a6de48f6b..e0dbec5fb79 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -325,8 +325,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx, } static void -si_decompress_color_textures(struct si_context *sctx, - struct si_textures_info *textures) +si_decompress_sampler_color_textures(struct si_context *sctx, + struct si_textures_info *textures) { unsigned i; unsigned mask = textures->compressed_colortex_mask; @@ -350,6 +350,33 @@ si_decompress_color_textures(struct si_context *sctx, } } +static void +si_decompress_image_color_textures(struct si_context *sctx, + struct si_images_info *images) +{ + unsigned i; + unsigned mask = images->compressed_colortex_mask; + + while (mask) { + const struct pipe_image_view *view; + struct r600_texture *tex; + + i = u_bit_scan(&mask); + + view = &images->views[i]; + assert(view->resource->target != PIPE_BUFFER); + + tex = (struct r600_texture *)view->resource; + if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset) + continue; + + si_blit_decompress_color(&sctx->b.b, tex, + view->u.tex.level, view->u.tex.level, + 0, util_max_layer(&tex->resource.b.b, view->u.tex.level), + false); + } +} + void si_decompress_textures(struct si_context *sctx) { unsigned compressed_colortex_counter; @@ -370,7 +397,10 @@ void si_decompress_textures(struct si_context *sctx) si_flush_depth_textures(sctx, &sctx->samplers[i]); } if (sctx->samplers[i].compressed_colortex_mask) { - si_decompress_color_textures(sctx, &sctx->samplers[i]); + si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]); + } + if (sctx->images[i].compressed_colortex_mask) { + si_decompress_image_color_textures(sctx, &sctx->images[i]); } } } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d12b3e6b28a..815b87bbd7e 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -64,7 +64,8 @@ #include "util/u_upload_mgr.h" -/* NULL image and buffer descriptor. +/* NULL image and buffer descriptor for textures (alpha = 1) and images + * (alpha = 0). * * For images, all fields must be zero except for the swizzle, which * supports arbitrary combinations of 0s and 1s. The texture type must be @@ -74,7 +75,7 @@ * * This is the only reason why the buffer descriptor must be in words [4:7]. */ -static uint32_t null_descriptor[8] = { +static uint32_t null_texture_descriptor[8] = { 0, 0, 0, @@ -84,10 +85,20 @@ static uint32_t null_descriptor[8] = { * descriptor */ }; +static uint32_t null_image_descriptor[8] = { + 0, + 0, + 0, + S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) + /* the rest must contain zeros, which is also used by the buffer + * descriptor */ +}; + static void si_init_descriptors(struct si_descriptors *desc, unsigned shader_userdata_index, unsigned element_dw_size, - unsigned num_elements) + unsigned num_elements, + const uint32_t *null_descriptor) { int i; @@ -100,10 +111,12 @@ static void si_init_descriptors(struct si_descriptors *desc, desc->shader_userdata_offset = shader_userdata_index * 4; /* Initialize the array to NULL descriptors if the element size is 8. */ - if (element_dw_size % 8 == 0) + if (null_descriptor) { + assert(element_dw_size % 8 == 0); for (i = 0; i < num_elements * element_dw_size / 8; i++) - memcpy(desc->list + i*8, null_descriptor, - sizeof(null_descriptor)); + memcpy(desc->list + i * 8, null_descriptor, + 8 * 4); + } } static void si_release_descriptors(struct si_descriptors *desc) @@ -210,7 +223,7 @@ static void si_set_sampler_view(struct si_context *sctx, } else { /* Disable FMASK and bind sampler state in [12:15]. */ memcpy(views->desc.list + slot*16 + 8, - null_descriptor, 4*4); + null_texture_descriptor, 4*4); if (views->sampler_states[slot]) memcpy(views->desc.list + slot*16 + 12, @@ -220,9 +233,9 @@ static void si_set_sampler_view(struct si_context *sctx, views->desc.enabled_mask |= 1llu << slot; } else { pipe_sampler_view_reference(&views->views[slot], NULL); - memcpy(views->desc.list + slot*16, null_descriptor, 8*4); + memcpy(views->desc.list + slot*16, null_texture_descriptor, 8*4); /* Only clear the lower dwords of FMASK. */ - memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4); + memcpy(views->desc.list + slot*16 + 8, null_texture_descriptor, 4*4); views->desc.enabled_mask &= ~(1llu << slot); } @@ -301,6 +314,160 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) } } +/* IMAGE VIEWS */ + +static void +si_release_image_views(struct si_images_info *images) +{ + unsigned i; + + for (i = 0; i < SI_NUM_IMAGES; ++i) { + struct pipe_image_view *view = &images->views[i]; + + pipe_resource_reference(&view->resource, NULL); + } + + si_release_descriptors(&images->desc); +} + +static void +si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images) +{ + uint mask = images->desc.enabled_mask; + + /* Add buffers to the CS. */ + while (mask) { + int i = u_bit_scan(&mask); + struct pipe_image_view *view = &images->views[i]; + + assert(view->resource); + + si_sampler_view_add_buffer(sctx, view->resource); + } + + if (images->desc.buffer) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + images->desc.buffer, + RADEON_USAGE_READ, + RADEON_PRIO_DESCRIPTORS); + } +} + +static void +si_disable_shader_image(struct si_images_info *images, unsigned slot) +{ + if (images->desc.enabled_mask & (1llu << slot)) { + pipe_resource_reference(&images->views[slot].resource, NULL); + images->compressed_colortex_mask &= ~(1 << slot); + + memcpy(images->desc.list + slot*8, null_image_descriptor, 8*4); + images->desc.enabled_mask &= ~(1llu << slot); + images->desc.list_dirty = true; + } +} + +static void +si_set_shader_images(struct pipe_context *pipe, unsigned shader, + unsigned start_slot, unsigned count, + struct pipe_image_view *views) +{ + struct si_context *ctx = (struct si_context *)pipe; + struct si_screen *screen = ctx->screen; + struct si_images_info *images = &ctx->images[shader]; + unsigned i, slot; + + assert(shader < SI_NUM_SHADERS); + + if (!count) + return; + + assert(start_slot + count <= SI_NUM_IMAGES); + + for (i = 0, slot = start_slot; i < count; ++i, ++slot) { + struct r600_resource *res; + + if (!views || !views[i].resource) { + si_disable_shader_image(images, slot); + continue; + } + + res = (struct r600_resource *)views[i].resource; + util_copy_image_view(&images->views[slot], &views[i]); + + si_sampler_view_add_buffer(ctx, &res->b.b); + + if (res->b.b.target == PIPE_BUFFER) { + si_make_buffer_descriptor(screen, res, + views[i].format, + views[i].u.buf.first_element, + views[i].u.buf.last_element, + images->desc.list + slot * 8); + images->compressed_colortex_mask &= ~(1 << slot); + } else { + static const unsigned char swizzle[4] = { 0, 1, 2, 3 }; + struct r600_texture *tex = (struct r600_texture *)res; + unsigned level; + unsigned width, height, depth; + + assert(!tex->is_depth); + assert(tex->fmask.size == 0); + + if (tex->dcc_offset && + views[i].access & PIPE_IMAGE_ACCESS_WRITE) + r600_texture_disable_dcc(&screen->b, tex); + + if (is_compressed_colortex(tex)) { + images->compressed_colortex_mask |= 1 << slot; + } else { + images->compressed_colortex_mask &= ~(1 << slot); + } + + /* Always force the base level to the selected level. + * + * This is required for 3D textures, where otherwise + * selecting a single slice for non-layered bindings + * fails. It doesn't hurt the other targets. + */ + level = views[i].u.tex.level; + width = u_minify(res->b.b.width0, level); + height = u_minify(res->b.b.height0, level); + depth = u_minify(res->b.b.depth0, level); + + si_make_texture_descriptor(screen, tex, false, res->b.b.target, + views[i].format, swizzle, + level, 0, 0, + views[i].u.tex.first_layer, views[i].u.tex.last_layer, + width, height, depth, + images->desc.list + slot * 8, + NULL); + } + + images->desc.enabled_mask |= 1llu << slot; + images->desc.list_dirty = true; + } +} + +static void +si_images_update_compressed_colortex_mask(struct si_images_info *images) +{ + uint64_t mask = images->desc.enabled_mask; + + while (mask) { + int i = u_bit_scan64(&mask); + struct pipe_resource *res = images->views[i].resource; + + if (res && res->target != PIPE_BUFFER) { + struct r600_texture *rtex = (struct r600_texture *)res; + + if (is_compressed_colortex(rtex)) { + images->compressed_colortex_mask |= 1 << i; + } else { + images->compressed_colortex_mask &= ~(1 << i); + } + } + } +} + /* SAMPLER STATES */ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader, @@ -351,7 +518,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); si_init_descriptors(&buffers->desc, shader_userdata_index, 4, - num_buffers); + num_buffers, NULL); } static void si_release_buffer_resources(struct si_buffer_resources *buffers) @@ -804,6 +971,7 @@ void si_update_compressed_colortex_masks(struct si_context *sctx) { for (int i = 0; i < SI_NUM_SHADERS; ++i) { si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]); + si_images_update_compressed_colortex_mask(&sctx->images[i]); } } @@ -925,6 +1093,28 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource } } } + + /* Shader images */ + for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { + struct si_images_info *images = &sctx->images[shader]; + unsigned mask = images->desc.enabled_mask; + + while (mask) { + unsigned i = u_bit_scan(&mask); + + if (images->views[i].resource == buf) { + si_desc_reset_buffer_offset( + ctx, images->desc.list + i * 8 + 4, + old_va, buf); + images->desc.list_dirty = true; + + radeon_add_to_buffer_list( + &sctx->b, &sctx->b.gfx, rbuffer, + RADEON_USAGE_READWRITE, + RADEON_PRIO_SAMPLER_BUFFER); + } + } + } } /* SHADER USER DATA */ @@ -1055,6 +1245,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false); si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false); + si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false); } si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false); } @@ -1074,14 +1265,20 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT); si_init_descriptors(&sctx->samplers[i].views.desc, - SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS); + SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS, + null_texture_descriptor); + + si_init_descriptors(&sctx->images[i].desc, + SI_SGPR_IMAGES, 8, SI_NUM_IMAGES, + null_image_descriptor); } si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, - 4, SI_NUM_VERTEX_BUFFERS); + 4, SI_NUM_VERTEX_BUFFERS, NULL); /* Set pipe_context functions. */ sctx->b.b.bind_sampler_states = si_bind_sampler_states; + sctx->b.b.set_shader_images = si_set_shader_images; sctx->b.b.set_constant_buffer = si_set_constant_buffer; sctx->b.b.set_sampler_views = si_set_sampler_views; sctx->b.b.set_stream_output_targets = si_set_streamout_targets; @@ -1105,7 +1302,8 @@ bool si_upload_shader_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) || !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) || - !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc)) + !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) || + !si_upload_descriptors(sctx, &sctx->images[i].desc)) return false; } return si_upload_vertex_buffer_descriptors(sctx); @@ -1119,6 +1317,7 @@ void si_release_all_descriptors(struct si_context *sctx) si_release_buffer_resources(&sctx->const_buffers[i]); si_release_buffer_resources(&sctx->rw_buffers[i]); si_release_sampler_views(&sctx->samplers[i].views); + si_release_image_views(&sctx->images[i]); } si_release_descriptors(&sctx->vertex_buffers); } @@ -1131,6 +1330,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); + si_image_views_begin_new_cs(sctx, &sctx->images[i]); } si_vertex_buffers_begin_new_cs(sctx); si_shader_userdata_begin_new_cs(sctx); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b5a4034cc12..8c900a4ecb6 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -118,8 +118,7 @@ void si_context_gfx_flush(void *context, unsigned flags, } /* Flush the CS. */ - ws->cs_flush(cs, flags, &ctx->last_gfx_fence, - ctx->screen->b.cs_count++); + ws->cs_flush(cs, flags, &ctx->last_gfx_fence); if (fence) ws->fence_reference(fence, ctx->last_gfx_fence); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 8b50a49cba0..dd1103eed06 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -140,9 +140,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.b.create_video_buffer = vl_video_buffer_create; } - sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush, - sctx, sscreen->b.trace_bo ? - sscreen->b.trace_bo->buf : NULL); + sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, + si_context_gfx_flush, sctx); sctx->b.gfx.flush = si_context_gfx_flush; /* Border colors. */ @@ -539,8 +538,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: - case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0; } return 0; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 0fef5f72098..6d0d687fe4c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -141,6 +141,12 @@ struct si_textures_info { uint32_t compressed_colortex_mask; }; +struct si_images_info { + struct si_descriptors desc; + struct pipe_image_view views[SI_NUM_IMAGES]; + uint32_t compressed_colortex_mask; +}; + struct si_framebuffer { struct r600_atom atom; struct pipe_framebuffer_state state; @@ -251,6 +257,7 @@ struct si_context { struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; struct si_buffer_resources rw_buffers[SI_NUM_SHADERS]; struct si_textures_info samplers[SI_NUM_SHADERS]; + struct si_images_info images[SI_NUM_SHADERS]; /* other shader resources */ struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8c1151aa493..9eb531f8d80 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "util/u_pstipple.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_dump.h" @@ -99,6 +100,7 @@ struct si_shader_context LLVMValueRef sampler_views[SI_NUM_SAMPLERS]; LLVMValueRef sampler_states[SI_NUM_SAMPLERS]; LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS]; + LLVMValueRef images[SI_NUM_IMAGES]; LLVMValueRef so_buffers[4]; LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; @@ -530,6 +532,37 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx, } /** + * Like get_indirect_index, but restricts the return value to a (possibly + * undefined) value inside [0..num). + */ +static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx, + const struct tgsi_ind_register *ind, + int rel_index, unsigned num) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef result = get_indirect_index(ctx, ind, rel_index); + LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); + LLVMValueRef cc; + + if (util_is_power_of_two(num)) { + result = LLVMBuildAnd(builder, result, c_max, ""); + } else { + /* In theory, this MAX pattern should result in code that is + * as good as the bit-wise AND above. + * + * In practice, LLVM generates worse code (at the time of + * writing), because its value tracking is not strong enough. + */ + cc = LLVMBuildICmp(builder, LLVMIntULE, result, c_max, ""); + result = LLVMBuildSelect(builder, cc, result, c_max, ""); + } + + return result; +} + + +/** * Calculate a dword address given an input or output register and a stride. */ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, @@ -2656,10 +2689,90 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base) ctx->return_value = ret; } +/** + * Given a v8i32 resource descriptor for a buffer, extract the size of the + * buffer in number of elements and return it as an i32. + */ +static LLVMValueRef get_buffer_size( + struct lp_build_tgsi_context *bld_base, + LLVMValueRef descriptor) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef size = + LLVMBuildExtractElement(builder, descriptor, + lp_build_const_int32(gallivm, 6), ""); + + if (ctx->screen->b.chip_class >= VI) { + /* On VI, the descriptor contains the size in bytes, + * but TXQ must return the size in elements. + * The stride is always non-zero for resources using TXQ. + */ + LLVMValueRef stride = + LLVMBuildExtractElement(builder, descriptor, + lp_build_const_int32(gallivm, 5), ""); + stride = LLVMBuildLShr(builder, stride, + lp_build_const_int32(gallivm, 16), ""); + stride = LLVMBuildAnd(builder, stride, + lp_build_const_int32(gallivm, 0x3FFF), ""); + + size = LLVMBuildUDiv(builder, size, stride, ""); + } + + return size; +} + +/** + * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with + * intrinsic names). + */ +static void build_int_type_name( + LLVMTypeRef type, + char *buf, unsigned bufsize) +{ + assert(bufsize >= 6); + + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + snprintf(buf, bufsize, "v%ui32", + LLVMGetVectorSize(type)); + else + strcpy(buf, "i32"); +} + static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); +/* Prevent optimizations (at least of memory accesses) across the current + * point in the program by emitting empty inline assembly that is marked as + * having side effects. + */ +static void emit_optimization_barrier(struct si_shader_context *ctx) +{ + LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder; + LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false); + LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); + LLVMBuildCall(builder, inlineasm, NULL, 0, ""); +} + +static void membar_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + + /* Since memoryBarrier only makes guarantees about atomics and + * coherent image accesses (which bypass TC L1), we do not need to emit + * any special cache handling here. + * + * We do have to prevent LLVM from re-ordering loads across + * the barrier though. + */ + emit_optimization_barrier(ctx); +} + static bool tgsi_is_array_sampler(unsigned target) { return target == TGSI_TEXTURE_1D_ARRAY || @@ -2671,6 +2784,459 @@ static bool tgsi_is_array_sampler(unsigned target) target == TGSI_TEXTURE_2D_ARRAY_MSAA; } +static bool tgsi_is_array_image(unsigned target) +{ + return target == TGSI_TEXTURE_3D || + target == TGSI_TEXTURE_CUBE || + target == TGSI_TEXTURE_1D_ARRAY || + target == TGSI_TEXTURE_2D_ARRAY || + target == TGSI_TEXTURE_CUBE_ARRAY || + target == TGSI_TEXTURE_2D_ARRAY_MSAA; +} + +/** + * Given a 256-bit resource descriptor, force the DCC enable bit to off. + * + * At least on Tonga, executing image stores on images with DCC enabled and + * non-trivial can eventually lead to lockups. This can occur when an + * application binds an image as read-only but then uses a shader that writes + * to it. The OpenGL spec allows almost arbitrarily bad behavior (including + * program termination) in this case, but it doesn't cost much to be a bit + * nicer: disabling DCC in the shader still leads to undefined results but + * avoids the lockup. + */ +static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, + LLVMValueRef rsrc) +{ + if (ctx->screen->b.chip_class <= CIK) { + return rsrc; + } else { + LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder; + LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); + LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0); + LLVMValueRef tmp; + + tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, ""); + tmp = LLVMBuildAnd(builder, tmp, i32_C, ""); + return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, ""); + } +} + +/** + * Load the resource descriptor for \p image. + */ +static void +image_fetch_rsrc( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_src_register *image, + bool dcc_off, + LLVMValueRef *rsrc) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + + assert(image->Register.File == TGSI_FILE_IMAGE); + + if (!image->Register.Indirect) { + /* Fast path: use preloaded resources */ + *rsrc = ctx->images[image->Register.Index]; + } else { + /* Indexing and manual load */ + LLVMValueRef ind_index; + LLVMValueRef rsrc_ptr; + LLVMValueRef tmp; + + /* From the GL_ARB_shader_image_load_store extension spec: + * + * If a shader performs an image load, store, or atomic + * operation using an image variable declared as an array, + * and if the index used to select an individual element is + * negative or greater than or equal to the size of the + * array, the results of the operation are undefined but may + * not lead to termination. + */ + ind_index = get_bounded_indirect_index(ctx, &image->Indirect, + image->Register.Index, + SI_NUM_IMAGES); + + rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES); + tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index); + if (dcc_off) + tmp = force_dcc_off(ctx, tmp); + *rsrc = tmp; + } +} + +static LLVMValueRef image_fetch_coords( + struct lp_build_tgsi_context *bld_base, + const struct tgsi_full_instruction *inst, + unsigned src) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + unsigned target = inst->Memory.Texture; + int sample; + unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &sample); + LLVMValueRef coords[4]; + LLVMValueRef tmp; + int chan; + + for (chan = 0; chan < num_coords; ++chan) { + tmp = lp_build_emit_fetch(bld_base, inst, src, chan); + tmp = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); + coords[chan] = tmp; + } + + if (num_coords == 1) + return coords[0]; + + if (num_coords == 3) { + /* LLVM has difficulties lowering 3-element vectors. */ + coords[3] = bld_base->uint_bld.undef; + num_coords = 4; + } + + return lp_build_gather_values(gallivm, coords, num_coords); +} + +/** + * Append the extra mode bits that are used by image load and store. + */ +static void image_append_args( + struct si_shader_context *ctx, + struct lp_build_emit_data * emit_data, + unsigned target, + bool atomic) +{ + const struct tgsi_full_instruction *inst = emit_data->inst; + LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); + LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); + + emit_data->args[emit_data->arg_count++] = i1false; /* r128 */ + emit_data->args[emit_data->arg_count++] = + tgsi_is_array_image(target) ? i1true : i1false; /* da */ + if (!atomic) { + emit_data->args[emit_data->arg_count++] = + inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? + i1true : i1false; /* glc */ + } + emit_data->args[emit_data->arg_count++] = i1false; /* slc */ +} + +/** + * Append the resource and indexing arguments for buffer intrinsics. + * + * \param rsrc the 256 bit resource + * \param index index into the buffer + */ +static void buffer_append_args( + struct si_shader_context *ctx, + struct lp_build_emit_data *emit_data, + LLVMValueRef rsrc, + LLVMValueRef index, + bool atomic) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; + const struct tgsi_full_instruction *inst = emit_data->inst; + LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2); + LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); + LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); + + rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, ""); + rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, ""); + rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""); + + emit_data->args[emit_data->arg_count++] = rsrc; + emit_data->args[emit_data->arg_count++] = index; /* vindex */ + emit_data->args[emit_data->arg_count++] = bld_base->uint_bld.zero; /* voffset */ + if (!atomic) { + emit_data->args[emit_data->arg_count++] = + inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? + i1true : i1false; /* glc */ + } + emit_data->args[emit_data->arg_count++] = i1false; /* slc */ +} + +static void load_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + LLVMValueRef coords; + LLVMValueRef rsrc; + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + + image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc); + coords = image_fetch_coords(bld_base, inst, 1); + + if (target == TGSI_TEXTURE_BUFFER) { + buffer_append_args(ctx, emit_data, rsrc, coords, false); + } else { + emit_data->args[0] = coords; + emit_data->args[1] = rsrc; + emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */ + emit_data->arg_count = 3; + + image_append_args(ctx, emit_data, target, false); + } +} + +static void load_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + char intrinsic_name[32]; + char coords_type[8]; + + if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) + emit_optimization_barrier(ctx); + + if (target == TGSI_TEXTURE_BUFFER) { + emit_data->output[emit_data->chan] = lp_build_intrinsic( + builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMReadOnlyAttribute | LLVMNoUnwindAttribute); + } else { + build_int_type_name(LLVMTypeOf(emit_data->args[0]), + coords_type, sizeof(coords_type)); + + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.image.load.%s", coords_type); + + emit_data->output[emit_data->chan] = + lp_build_intrinsic( + builder, intrinsic_name, emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMReadOnlyAttribute | LLVMNoUnwindAttribute); + } +} + +static void store_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + const struct tgsi_full_instruction * inst = emit_data->inst; + struct tgsi_full_src_register image; + unsigned target = inst->Memory.Texture; + LLVMValueRef chans[4]; + LLVMValueRef data; + LLVMValueRef coords; + LLVMValueRef rsrc; + unsigned chan; + + emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context); + + image = tgsi_full_src_register_from_dst(&inst->Dst[0]); + coords = image_fetch_coords(bld_base, inst, 0); + + for (chan = 0; chan < 4; ++chan) { + chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); + } + data = lp_build_gather_values(gallivm, chans, 4); + + if (target == TGSI_TEXTURE_BUFFER) { + image_fetch_rsrc(bld_base, &image, false, &rsrc); + emit_data->args[0] = data; + emit_data->arg_count = 1; + + buffer_append_args(ctx, emit_data, rsrc, coords, false); + } else { + emit_data->args[0] = data; + emit_data->args[1] = coords; + image_fetch_rsrc(bld_base, &image, true, &emit_data->args[2]); + emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */ + emit_data->arg_count = 4; + + image_append_args(ctx, emit_data, target, false); + } +} + +static void store_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + char intrinsic_name[32]; + char coords_type[8]; + + if (target == TGSI_TEXTURE_BUFFER) { + emit_data->output[emit_data->chan] = lp_build_intrinsic( + builder, "llvm.amdgcn.buffer.store.format.v4f32", + emit_data->dst_type, emit_data->args, emit_data->arg_count, + LLVMNoUnwindAttribute); + } else { + build_int_type_name(LLVMTypeOf(emit_data->args[1]), + coords_type, sizeof(coords_type)); + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.image.store.%s", coords_type); + + emit_data->output[emit_data->chan] = + lp_build_intrinsic( + builder, intrinsic_name, emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMNoUnwindAttribute); + } +} + +static void atomic_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + LLVMValueRef data1, data2; + LLVMValueRef coords; + LLVMValueRef rsrc; + LLVMValueRef tmp; + + emit_data->dst_type = bld_base->base.elem_type; + + image_fetch_rsrc(bld_base, &inst->Src[0], target != TGSI_TEXTURE_BUFFER, + &rsrc); + coords = image_fetch_coords(bld_base, inst, 1); + + tmp = lp_build_emit_fetch(bld_base, inst, 2, 0); + data1 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); + + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + tmp = lp_build_emit_fetch(bld_base, inst, 3, 0); + data2 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); + } + + /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order + * of arguments, which is reversed relative to TGSI (and GLSL) + */ + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + emit_data->args[emit_data->arg_count++] = data2; + emit_data->args[emit_data->arg_count++] = data1; + + if (target == TGSI_TEXTURE_BUFFER) { + buffer_append_args(ctx, emit_data, rsrc, coords, true); + } else { + emit_data->args[emit_data->arg_count++] = coords; + emit_data->args[emit_data->arg_count++] = rsrc; + + image_append_args(ctx, emit_data, target, true); + } +} + +static void atomic_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct tgsi_full_instruction * inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + char intrinsic_name[40]; + LLVMValueRef tmp; + + if (target == TGSI_TEXTURE_BUFFER) { + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", action->intr_name); + } else { + char coords_type[8]; + + build_int_type_name(LLVMTypeOf(emit_data->args[1]), + coords_type, sizeof(coords_type)); + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.image.atomic.%s.%s", + action->intr_name, coords_type); + } + + tmp = lp_build_intrinsic( + builder, intrinsic_name, bld_base->uint_bld.elem_type, + emit_data->args, emit_data->arg_count, + LLVMNoUnwindAttribute); + emit_data->output[emit_data->chan] = + LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, ""); +} + +static void resq_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + const struct tgsi_full_instruction *inst = emit_data->inst; + const struct tgsi_full_src_register *reg = &inst->Src[0]; + unsigned tex_target = inst->Memory.Texture; + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + + if (tex_target == TGSI_TEXTURE_BUFFER) { + image_fetch_rsrc(bld_base, reg, false, &emit_data->args[0]); + emit_data->arg_count = 1; + } else { + emit_data->args[0] = bld_base->uint_bld.zero; /* mip level */ + image_fetch_rsrc(bld_base, reg, false, &emit_data->args[1]); + emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */ + emit_data->args[3] = bld_base->uint_bld.zero; /* unorm */ + emit_data->args[4] = bld_base->uint_bld.zero; /* r128 */ + emit_data->args[5] = tgsi_is_array_image(tex_target) ? + bld_base->uint_bld.one : bld_base->uint_bld.zero; /* da */ + emit_data->args[6] = bld_base->uint_bld.zero; /* glc */ + emit_data->args[7] = bld_base->uint_bld.zero; /* slc */ + emit_data->args[8] = bld_base->uint_bld.zero; /* tfe */ + emit_data->args[9] = bld_base->uint_bld.zero; /* lwe */ + emit_data->arg_count = 10; + } +} + +static void resq_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + const struct tgsi_full_instruction *inst = emit_data->inst; + unsigned target = inst->Memory.Texture; + LLVMValueRef out; + + if (target == TGSI_TEXTURE_BUFFER) { + out = get_buffer_size(bld_base, emit_data->args[0]); + } else { + out = lp_build_intrinsic( + builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, + emit_data->args, emit_data->arg_count, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + + /* Divide the number of layers by 6 to get the number of cubes. */ + if (target == TGSI_TEXTURE_CUBE_ARRAY) { + LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6); + + LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, ""); + z = LLVMBuildBitCast(builder, z, bld_base->uint_bld.elem_type, ""); + z = LLVMBuildSDiv(builder, z, imm6, ""); + z = LLVMBuildBitCast(builder, z, bld_base->base.elem_type, ""); + out = LLVMBuildInsertElement(builder, out, z, imm2, ""); + } + } + + emit_data->output[emit_data->chan] = out; +} + static void set_tex_fetch_args(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, unsigned opcode, unsigned target, @@ -2836,26 +3402,7 @@ static void tex_fetch_args( if (target == TGSI_TEXTURE_BUFFER) { /* Read the size from the buffer descriptor directly. */ LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, ""); - LLVMValueRef size = LLVMBuildExtractElement(builder, res, - lp_build_const_int32(gallivm, 6), ""); - - if (ctx->screen->b.chip_class >= VI) { - /* On VI, the descriptor contains the size in bytes, - * but TXQ must return the size in elements. - * The stride is always non-zero for resources using TXQ. - */ - LLVMValueRef stride = - LLVMBuildExtractElement(builder, res, - lp_build_const_int32(gallivm, 5), ""); - stride = LLVMBuildLShr(builder, stride, - lp_build_const_int32(gallivm, 16), ""); - stride = LLVMBuildAnd(builder, stride, - lp_build_const_int32(gallivm, 0x3FFF), ""); - - size = LLVMBuildUDiv(builder, size, stride, ""); - } - - emit_data->args[0] = size; + emit_data->args[0] = get_buffer_size(bld_base, res); return; } @@ -3236,14 +3783,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, return; } - if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind) - sprintf(type, ".v%ui32", - LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0]))); - else - strcpy(type, ".i32"); - /* Add the type and suffixes .c, .o if needed. */ - sprintf(intr_name, "%s%s%s%s%s", + build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type)); + sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix, has_offset ? ".o" : "", type); @@ -3865,8 +4407,8 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS); params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS); - params[SI_PARAM_UNUSED] = LLVMPointerType(ctx->i32, CONST_ADDR_SPACE); - last_array_pointer = SI_PARAM_UNUSED; + params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES); + last_array_pointer = SI_PARAM_IMAGES; switch (ctx->type) { case TGSI_PROCESSOR_VERTEX: @@ -4153,6 +4695,34 @@ static void preload_samplers(struct si_shader_context *ctx) } } +static void preload_images(struct si_shader_context *ctx) +{ + struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; + struct tgsi_shader_info *info = &ctx->shader->selector->info; + struct gallivm_state *gallivm = bld_base->base.gallivm; + unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1; + LLVMValueRef res_ptr; + unsigned i; + + if (num_images == 0) + return; + + res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES); + + for (i = 0; i < num_images; ++i) { + /* Rely on LLVM to shrink the load for buffer resources. */ + LLVMValueRef rsrc = + build_indexed_load_const(ctx, res_ptr, + lp_build_const_int32(gallivm, i)); + + if (info->images_writemask & (1 << i) && + !(info->images_buffers & (1 << i))) + rsrc = force_dcc_off(ctx, rsrc); + + ctx->images[i] = rsrc; + } +} + static void preload_streamout_buffers(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; @@ -4792,6 +5362,7 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, LLVMTargetMachineRef tm) { struct lp_build_tgsi_context *bld_base; + struct lp_build_tgsi_action tmpl = {}; memset(ctx, 0, sizeof(*ctx)); radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--"); @@ -4839,6 +5410,38 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; + bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args; + bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; + bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args; + bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; + bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args; + bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; + + tmpl.fetch_args = atomic_fetch_args; + tmpl.emit = atomic_emit; + bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add"; + bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap"; + bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap"; + bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and"; + bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or"; + bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor"; + bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin"; + bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax"; + bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin"; + bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax"; + + bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; + bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; @@ -4926,6 +5529,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, create_function(&ctx); preload_constants(&ctx); preload_samplers(&ctx); + preload_images(&ctx); preload_streamout_buffers(&ctx); preload_ring_buffers(&ctx); @@ -5383,7 +5987,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen, last_array_pointer = SI_PARAM_RW_BUFFERS; params[SI_PARAM_CONST_BUFFERS] = ctx.i64; params[SI_PARAM_SAMPLERS] = ctx.i64; - params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_IMAGES] = ctx.i64; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; @@ -5633,7 +6237,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen, params[SI_PARAM_RW_BUFFERS] = ctx.i64; params[SI_PARAM_CONST_BUFFERS] = ctx.i64; params[SI_PARAM_SAMPLERS] = ctx.i64; - params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_IMAGES] = ctx.i64; params[SI_PARAM_ALPHA_REF] = ctx.f32; last_array_pointer = -1; last_sgpr = SI_PARAM_ALPHA_REF; @@ -5897,12 +6501,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *mainp = shader->selector->main_shader_part; int r; - /* LS and ES are always compiled on demand. */ + /* LS, ES, VS are compiled on demand if the main part hasn't been + * compiled for that stage. + */ if (!mainp || (shader->selector->type == PIPE_SHADER_VERTEX && - (shader->key.vs.as_es || shader->key.vs.as_ls)) || + (shader->key.vs.as_es != mainp->key.vs.as_es || + shader->key.vs.as_ls != mainp->key.vs.as_ls)) || (shader->selector->type == PIPE_SHADER_TESS_EVAL && - shader->key.tes.as_es)) { + shader->key.tes.as_es != mainp->key.tes.as_es)) { /* Monolithic shader (compiled as a whole, has many variants, * may take a long time to compile). */ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index de23e642fe4..8059edf6395 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -80,7 +80,7 @@ struct radeon_shader_reloc; #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ #define SI_SGPR_CONST_BUFFERS 2 #define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ -/* TODO: gap */ +#define SI_SGPR_IMAGES 6 #define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */ #define SI_SGPR_BASE_VERTEX 10 /* VS only */ #define SI_SGPR_START_INSTANCE 11 /* VS only */ @@ -104,7 +104,7 @@ struct radeon_shader_reloc; #define SI_PARAM_RW_BUFFERS 0 #define SI_PARAM_CONST_BUFFERS 1 #define SI_PARAM_SAMPLERS 2 -#define SI_PARAM_UNUSED 3 /* TODO: use */ +#define SI_PARAM_IMAGES 3 /* VS only parameters */ #define SI_PARAM_VERTEX_BUFFERS 4 diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f823af188c7..1245f56c08a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2797,7 +2797,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) * Build the sampler view descriptor for a buffer texture. * @param state 256-bit descriptor; only the high 128 bits are filled in */ -static void +void si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, enum pipe_format format, unsigned first_element, unsigned last_element, @@ -2838,9 +2838,10 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, /** * Build the sampler view descriptor for a texture. */ -static void +void si_make_texture_descriptor(struct si_screen *screen, struct r600_texture *tex, + bool sampler, enum pipe_texture_target target, enum pipe_format pipe_format, const unsigned char state_swizzle[4], @@ -2855,7 +2856,7 @@ si_make_texture_descriptor(struct si_screen *screen, const struct util_format_description *desc; unsigned char swizzle[4]; int first_non_void; - unsigned num_format, data_format; + unsigned num_format, data_format, type; uint32_t pitch; uint64_t va; @@ -2973,12 +2974,30 @@ si_make_texture_descriptor(struct si_screen *screen, data_format = 0; } - if (res->target == PIPE_TEXTURE_1D_ARRAY) { + if (!sampler && + (res->target == PIPE_TEXTURE_CUBE || + res->target == PIPE_TEXTURE_CUBE_ARRAY || + res->target == PIPE_TEXTURE_3D)) { + /* For the purpose of shader images, treat cube maps and 3D + * textures as 2D arrays. For 3D textures, the address + * calculations for mipmaps are different, so we rely on the + * caller to effectively disable mipmaps. + */ + type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; + + assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); + } else { + type = si_tex_dim(res->target, target, res->nr_samples); + } + + if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { height = 1; depth = res->array_size; - } else if (res->target == PIPE_TEXTURE_2D_ARRAY) { - depth = res->array_size; - } else if (res->target == PIPE_TEXTURE_CUBE_ARRAY) + } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || + type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { + if (sampler || res->target != PIPE_TEXTURE_3D) + depth = res->array_size; + } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) depth = res->array_size / 6; pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format); @@ -3001,7 +3020,7 @@ si_make_texture_descriptor(struct si_screen *screen, last_level) | S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) | S_008F1C_POW2_PAD(res->last_level > 0) | - S_008F1C_TYPE(si_tex_dim(res->target, target, res->nr_samples))); + S_008F1C_TYPE(type)); state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1)); state[5] = (S_008F24_BASE_ARRAY(first_layer) | S_008F24_LAST_ARRAY(last_layer)); @@ -3155,7 +3174,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, state->target == PIPE_TEXTURE_CUBE) last_layer = state->u.tex.first_layer; - si_make_texture_descriptor(sctx->screen, tmp, state->target, + si_make_texture_descriptor(sctx->screen, tmp, true, state->target, state->format, state_swizzle, base_level, first_level, last_level, state->u.tex.first_layer, last_layer, @@ -3503,6 +3522,52 @@ static void si_texture_barrier(struct pipe_context *ctx) SI_CONTEXT_FLUSH_AND_INV_CB; } +static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) +{ + struct si_context *sctx = (struct si_context *)ctx; + + /* Subsequent commands must wait for all shader invocations to + * complete. */ + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + + if (flags & PIPE_BARRIER_CONSTANT_BUFFER) + sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1; + + if (flags & (PIPE_BARRIER_VERTEX_BUFFER | + PIPE_BARRIER_SHADER_BUFFER | + PIPE_BARRIER_TEXTURE | + PIPE_BARRIER_IMAGE | + PIPE_BARRIER_STREAMOUT_BUFFER)) { + /* As far as I can tell, L1 contents are written back to L2 + * automatically at end of shader, but the contents of other + * L1 caches might still be stale. */ + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; + } + + if (flags & PIPE_BARRIER_INDEX_BUFFER) { + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1; + + /* Indices are read through TC L2 since VI. */ + if (sctx->screen->b.chip_class <= CIK) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } + + if (flags & PIPE_BARRIER_FRAMEBUFFER) + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + + if (flags & (PIPE_BARRIER_MAPPED_BUFFER | + PIPE_BARRIER_FRAMEBUFFER | + PIPE_BARRIER_INDIRECT_BUFFER)) { + /* Not sure if INV_GLOBAL_L2 is the best thing here. + * + * We need to make sure that TC L1 & L2 are written back to + * memory, because neither CPU accesses nor CB fetches consider + * TC, but there's no need to invalidate any TC cache lines. */ + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } +} + static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) { struct pipe_blend_state blend; @@ -3583,6 +3648,7 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.set_index_buffer = si_set_index_buffer; sctx->b.b.texture_barrier = si_texture_barrier; + sctx->b.b.memory_barrier = si_memory_barrier; sctx->b.b.set_polygon_stipple = si_set_polygon_stipple; sctx->b.b.set_min_samples = si_set_min_samples; sctx->b.b.set_tess_state = si_set_tess_state; @@ -3637,7 +3703,8 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen, /* TILE_MODE_INDEX is ambiguous without a PCI ID. */ md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id; - si_make_texture_descriptor(sscreen, rtex, res->target, res->format, + si_make_texture_descriptor(sscreen, rtex, true, + res->target, res->format, swizzle, 0, 0, res->last_level, 0, is_array ? res->array_size - 1 : 0, res->width0, res->height0, res->depth0, diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 60c34f19e55..c4d6b9d9eee 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -158,6 +158,8 @@ struct si_shader_data { #define SI_DRIVER_STATE_CONST_BUF SI_NUM_USER_CONST_BUFFERS #define SI_NUM_CONST_BUFFERS (SI_DRIVER_STATE_CONST_BUF + 1) +#define SI_NUM_IMAGES 16 + /* Read-write buffer slots. * * Ring buffers: 0..1 @@ -272,6 +274,23 @@ unsigned cik_tile_split(unsigned tile_split); unsigned si_array_mode(unsigned mode); uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex); unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil); +void +si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, + enum pipe_format format, + unsigned first_element, unsigned last_element, + uint32_t *state); +void +si_make_texture_descriptor(struct si_screen *screen, + struct r600_texture *tex, + bool sampler, + enum pipe_texture_target target, + enum pipe_format pipe_format, + const unsigned char state_swizzle[4], + unsigned base_level, unsigned first_level, unsigned last_level, + unsigned first_layer, unsigned last_layer, + unsigned width, unsigned height, unsigned depth, + uint32_t *state, + uint32_t *fmask_state); struct pipe_sampler_view * si_create_sampler_view_custom(struct pipe_context *ctx, struct pipe_resource *texture, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5fe1f7960f3..02489583423 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -794,9 +794,15 @@ static void si_shader_ps(struct si_shader *shader) * - the shader uses at least 2 VMEM instructions, or * - the code size is at least 50 2-dword instructions or 100 1-dword * instructions. + * + * Shaders with side effects that must execute independently of the + * depth test require LATE_Z. */ - if (info->num_memory_instructions >= 2 || - shader->binary.code_size > 100*4) + if (info->writes_memory && + !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) + shader->z_order = V_02880C_LATE_Z; + else if (info->num_memory_instructions >= 2 || + shader->binary.code_size > 100*4) shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z; else shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; @@ -1042,6 +1048,31 @@ static int si_shader_select(struct pipe_context *ctx, return si_shader_select_with_key(ctx, state, &key); } +static void si_parse_next_shader_property(const struct tgsi_shader_info *info, + union si_shader_key *key) +{ + unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER]; + + switch (info->processor) { + case TGSI_PROCESSOR_VERTEX: + switch (next_shader) { + case TGSI_PROCESSOR_GEOMETRY: + key->vs.as_es = 1; + break; + case TGSI_PROCESSOR_TESS_CTRL: + case TGSI_PROCESSOR_TESS_EVAL: + key->vs.as_ls = 1; + break; + } + break; + + case TGSI_PROCESSOR_TESS_EVAL: + if (next_shader == TGSI_PROCESSOR_GEOMETRY) + key->tes.as_es = 1; + break; + } +} + static void *si_create_shader_selector(struct pipe_context *ctx, const struct pipe_shader_state *state) { @@ -1157,6 +1188,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx, if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1); + if (sel->info.writes_memory) + sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) | + S_02880C_EXEC_ON_NOOP(1); + /* Compile the main shader part for use with a prolog and/or epilog. */ if (sel->type != PIPE_SHADER_GEOMETRY && !sscreen->use_monolithic_shaders) { @@ -1167,6 +1202,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, goto error; shader->selector = sel; + si_parse_next_shader_property(&sel->info, &shader->key); tgsi_binary = si_get_tgsi_binary(sel); @@ -1202,6 +1238,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, union si_shader_key key; memset(&key, 0, sizeof(key)); + si_parse_next_shader_property(&sel->info, &key); /* Set reasonable defaults, so that the shader key doesn't * cause any code to be eliminated. diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index da4281490ae..896dcdf59d0 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -247,6 +247,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen, sizeof(svga->state.hw_draw.default_constbuf_size)); memset(svga->state.hw_draw.enabled_constbufs, 0, sizeof(svga->state.hw_draw.enabled_constbufs)); + svga->state.hw_draw.ib = NULL; /* Create a no-operation blend state which we will bind whenever the * requested blend state is impossible (e.g. due to having an integer diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 1976f98e5c1..ead47c07980 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -55,16 +55,21 @@ #define SVGA_QUERY_COMMAND_BUFFER_SIZE (PIPE_QUERY_DRIVER_SPECIFIC + 7) #define SVGA_QUERY_FLUSH_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 8) #define SVGA_QUERY_SURFACE_WRITE_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 9) +#define SVGA_QUERY_NUM_READBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 10) +#define SVGA_QUERY_NUM_RESOURCE_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 11) +#define SVGA_QUERY_NUM_BUFFER_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 12) +#define SVGA_QUERY_NUM_CONST_BUF_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 13) +#define SVGA_QUERY_NUM_CONST_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 14) /* running total counters */ -#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 10) -#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 11) -#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 12) -#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 13) -#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 14) -#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 15) +#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 15) +#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 16) +#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 17) +#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 18) +#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 19) +#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 20) /*SVGA_QUERY_MAX has to be last because it is size of an array*/ -#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 16) +#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 21) /** * Maximum supported number of constant buffers per shader @@ -499,20 +504,25 @@ struct svga_context /** performance / info queries for HUD */ struct { - uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ - uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */ - uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */ - uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */ - uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */ - uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */ - uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */ - uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */ - uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */ - uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ - uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ - uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ - uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ - uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */ + uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */ + uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */ + uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */ + uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */ + uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */ + uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */ + uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */ + uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */ + uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */ + uint64_t num_readbacks; /**< SVGA_QUERY_NUM_READBACKS */ + uint64_t num_resource_updates; /**< SVGA_QUERY_NUM_RESOURCE_UPDATES */ + uint64_t num_buffer_uploads; /**< SVGA_QUERY_NUM_BUFFER_UPLOADS */ + uint64_t num_const_buf_updates; /**< SVGA_QUERY_NUM_CONST_BUF_UPDATES */ + uint64_t num_const_updates; /**< SVGA_QUERY_NUM_CONST_UPDATES */ + uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */ + uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */ + uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */ + uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */ + uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */ } hud; /** The currently bound stream output targets */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index fe6cf71a6e5..0b9ea889afa 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -458,6 +458,14 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, ret = svga_rebind_shaders(svga); if (ret != PIPE_OK) return ret; + + /* Rebind stream output targets */ + ret = svga_rebind_stream_output_targets(svga); + if (ret != PIPE_OK) + return ret; + + /* Force rebinding the index buffer when needed */ + svga->state.hw_draw.ib = NULL; } ret = validate_sampler_resources(svga); diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index af9356d7c75..a26e577d8f7 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -254,10 +254,13 @@ svga_set_debug_callback(struct pipe_context *pipe, { struct svga_context *svga = svga_context(pipe); - if (cb) + if (cb) { svga->debug.callback = *cb; - else + svga->swc->debug_callback = &svga->debug.callback; + } else { memset(&svga->debug.callback, 0, sizeof(svga->debug.callback)); + svga->swc->debug_callback = NULL; + } } diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c index 845f4ef3a1c..88f41eadc1d 100644 --- a/src/gallium/drivers/svga/svga_pipe_query.c +++ b/src/gallium/drivers/svga/svga_pipe_query.c @@ -72,11 +72,14 @@ struct svga_query { /** cast wrapper */ static inline struct svga_query * -svga_query( struct pipe_query *q ) +svga_query(struct pipe_query *q) { return (struct svga_query *)q; } +/** + * VGPU9 + */ static boolean svga_get_query_result(struct pipe_context *pipe, @@ -736,6 +739,11 @@ svga_create_query(struct pipe_context *pipe, case SVGA_QUERY_NUM_STATE_OBJECTS: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: break; default: assert(!"unexpected query type in svga_create_query()"); @@ -808,6 +816,11 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_NUM_STATE_OBJECTS: case SVGA_QUERY_NUM_SURFACE_VIEWS: case SVGA_QUERY_NUM_GENERATE_MIPMAP: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: /* nothing */ break; default: @@ -899,6 +912,21 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_SURFACE_WRITE_FLUSHES: sq->begin_count = svga->hud.surface_write_flushes; break; + case SVGA_QUERY_NUM_READBACKS: + sq->begin_count = svga->hud.num_readbacks; + break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->begin_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->begin_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->begin_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->begin_count = svga->hud.num_const_updates; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -1002,6 +1030,21 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q) case SVGA_QUERY_SURFACE_WRITE_FLUSHES: sq->end_count = svga->hud.surface_write_flushes; break; + case SVGA_QUERY_NUM_READBACKS: + sq->end_count = svga->hud.num_readbacks; + break; + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + sq->end_count = svga->hud.num_resource_updates; + break; + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + sq->end_count = svga->hud.num_buffer_uploads; + break; + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + sq->end_count = svga->hud.num_const_buf_updates; + break; + case SVGA_QUERY_NUM_CONST_UPDATES: + sq->end_count = svga->hud.num_const_updates; + break; case SVGA_QUERY_MEMORY_USED: case SVGA_QUERY_NUM_SHADERS: case SVGA_QUERY_NUM_RESOURCES: @@ -1103,6 +1146,11 @@ svga_get_query_result(struct pipe_context *pipe, case SVGA_QUERY_COMMAND_BUFFER_SIZE: case SVGA_QUERY_FLUSH_TIME: case SVGA_QUERY_SURFACE_WRITE_FLUSHES: + case SVGA_QUERY_NUM_READBACKS: + case SVGA_QUERY_NUM_RESOURCE_UPDATES: + case SVGA_QUERY_NUM_BUFFER_UPLOADS: + case SVGA_QUERY_NUM_CONST_BUF_UPDATES: + case SVGA_QUERY_NUM_CONST_UPDATES: vresult->u64 = sq->end_count - sq->begin_count; break; /* These are running total counters */ diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c index 3f443c44eee..1318b5565ce 100644 --- a/src/gallium/drivers/svga/svga_pipe_streamout.c +++ b/src/gallium/drivers/svga/svga_pipe_streamout.c @@ -311,6 +311,25 @@ svga_set_stream_output_targets(struct pipe_context *pipe, svga->num_so_targets = num_targets; } +/** + * Rebind stream output target surfaces + */ +enum pipe_error +svga_rebind_stream_output_targets(struct svga_context *svga) +{ + struct svga_winsys_context *swc = svga->swc; + enum pipe_error ret; + unsigned i; + + for (i = 0; i < svga->num_so_targets; i++) { + ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE); + if (ret != PIPE_OK) + return ret; + } + + return PIPE_OK; +} + void svga_init_stream_output_functions(struct svga_context *svga) { diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c index a8ffcc7f680..9ecb97509c2 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer.c +++ b/src/gallium/drivers/svga/svga_resource_buffer.c @@ -109,6 +109,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe, assert(ret == PIPE_OK); } + svga->hud.num_readbacks++; + svga_context_finish(svga); sbuf->dirty = FALSE; diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c index 7f7ceab0aa5..1121b780af1 100644 --- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c +++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c @@ -311,6 +311,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga, swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; + svga->hud.num_resource_updates++; + return PIPE_OK; } @@ -385,6 +387,8 @@ svga_buffer_upload_command(struct svga_context *svga, swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH; sbuf->dma.flags.discard = FALSE; + svga->hud.num_buffer_uploads++; + return PIPE_OK; } @@ -433,6 +437,7 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x + box->w <= sbuf->b.b.width0); svga->hud.num_bytes_uploaded += box->w; + svga->hud.num_buffer_uploads++; } } else { @@ -460,6 +465,7 @@ svga_buffer_upload_flush(struct svga_context *svga, assert(box->x + box->w <= sbuf->b.b.width0); svga->hud.num_bytes_uploaded += box->w; + svga->hud.num_buffer_uploads++; } } diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c index 1edb41dabee..db730802c7a 100644 --- a/src/gallium/drivers/svga/svga_resource_texture.c +++ b/src/gallium/drivers/svga/svga_resource_texture.c @@ -448,6 +448,8 @@ svga_texture_transfer_map(struct pipe_context *pipe, ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level); } + svga->hud.num_readbacks++; + assert(ret == PIPE_OK); (void) ret; @@ -681,6 +683,8 @@ svga_texture_transfer_unmap(struct pipe_context *pipe, ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level); } + svga->hud.num_resource_updates++; + assert(ret == PIPE_OK); (void) ret; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index bcc512041f7..c0873c0c65a 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -837,6 +837,16 @@ svga_get_driver_query_info(struct pipe_screen *screen, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS), QUERY("surface-write-flushes", SVGA_QUERY_SURFACE_WRITE_FLUSHES, PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-readbacks", SVGA_QUERY_NUM_READBACKS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-resource-updates", SVGA_QUERY_NUM_RESOURCE_UPDATES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-buffer-uploads", SVGA_QUERY_NUM_BUFFER_UPLOADS, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-const-buf-updates", SVGA_QUERY_NUM_CONST_BUF_UPDATES, + PIPE_DRIVER_QUERY_TYPE_UINT64), + QUERY("num-const-updates", SVGA_QUERY_NUM_CONST_UPDATES, + PIPE_DRIVER_QUERY_TYPE_UINT64), /* running total counters */ QUERY("memory-used", SVGA_QUERY_MEMORY_USED, diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c index 5c99e16d976..78eb3f65b61 100644 --- a/src/gallium/drivers/svga/svga_shader.c +++ b/src/gallium/drivers/svga/svga_shader.c @@ -180,18 +180,18 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, assert(view->texture); assert(view->texture->target < (1 << 4)); /* texture_target:4 */ - key->tex[i].texture_target = view->texture->target; - /* 1D/2D array textures with one slice are treated as non-arrays * by the SVGA3D device. Convert the texture type here so that * we emit the right TEX/SAMPLE instruction in the shader. */ - if (view->texture->array_size == 1) { - if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) { - key->tex[i].texture_target = PIPE_TEXTURE_1D; + if (view->texture->target == PIPE_TEXTURE_1D_ARRAY || + view->texture->target == PIPE_TEXTURE_2D_ARRAY) { + if (view->texture->array_size == 1) { + key->tex[i].is_array = 0; } - else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) { - key->tex[i].texture_target = PIPE_TEXTURE_2D; + else { + assert(view->texture->array_size > 1); + key->tex[i].is_array = 1; } } @@ -207,8 +207,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader, key->tex[i].swizzle_g = view->swizzle_g; key->tex[i].swizzle_b = view->swizzle_b; key->tex[i].swizzle_a = view->swizzle_a; - - key->tex[i].return_type = svga_get_texture_datatype(view->format); } } key->num_textures = svga->curr.num_sampler_views[shader]; diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h index f49fdb46d0e..3f915740b1f 100644 --- a/src/gallium/drivers/svga/svga_shader.h +++ b/src/gallium/drivers/svga/svga_shader.h @@ -98,14 +98,13 @@ struct svga_compile_key unsigned compare_func:3; unsigned unnormalized:1; unsigned width_height_idx:5; /**< texture unit */ - unsigned texture_target:4; /**< PIPE_TEXTURE_x */ + unsigned is_array:1; unsigned texture_msaa:1; /**< A multisample texture? */ unsigned sprite_texgen:1; unsigned swizzle_r:3; unsigned swizzle_g:3; unsigned swizzle_b:3; unsigned swizzle_a:3; - unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */ } tex[PIPE_MAX_SAMPLERS]; /* Note: svga_compile_keys_equal() depends on the variable-size * tex[] array being at the end of this structure. diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c index 8ab1693088a..5ae0382cd45 100644 --- a/src/gallium/drivers/svga/svga_state_constants.c +++ b/src/gallium/drivers/svga/svga_state_constants.c @@ -301,6 +301,8 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i, return ret; memcpy(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float)); + + svga->hud.num_const_updates++; } return ret; @@ -420,6 +422,9 @@ emit_const_range(struct svga_context *svga, (j - i) * 4 * sizeof(float)); i = j + 1; + + svga->hud.num_const_updates++; + } else { ++i; } @@ -549,6 +554,7 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) void *src_map = NULL, *dst_map; unsigned offset; const struct svga_shader_variant *variant; + unsigned alloc_buf_size; assert(shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY || @@ -613,7 +619,16 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) */ new_buf_size = align(new_buf_size, 16); - u_upload_alloc(svga->const0_upload, 0, new_buf_size, + /* Constant buffer size in the upload buffer must be in multiples of 256. + * In order to maximize the chance of merging the upload buffer chunks + * when svga_buffer_add_range() is called, + * the allocate buffer size needs to be in multiples of 256 as well. + * Otherwise, since there is gap between each dirty range of the upload buffer, + * each dirty range will end up in its own UPDATE_GB_IMAGE command. + */ + alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT); + + u_upload_alloc(svga->const0_upload, 0, alloc_buf_size, CONST0_UPLOAD_ALIGNMENT, &offset, &dst_buffer, &dst_map); if (!dst_map) { @@ -664,6 +679,8 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader) pipe_resource_reference(&dst_buffer, NULL); + svga->hud.num_const_buf_updates++; + return ret; } @@ -732,6 +749,8 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader) size); if (ret != PIPE_OK) return ret; + + svga->hud.num_const_buf_updates++; } svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs; diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h index da0c4457d2e..1daa1ad5352 100644 --- a/src/gallium/drivers/svga/svga_streamout.h +++ b/src/gallium/drivers/svga/svga_streamout.h @@ -47,4 +47,7 @@ void svga_delete_stream_output(struct svga_context *svga, struct svga_stream_output *streamout); +enum pipe_error +svga_rebind_stream_output_targets(struct svga_context *svga); + #endif /* SVGA_STREAMOUT_H */ diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c index ca4009b9e38..204b814a964 100644 --- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c +++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c @@ -517,15 +517,15 @@ vs30_output(struct svga_shader_emitter *emit, static ubyte svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx) { - switch (emit->key.tex[idx].texture_target) { - case PIPE_TEXTURE_1D: + switch (emit->sampler_target[idx]) { + case TGSI_TEXTURE_1D: return SVGA3DSAMP_2D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: return SVGA3DSAMP_2D; - case PIPE_TEXTURE_3D: + case TGSI_TEXTURE_3D: return SVGA3DSAMP_VOLUME; - case PIPE_TEXTURE_CUBE: + case TGSI_TEXTURE_CUBE: return SVGA3DSAMP_CUBE; } @@ -585,6 +585,14 @@ svga_translate_decl_sm30( struct svga_shader_emitter *emit, ok = ps30_output( emit, decl->Semantic, idx ); break; + case TGSI_FILE_SAMPLER_VIEW: + { + unsigned unit = decl->Range.First; + assert(decl->Range.First == decl->Range.Last); + emit->sampler_target[unit] = decl->SamplerView.Resource; + } + break; + default: /* don't need to declare other vars */ ok = TRUE; diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h index 83f0c8bd4d0..7a593ba6e9d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_emit.h +++ b/src/gallium/drivers/svga/svga_tgsi_emit.h @@ -136,6 +136,8 @@ struct svga_shader_emitter int current_arl; unsigned pstipple_sampler_unit; + + uint8_t sampler_target[PIPE_MAX_SAMPLERS]; }; diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 489e68f88e8..3188c411863 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -3849,7 +3849,7 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit, if (new_tokens) { /* Setup texture state for stipple */ - emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->sampler_target[unit] = TGSI_TEXTURE_2D; emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 0c5afeb4cf9..0d5628251df 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -134,6 +134,8 @@ struct svga_shader_emitter_v10 /* Samplers */ unsigned num_samplers; + ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ + ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ /* Address regs (really implemented with temps) */ unsigned num_address_regs; @@ -2312,9 +2314,13 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, return TRUE; case TGSI_FILE_SAMPLER_VIEW: - /* Not used at this time, but maybe in the future. - * See emit_resource_declarations(). - */ + { + unsigned unit = decl->Range.First; + assert(decl->Range.First == decl->Range.Last); + emit->sampler_target[unit] = decl->SamplerView.Resource; + /* Note: we can ignore YZW return types for now */ + emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; + } return TRUE; default: @@ -2854,7 +2860,7 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit) /* Texture buffer sizes */ for (i = 0; i < emit->num_samplers; i++) { - if (emit->key.tex[i].texture_target == PIPE_BUFFER) { + if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) { emit->texture_buffer_size_index[i] = total_consts++; } } @@ -2918,30 +2924,44 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) /** - * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. + * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x. */ static unsigned -pipe_texture_to_resource_dimension(unsigned target, bool msaa) +tgsi_texture_to_resource_dimension(unsigned target, boolean is_array) { switch (target) { - case PIPE_BUFFER: + case TGSI_TEXTURE_BUFFER: return VGPU10_RESOURCE_DIMENSION_BUFFER; - case PIPE_TEXTURE_1D: + case TGSI_TEXTURE_1D: return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS - : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; - case PIPE_TEXTURE_3D: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case TGSI_TEXTURE_3D: return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; - case PIPE_TEXTURE_CUBE: + case TGSI_TEXTURE_CUBE: + return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; + case TGSI_TEXTURE_SHADOW1D: + return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; + case TGSI_TEXTURE_SHADOWCUBE: return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; - case PIPE_TEXTURE_1D_ARRAY: - return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY; - case PIPE_TEXTURE_2D_ARRAY: - return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY - : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; - case PIPE_TEXTURE_CUBE_ARRAY: + case TGSI_TEXTURE_2D_MSAA: + return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; + case TGSI_TEXTURE_2D_ARRAY_MSAA: + return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY + : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; + case TGSI_TEXTURE_CUBE_ARRAY: return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; default: assert(!"Unexpected resource type"); @@ -2993,8 +3013,8 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) opcode0.value = 0; opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; opcode0.resourceDimension = - pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target, - emit->key.tex[i].texture_msaa); + tgsi_texture_to_resource_dimension(emit->sampler_target[i], + emit->key.tex[i].is_array); operand0.value = 0; operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; @@ -3008,10 +3028,10 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit) STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); - assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT); - rt = emit->key.tex[i].return_type + 1; + assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); + rt = emit->sampler_return_type[i] + 1; #else - switch (emit->key.tex[i].return_type) { + switch (emit->sampler_return_type[i]) { case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; @@ -5024,7 +5044,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit, unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; unsigned writemask_0 = 0, writemask_1 = 0; - boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type); + boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); /* Swizzle w/out zero/one terms */ struct tgsi_full_src_register src_swizzled = @@ -5131,7 +5151,7 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, boolean valid = TRUE; if (tgsi_is_shadow_target(target) && - is_integer_type(emit->key.tex[unit].return_type)) { + is_integer_type(emit->sampler_return_type[unit])) { debug_printf("Invalid SAMPLE_C with an integer texture!\n"); valid = FALSE; } @@ -5528,7 +5548,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit, { const uint unit = inst->Src[1].Register.Index; - if (emit->key.tex[unit].texture_target == PIPE_BUFFER) { + if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { /* RESINFO does not support querying texture buffers, so we instead * store texture buffer sizes in shader constants, then copy them to * implement TXQ instead of emitting RESINFO. @@ -6617,7 +6637,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit, emit->fs.pstipple_sampler_unit = unit; /* Setup texture state for stipple */ - emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D; + emit->sampler_target[unit] = TGSI_TEXTURE_2D; emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index 0ad6b5e6c76..7da2c4e77ca 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -48,6 +48,7 @@ struct svga_winsys_screen; struct svga_winsys_buffer; struct pipe_screen; struct pipe_context; +struct pipe_debug_callback; struct pipe_fence_handle; struct pipe_resource; struct svga_region; @@ -286,6 +287,9 @@ struct svga_winsys_context struct svga_winsys_surface *surface, struct svga_winsys_gb_shader *shader, unsigned flags); + + /** To report perf/conformance/etc issues to the state tracker */ + struct pipe_debug_callback *debug_callback; }; diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index c8cb145d334..78b8fdf619b 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -129,7 +129,7 @@ swr_transfer_map(struct pipe_context *pipe, swr_fence_submit(swr_context(pipe), screen->flush_fence); swr_fence_finish(pipe->screen, screen->flush_fence, 0); - swr_resource_unused(pipe, spr); + swr_resource_unused(resource); } } } @@ -206,8 +206,8 @@ swr_resource_copy(struct pipe_context *pipe, swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED); swr_fence_finish(pipe->screen, screen->flush_fence, 0); - swr_resource_unused(pipe, swr_resource(src)); - swr_resource_unused(pipe, swr_resource(dst)); + swr_resource_unused(src); + swr_resource_unused(dst); if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) { @@ -293,6 +293,7 @@ static void swr_destroy(struct pipe_context *pipe) { struct swr_context *ctx = swr_context(pipe); + struct swr_screen *screen = swr_screen(pipe->screen); if (ctx->blitter) util_blitter_destroy(ctx->blitter); @@ -306,6 +307,9 @@ swr_destroy(struct pipe_context *pipe) swr_destroy_scratch_buffers(ctx); + assert(screen); + screen->pipe = NULL; + FREE(ctx); } @@ -324,9 +328,10 @@ swr_render_condition(struct pipe_context *pipe, } struct pipe_context * -swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags) +swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags) { struct swr_context *ctx = CALLOC_STRUCT(swr_context); + struct swr_screen *screen = swr_screen(p_screen); ctx->blendJIT = new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>; @@ -347,7 +352,8 @@ swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags) if (ctx->swrContext == NULL) goto fail; - ctx->pipe.screen = screen; + screen->pipe = &ctx->pipe; + ctx->pipe.screen = p_screen; ctx->pipe.destroy = swr_destroy; ctx->pipe.priv = priv; ctx->pipe.create_surface = swr_create_surface; diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h index 2fdc7683cb8..59cf0284461 100644 --- a/src/gallium/drivers/swr/swr_resource.h +++ b/src/gallium/drivers/swr/swr_resource.h @@ -54,9 +54,6 @@ struct swr_resource { unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; enum swr_resource_status status; - - /* pipe_context to which resource is currently bound. */ - struct pipe_context *bound_to_context; }; @@ -120,24 +117,21 @@ swr_resource_status & operator|=(enum swr_resource_status & a, } static INLINE void -swr_resource_read(struct pipe_context *pipe, struct swr_resource *resource) +swr_resource_read(struct pipe_resource *resource) { - resource->status |= SWR_RESOURCE_READ; - resource->bound_to_context = pipe; + swr_resource(resource)->status |= SWR_RESOURCE_READ; } static INLINE void -swr_resource_write(struct pipe_context *pipe, struct swr_resource *resource) +swr_resource_write(struct pipe_resource *resource) { - resource->status |= SWR_RESOURCE_WRITE; - resource->bound_to_context = pipe; + swr_resource(resource)->status |= SWR_RESOURCE_WRITE; } static INLINE void -swr_resource_unused(struct pipe_context *pipe, struct swr_resource *resource) +swr_resource_unused(struct pipe_resource *resource) { - resource->status = SWR_RESOURCE_UNUSED; - resource->bound_to_context = nullptr; + swr_resource(resource)->status = SWR_RESOURCE_UNUSED; } #endif diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index e46df47570f..f9e52be2367 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -620,7 +620,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) { struct swr_screen *screen = swr_screen(p_screen); struct swr_resource *spr = swr_resource(pt); - struct pipe_context *pipe = spr->bound_to_context; + struct pipe_context *pipe = screen->pipe; /* Only wait on fence if the resource is being used */ if (pipe && spr->status) { @@ -630,7 +630,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) swr_fence_submit(swr_context(pipe), screen->flush_fence); swr_fence_finish(p_screen, screen->flush_fence, 0); - swr_resource_unused(pipe, spr); + swr_resource_unused(pt); } /* @@ -661,11 +661,11 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen, struct swr_screen *screen = swr_screen(p_screen); struct sw_winsys *winsys = screen->winsys; struct swr_resource *spr = swr_resource(resource); - struct pipe_context *pipe = spr->bound_to_context; + struct pipe_context *pipe = screen->pipe; if (pipe) { swr_fence_finish(p_screen, screen->flush_fence, 0); - swr_resource_unused(pipe, spr); + swr_resource_unused(resource); SwrEndFrame(swr_context(pipe)->swrContext); } diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h index a96dc44cf66..0c82a2eff7a 100644 --- a/src/gallium/drivers/swr/swr_screen.h +++ b/src/gallium/drivers/swr/swr_screen.h @@ -32,6 +32,7 @@ struct sw_winsys; struct swr_screen { struct pipe_screen base; + struct pipe_context *pipe; struct pipe_fence_handle *flush_fence; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 47ee3cb2664..e7bf3618a7d 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -646,24 +646,24 @@ swr_update_resource_status(struct pipe_context *pipe, if (fb->nr_cbufs) for (uint32_t i = 0; i < fb->nr_cbufs; ++i) if (fb->cbufs[i]) - swr_resource_write(pipe, swr_resource(fb->cbufs[i]->texture)); + swr_resource_write(fb->cbufs[i]->texture); /* depth/stencil target */ if (fb->zsbuf) - swr_resource_write(pipe, swr_resource(fb->zsbuf->texture)); + swr_resource_write(fb->zsbuf->texture); /* VBO vertex buffers */ for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) { struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; if (!vb->user_buffer) - swr_resource_read(pipe, swr_resource(vb->buffer)); + swr_resource_read(vb->buffer); } /* VBO index buffer */ if (p_draw_info && p_draw_info->indexed) { struct pipe_index_buffer *ib = &ctx->index_buffer; if (!ib->user_buffer) - swr_resource_read(pipe, swr_resource(ib->buffer)); + swr_resource_read(ib->buffer); } /* texture sampler views */ @@ -671,7 +671,7 @@ swr_update_resource_status(struct pipe_context *pipe, struct pipe_sampler_view *view = ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]; if (view) - swr_resource_read(pipe, swr_resource(view->texture)); + swr_resource_read(view->texture); } } diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index a13e309985a..49a314cdb25 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -62,7 +62,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample) load->num_components = 1; load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample; load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); nir_builder_instr_insert(b, &load->instr); return &load->dest.ssa; } @@ -627,7 +627,7 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_sample_mask_in); load->num_components = 1; - nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL); + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); nir_builder_instr_insert(b, &load->instr); nir_ssa_def *bitmask = &load->dest.ssa; diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index d47e3bf52b0..d08ad588e5b 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -183,7 +183,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, * with an offset value of 0. */ assert(nir_src_as_const_value(intr->src[0]) && - nir_src_as_const_value(intr->src[0])->u[0] == 0); + nir_src_as_const_value(intr->src[0])->u32[0] == 0); /* Generate dword loads for the VPM values (Since these intrinsics may * be reordered, the actual reads will be generated at the top of the @@ -197,7 +197,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b, intr_comp->num_components = 1; intr_comp->const_index[0] = intr->const_index[0] * 4 + i; intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); nir_builder_instr_insert(b, &intr_comp->instr); vpm_reads[i] = &intr_comp->dest.ssa; @@ -256,7 +256,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, * with an offset value of 0. */ assert(nir_src_as_const_value(intr->src[0]) && - nir_src_as_const_value(intr->src[0])->u[0] == 0); + nir_src_as_const_value(intr->src[0])->u32[0] == 0); /* Generate scalar loads equivalent to the original VEC4. */ nir_ssa_def *dests[4]; @@ -267,7 +267,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b, intr_comp->const_index[0] = intr->const_index[0] * 4 + i; intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); nir_builder_instr_insert(b, &intr_comp->instr); dests[i] = &intr_comp->dest.ssa; @@ -339,7 +339,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b, * with an offset value of 0. */ assert(nir_src_as_const_value(intr->src[1]) && - nir_src_as_const_value(intr->src[1])->u[0] == 0); + nir_src_as_const_value(intr->src[1])->u32[0] == 0); b->cursor = nir_before_instr(&intr->instr); @@ -378,7 +378,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr_comp = nir_intrinsic_instr_create(c->s, intr->intrinsic); intr_comp->num_components = 1; - nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL); /* Convert the uniform (not user_clip_plane) offset to bytes. * If it happens to be a constant, constant-folding will clean diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c index f6ba5b802ad..a2d89ef3349 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c @@ -123,7 +123,7 @@ vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b, txf->src[0].src_type = nir_tex_src_coord; txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0))); - nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL); + nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL); nir_builder_instr_insert(b, &txf->instr); nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa, nir_src_for_ssa(&txf->dest.ssa)); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index f5826d85174..71a1ebbb313 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -118,7 +118,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b, intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4; intr->num_components = 1; intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL); + nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL); nir_builder_instr_insert(b, &intr->instr); return &intr->dest.ssa; } @@ -885,7 +885,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest, struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0); struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1); - if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float) + unsigned unsized_type = + nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]); + if (unsized_type == nir_type_float) qir_SF(c, qir_FSUB(c, src0, src1)); else qir_SF(c, qir_SUB(c, src0, src1)); @@ -1519,7 +1521,7 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr) { struct qreg *qregs = ntq_init_ssa_def(c, &instr->def); for (int i = 0; i < instr->def.num_components; i++) - qregs[i] = qir_uniform_ui(c, instr->value.u[i]); + qregs[i] = qir_uniform_ui(c, instr->value.u32[i]); _mesa_hash_table_insert(c->def_ht, &instr->def, qregs); } @@ -1553,7 +1555,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) assert(instr->num_components == 1); const_offset = nir_src_as_const_value(instr->src[0]); if (const_offset) { - offset = instr->const_index[0] + const_offset->u[0]; + offset = instr->const_index[0] + const_offset->u32[0]; assert(offset % 4 == 0); /* We need dwords */ offset = offset / 4; @@ -1584,7 +1586,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) const_offset = nir_src_as_const_value(instr->src[0]); assert(const_offset && "vc4 doesn't support indirect inputs"); if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) { - assert(const_offset->u[0] == 0); + assert(const_offset->u32[0] == 0); /* Reads of the per-sample color need to be done in * order. */ @@ -1598,7 +1600,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } *dest = c->color_reads[sample_index]; } else { - offset = instr->const_index[0] + const_offset->u[0]; + offset = instr->const_index[0] + const_offset->u32[0]; *dest = c->inputs[offset]; } break; @@ -1606,7 +1608,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_store_output: const_offset = nir_src_as_const_value(instr->src[1]); assert(const_offset && "vc4 doesn't support indirect outputs"); - offset = instr->const_index[0] + const_offset->u[0]; + offset = instr->const_index[0] + const_offset->u32[0]; /* MSAA color outputs are the only case where we have an * output that's not lowered to being a store of a single 32 |