summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c10
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h3
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp4
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp93
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp18
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp39
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp63
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp9
-rw-r--r--src/gallium/drivers/nouveau/nouveau_compiler.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_compute.c145
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.c34
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_context.h77
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_program.c1
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c10
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_shader_state.c37
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state.c58
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_state_validate.c137
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_surface.c40
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c2
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c14
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_winsys.h4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c13
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c16
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c14
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c28
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c9
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c14
-rw-r--r--src/gallium/drivers/r300/r300_context.c2
-rw-r--r--src/gallium/drivers/r300/r300_flush.c6
-rw-r--r--src/gallium/drivers/r300/r300_texture.c6
-rw-r--r--src/gallium/drivers/r600/Makefile.am8
-rw-r--r--src/gallium/drivers/r600/Makefile.sources4
-rw-r--r--src/gallium/drivers/r600/evergreen_compute.c65
-rw-r--r--src/gallium/drivers/r600/evergreen_compute_internal.h4
-rw-r--r--src/gallium/drivers/r600/r600_hw_context.c11
-rw-r--r--src/gallium/drivers/r600/r600_llvm.c943
-rw-r--r--src/gallium/drivers/r600/r600_llvm.h42
-rw-r--r--src/gallium/drivers/r600/r600_pipe.c9
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h9
-rw-r--r--src/gallium/drivers/r600/r600_shader.c280
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c23
-rw-r--r--src/gallium/drivers/r600/sb/sb_expr.cpp8
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c21
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h8
-rw-r--r--src/gallium/drivers/radeon/r600_texture.c38
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm.h17
-rw-r--r--src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c3
-rw-r--r--src/gallium/drivers/radeon/radeon_uvd.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_vce.c4
-rw-r--r--src/gallium/drivers/radeon/radeon_winsys.h13
-rw-r--r--src/gallium/drivers/radeonsi/si_blit.c36
-rw-r--r--src/gallium/drivers/radeonsi/si_descriptors.c226
-rw-r--r--src/gallium/drivers/radeonsi/si_hw_context.c3
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.c8
-rw-r--r--src/gallium/drivers/radeonsi/si_pipe.h7
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c675
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h4
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c87
-rw-r--r--src/gallium/drivers/radeonsi/si_state.h19
-rw-r--r--src/gallium/drivers/radeonsi/si_state_shaders.c41
-rw-r--r--src/gallium/drivers/svga/svga_context.c1
-rw-r--r--src/gallium/drivers/svga/svga_context.h52
-rw-r--r--src/gallium/drivers/svga/svga_draw.c8
-rw-r--r--src/gallium/drivers/svga/svga_pipe_misc.c7
-rw-r--r--src/gallium/drivers/svga/svga_pipe_query.c50
-rw-r--r--src/gallium/drivers/svga/svga_pipe_streamout.c19
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer.c2
-rw-r--r--src/gallium/drivers/svga/svga_resource_buffer_upload.c6
-rw-r--r--src/gallium/drivers/svga/svga_resource_texture.c4
-rw-r--r--src/gallium/drivers/svga/svga_screen.c10
-rw-r--r--src/gallium/drivers/svga/svga_shader.c16
-rw-r--r--src/gallium/drivers/svga/svga_shader.h3
-rw-r--r--src/gallium/drivers/svga/svga_state_constants.c21
-rw-r--r--src/gallium/drivers/svga/svga_streamout.h3
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_decl_sm30.c20
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_emit.h2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_insn.c2
-rw-r--r--src/gallium/drivers/svga/svga_tgsi_vgpu10.c78
-rw-r--r--src/gallium/drivers/svga/svga_winsys.h4
-rw-r--r--src/gallium/drivers/swr/swr_context.cpp16
-rw-r--r--src/gallium/drivers/swr/swr_resource.h18
-rw-r--r--src/gallium/drivers/swr/swr_screen.cpp8
-rw-r--r--src/gallium/drivers/swr/swr_screen.h1
-rw-r--r--src/gallium/drivers/swr/swr_state.cpp10
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_blend.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_io.c12
-rw-r--r--src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c2
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c16
93 files changed, 2081 insertions, 1877 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 7a1812f2518..54315d2f592 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1017,7 +1017,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
const_offset = nir_src_as_const_value(intr->src[1]);
if (const_offset) {
- off += const_offset->u[0];
+ off += const_offset->u32[0];
} else {
/* For load_ubo_indirect, second src is indirect offset: */
src1 = get_src(ctx, &intr->src[1])[0];
@@ -1159,7 +1159,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = create_uniform(ctx, n);
@@ -1186,7 +1186,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
if (const_offset) {
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
for (int i = 0; i < intr->num_components; i++) {
unsigned n = idx * 4 + i;
dst[i] = ctx->ir->inputs[n];
@@ -1213,7 +1213,7 @@ emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[1]);
compile_assert(ctx, const_offset != NULL);
- idx += const_offset->u[0];
+ idx += const_offset->u32[0];
src = get_src(ctx, &intr->src[0]);
for (int i = 0; i < intr->num_components; i++) {
@@ -1301,7 +1301,7 @@ emit_load_const(struct ir3_compile *ctx, nir_load_const_instr *instr)
struct ir3_instruction **dst = get_dst_ssa(ctx, &instr->def,
instr->def.num_components);
for (int i = 0; i < instr->def.num_components; i++)
- dst[i] = create_immed(ctx->block, instr->value.u[i]);
+ dst[i] = create_immed(ctx->block, instr->value.u32[i]);
}
static void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
index 8815ac981eb..ec76b0bdc4d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c
@@ -290,7 +290,7 @@ lower_if_else_block(nir_block *block, void *void_state)
}
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components, 32, phi->dest.ssa.name);
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 9f7d2572bbe..21523a27761 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -160,7 +160,7 @@ struct nv50_ir_prog_info
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
- uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
+ uint8_t auxCBSlot; /* driver constant buffer slot */
uint16_t ucpBase; /* base address for UCPs */
uint16_t drawInfoBase; /* base address for draw parameters */
uint8_t pointSize; /* output index for PointSize */
@@ -175,7 +175,6 @@ struct nv50_ir_prog_info
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
bool fp64; /* program uses fp64 math */
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
- uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
uint16_t sampleInfoBase; /* base address for sample positions */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 0d7d95e3105..70f3c3f69ff 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1655,10 +1655,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
break;
}
- if (i->src(0).getFile() != FILE_MEMORY_GLOBAL)
- offset &= 0xffffff;
-
if (code[0] & 0x2) {
+ offset &= 0xffffff;
emitLoadStoreType(i->dType, 0x33);
if (i->src(0).getFile() == FILE_MEMORY_LOCAL)
emitCachingMode(i->cache, 0x2f);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 682a19d6d78..bd6200687ed 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1634,7 +1634,9 @@ CodeEmitterNV50::emitTEX(const TexInstruction *i)
code[1] |= (i->tex.mask & 0xc) << 12;
if (i->tex.liveOnly)
- code[1] |= 4;
+ code[1] |= 1 << 2;
+ if (i->tex.derivAll)
+ code[1] |= 1 << 3;
defId(i->def(0), 2);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index d284446f5d9..611d5f9c3ed 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -856,15 +856,17 @@ public:
};
std::vector<TextureView> textureViews;
+ /*
struct Resource {
uint8_t target; // TGSI_TEXTURE_*
bool raw;
uint8_t slot; // $surface index
};
std::vector<Resource> resources;
+ */
struct MemoryFile {
- bool shared;
+ uint8_t mem_type; // TGSI_MEMORY_TYPE_*
};
std::vector<MemoryFile> memoryFiles;
@@ -1037,6 +1039,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
info->io.cullDistances = prop->u[0].Data;
break;
+ case TGSI_PROPERTY_NEXT_SHADER:
+ /* Do not need to know the next shader stage. */
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1222,7 +1227,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
break;
case TGSI_FILE_MEMORY:
for (i = first; i <= last; ++i)
- memoryFiles[i].shared = decl->Declaration.Shared;
+ memoryFiles[i].mem_type = decl->Declaration.MemType;
break;
case TGSI_FILE_NULL:
case TGSI_FILE_TEMPORARY:
@@ -1261,9 +1266,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
info->numBarriers = 1;
if (insn.dstCount()) {
- if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) {
- Instruction::DstRegister dst = insn.getDst(0);
+ Instruction::DstRegister dst = insn.getDst(0);
+ if (dst.getFile() == TGSI_FILE_OUTPUT) {
if (dst.isIndirect(0))
for (unsigned i = 0; i < info->numOutputs; ++i)
info->out[i].mask = 0xf;
@@ -1280,11 +1285,11 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (isEdgeFlagPassthrough(insn))
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
} else
- if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
- if (insn.getDst(0).isIndirect(0))
- indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ if (dst.getFile() == TGSI_FILE_TEMPORARY) {
+ if (dst.isIndirect(0))
+ indirectTempArrays.insert(dst.getArrayId());
} else
- if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ if (dst.getFile() == TGSI_FILE_BUFFER) {
info->io.globalAccess |= 0x2;
}
}
@@ -1419,8 +1424,8 @@ private:
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
- Symbol *getResourceBase(int r);
- void getResourceCoords(std::vector<Value *>&, int r, int s);
+ // Symbol *getResourceBase(int r);
+ // void getResourceCoords(std::vector<Value *>&, int r, int s);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
@@ -1527,8 +1532,21 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
sym->reg.fileIndex = fileIdx;
- if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared)
- sym->setFile(FILE_MEMORY_SHARED);
+ if (tgsiFile == TGSI_FILE_MEMORY) {
+ switch (code->memoryFiles[fileIdx].mem_type) {
+ case TGSI_MEMORY_TYPE_SHARED:
+ sym->setFile(FILE_MEMORY_SHARED);
+ break;
+ case TGSI_MEMORY_TYPE_INPUT:
+ assert(prog->getType() == Program::TYPE_COMPUTE);
+ assert(idx == -1);
+ sym->setFile(FILE_SHADER_INPUT);
+ address += info->prop.cp.inputOffset;
+ break;
+ default:
+ assert(0); /* TODO: Add support for global and private memory */
+ }
+ }
if (idx >= 0) {
if (sym->reg.file == FILE_SHADER_INPUT)
@@ -1989,7 +2007,6 @@ Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
void
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
{
- Value *val;
Value *arg[4], *src[8];
Value *lod = NULL, *shd = NULL;
unsigned int s, c, d;
@@ -2032,17 +2049,6 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
shd = src[n - 1];
}
- if (tgt.isCube()) {
- for (c = 0; c < 3; ++c)
- src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
- val = getScratch();
- mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
- mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
- mkOp1(OP_RCP, TYPE_F32, val, val);
- for (c = 0; c < 3; ++c)
- src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
- }
-
for (c = 0, d = 0; c < 4; ++c) {
if (dst[c]) {
texi->setDef(d++, dst[c]);
@@ -2148,6 +2154,7 @@ Converter::handleLIT(Value *dst0[4])
}
}
+/* Keep this around for now as reference when adding img support
static inline bool
isResourceSpecial(const int r)
{
@@ -2178,7 +2185,8 @@ Converter::getResourceBase(const int r)
switch (r) {
case TGSI_RESOURCE_GLOBAL:
- sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
+ sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
+ info->io.auxCBSlot);
break;
case TGSI_RESOURCE_LOCAL:
assert(prog->getType() == Program::TYPE_COMPUTE);
@@ -2243,6 +2251,7 @@ partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
}
return n + 1;
}
+*/
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
@@ -2253,8 +2262,9 @@ Converter::handleLOAD(Value *dst0[4])
int c;
std::vector<Value *> off, src, ldv, def;
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2274,9 +2284,12 @@ Converter::handleLOAD(Value *dst0[4])
if (tgsi.getSrc(0).isIndirect(0))
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
}
- return;
+ break;
+ default:
+ assert(!"Unsupported srcFile for LOAD");
}
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
@@ -2342,6 +2355,7 @@ Converter::handleLOAD(Value *dst0[4])
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
+*/
}
// For formatted stores, the write mask on OP_SUSTP can be used.
@@ -2353,8 +2367,9 @@ Converter::handleSTORE()
int c;
std::vector<Value *> off, src, dummy;
- if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getDst(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (c = 0; c < 4; ++c) {
if (!(tgsi.getDst(0).getMask() & (1 << c)))
continue;
@@ -2375,9 +2390,12 @@ Converter::handleSTORE()
if (tgsi.getDst(0).isIndirect(0))
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
}
- return;
+ break;
+ default:
+ assert(!"Unsupported dstFile for STORE");
}
+/* Keep this around for now as reference when adding img support
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
@@ -2425,6 +2443,7 @@ Converter::handleSTORE()
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
}
+*/
}
// XXX: These only work on resources with the single-component u32/s32 formats.
@@ -2439,8 +2458,9 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
std::vector<Value *> defv;
LValue *dst = getScratch();
- if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER ||
- tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) {
+ switch (tgsi.getSrc(0).getFile()) {
+ case TGSI_FILE_BUFFER:
+ case TGSI_FILE_MEMORY:
for (int c = 0; c < 4; ++c) {
if (!dst0[c])
continue;
@@ -2468,10 +2488,12 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
- return;
+ break;
+ default:
+ assert(!"Unsupported srcFile for ATOM");
}
-
+/* Keep this around for now as reference when adding img support
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
@@ -2499,6 +2521,7 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
for (int c = 0; c < 4; ++c)
if (dst0[c])
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+*/
}
void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 0b903780614..a5deaef14e0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -67,6 +67,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
tmp = bld.getScratch();
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (c = 0; c < dim; ++c) {
@@ -92,10 +93,25 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
add->lanes = 1; /* abused for .ndv */
}
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
+
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
// save results
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 12c5f699603..02c4f1a4ca8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -682,7 +682,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
Value **ms_x, Value **ms_y) {
// This loads the texture-indexed ms setting from the constant buffer
Value *tmp = new_LValue(func, FILE_GPR);
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (prog->getType() > Program::TYPE_VERTEX)
off += 16 * 2 * 4;
@@ -724,6 +724,23 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
const int dref = arg;
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg;
+ /* Only normalize in the non-explicit derivatives case.
+ */
+ if (i->tex.target.isCube() && i->op != OP_TXD) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// handle MS, which means looking up the MS params for this texture, and
// adjusting the input coordinates to point at the right sample.
if (i->tex.target.isMS()) {
@@ -934,12 +951,14 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
handleTEX(i);
i->op = OP_TEX; // no need to clone dPdx/dPdy later
+ i->tex.derivAll = true;
for (c = 0; c < dim; ++c)
crd[c] = bld.getScratch();
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
@@ -949,10 +968,24 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates if necessary
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c, crd[c]);
+ tex->setSrc(c, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -1174,7 +1207,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
bld.mkLoad(TYPE_F32,
def,
bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
off);
break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index d0936d88d60..e8f8e30918b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -600,7 +600,7 @@ NVC0LoweringPass::visit(BasicBlock *bb)
inline Value *
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
uint32_t off = prog->driver->io.texBindBase + slot * 4;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
@@ -615,6 +615,24 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
const int chipset = prog->getTarget()->getChipset();
+ /* Only normalize in the non-explicit derivatives case. For explicit
+ * derivatives, this is handled in handleManualTXD.
+ */
+ if (i->tex.target.isCube() && i->dPdx[0].get() == NULL) {
+ Value *src[3], *val;
+ int c;
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), i->getSrc(c));
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c) {
+ i->setSrc(c, bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(),
+ i->getSrc(c), val));
+ }
+ }
+
// Arguments to the TEX instruction are a little insane. Even though the
// encoding is identical between SM20 and SM30, the arguments mean
// different things between Fermi and Kepler+. A lot of arguments are
@@ -728,9 +746,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
}
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
- for (int s = dim; s >= 1; --s)
- i->setSrc(s, i->getSrc(s - 1));
- i->setSrc(0, arrayIndex);
+ if (arrayIndex) {
+ for (int s = dim; s >= 1; --s)
+ i->setSrc(s, i->getSrc(s - 1));
+ i->setSrc(0, arrayIndex);
+ } else {
+ i->moveSources(0, 1);
+ }
if (arrayIndex) {
int sat = (i->op == OP_TXF) ? 1 : 0;
@@ -861,6 +883,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
for (l = 0; l < 4; ++l) {
+ Value *src[3], *val;
// mov coordinates from lane l to all lanes
for (c = 0; c < dim; ++c)
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
@@ -870,10 +893,24 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
// add dPdy from lane l to lanes dy
for (c = 0; c < dim; ++c)
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]);
+ // normalize cube coordinates
+ if (i->tex.target.isCube()) {
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
+ val = bld.getScratch();
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
+ bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
+ bld.mkOp1(OP_RCP, TYPE_F32, val, val);
+ for (c = 0; c < 3; ++c)
+ src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
+ } else {
+ for (c = 0; c < dim; ++c)
+ src[c] = crd[c];
+ }
// texture
bld.insert(tex = cloneForward(func, i));
for (c = 0; c < dim; ++c)
- tex->setSrc(c + array, crd[c]);
+ tex->setSrc(c + array, src[c]);
// save results
for (c = 0; i->defExists(c); ++c) {
Instruction *mov;
@@ -1098,6 +1135,7 @@ NVC0LoweringPass::handleSharedATOM(Instruction *atom)
break;
default:
assert(0);
+ return;
}
Instruction *i =
@@ -1204,7 +1242,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
inline Value *
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
return bld.
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
@@ -1213,7 +1251,7 @@ NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
inline Value *
NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
@@ -1226,7 +1264,7 @@ NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
inline Value *
NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
{
- uint8_t b = prog->driver->io.resInfoCBSlot;
+ uint8_t b = prog->driver->io.auxCBSlot;
off += prog->driver->io.suInfoBase;
if (ptr)
@@ -1540,7 +1578,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
call->indirect = 1;
call->absolute = 1;
call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST,
- prog->driver->io.resInfoCBSlot, TYPE_U32,
+ prog->driver->io.auxCBSlot, TYPE_U32,
prog->driver->io.suInfoBase + base));
call->setSrc(1, r[2]);
call->setSrc(2, r[4]);
@@ -1698,7 +1736,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
}
addr += prog->driver->prop.cp.gridInfoBase;
bld.mkLoad(TYPE_U32, i->getDef(0),
- bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
+ bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, addr), NULL);
break;
case SV_SAMPLE_INDEX:
// TODO: Properly pass source as an address in the PIX address space
@@ -1715,7 +1754,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
bld.mkLoad(TYPE_F32,
i->getDef(0),
bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
TYPE_U32, prog->driver->io.sampleInfoBase +
4 * sym->reg.data.sv.index),
off);
@@ -1780,7 +1819,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
{
if (i->dType == TYPE_F64) {
Value *pred = bld.getSSA(1, FILE_PREDICATE);
- Value *zero = bld.loadImm(NULL, 0.0d);
+ Value *zero = bld.loadImm(NULL, 0.0);
Value *dst = bld.getSSA(8);
bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index cfa85ec123c..066faa367d2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -204,6 +204,11 @@ static const char *ldstSubOpStr[] =
"", "lock", "unlock"
};
+static const char *subfmOpStr[] =
+{
+ "", "3d"
+};
+
static const char *DataTypeStr[] =
{
"-",
@@ -548,6 +553,10 @@ void Instruction::print() const
if (subOp < Elements(ldstSubOpStr))
PRINT("%s ", ldstSubOpStr[subOp]);
break;
+ case OP_SUBFM:
+ if (subOp < Elements(subfmOpStr))
+ PRINT("%s ", subfmOpStr[subOp]);
+ break;
default:
if (subOp)
PRINT("(SUBOP:%u) ", subOp);
diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c b/src/gallium/drivers/nouveau/nouveau_compiler.c
index cd44aa1e1d9..ca73fd17a43 100644
--- a/src/gallium/drivers/nouveau/nouveau_compiler.c
+++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
@@ -114,8 +114,6 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
info.io.auxCBSlot = 15;
info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
-
- info.io.resInfoCBSlot = 15;
info.io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
info.io.msInfoCBSlot = 15;
info.io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index 04488d6d0a6..d781f6fd7d4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -67,122 +67,94 @@ nv50_screen_compute_setup(struct nv50_screen *screen,
if (ret)
return ret;
- BEGIN_NV04(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ BEGIN_NV04(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1);
PUSH_DATA (push, screen->compute->handle);
- BEGIN_NV04(push, NV50_COMPUTE(UNK02A0), 1);
+ BEGIN_NV04(push, NV50_CP(UNK02A0), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_STACK), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_STACK), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_ADDRESS_HIGH), 2);
+ BEGIN_NV04(push, NV50_CP(STACK_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->stack_bo->offset);
PUSH_DATA (push, screen->stack_bo->offset);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_SIZE_LOG), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_SIZE_LOG), 1);
PUSH_DATA (push, 4);
- BEGIN_NV04(push, NV50_COMPUTE(UNK0290), 1);
+ BEGIN_NV04(push, NV50_CP(UNK0290), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(LANES32_ENABLE), 1);
+ BEGIN_NV04(push, NV50_CP(LANES32_ENABLE), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(REG_MODE), 1);
+ BEGIN_NV04(push, NV50_CP(REG_MODE), 1);
PUSH_DATA (push, NV50_COMPUTE_REG_MODE_STRIPED);
- BEGIN_NV04(push, NV50_COMPUTE(UNK0384), 1);
+ BEGIN_NV04(push, NV50_CP(UNK0384), 1);
PUSH_DATA (push, 0x100);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_GLOBAL), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_GLOBAL), 1);
PUSH_DATA (push, fifo->vram);
for (i = 0; i < 15; i++) {
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(i)), 2);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(i)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(i)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(i)), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(i)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(i)), 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
}
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_ADDRESS_HIGH(15)), 2);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_ADDRESS_HIGH(15)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_LIMIT(15)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_LIMIT(15)), 1);
PUSH_DATA (push, ~0);
- BEGIN_NV04(push, NV50_COMPUTE(GLOBAL_MODE(15)), 1);
+ BEGIN_NV04(push, NV50_CP(GLOBAL_MODE(15)), 1);
PUSH_DATA (push, NV50_COMPUTE_GLOBAL_MODE_LINEAR);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_LOG_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_LOG_ALLOC), 1);
PUSH_DATA (push, 7);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_WARPS_NO_CLAMP), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_WARPS_NO_CLAMP), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_LOG_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_WARPS_LOG_ALLOC), 1);
PUSH_DATA (push, 7);
- BEGIN_NV04(push, NV50_COMPUTE(STACK_WARPS_NO_CLAMP), 1);
+ BEGIN_NV04(push, NV50_CP(STACK_WARPS_NO_CLAMP), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TEXTURE), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TEXTURE), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TEX_LIMITS), 1);
+ BEGIN_NV04(push, NV50_CP(TEX_LIMITS), 1);
PUSH_DATA (push, 0x54);
- BEGIN_NV04(push, NV50_COMPUTE(LINKED_TSC), 1);
+ BEGIN_NV04(push, NV50_CP(LINKED_TSC), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TIC), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TIC), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ BEGIN_NV04(push, NV50_CP(TIC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset);
PUSH_DATA (push, screen->txc->offset);
PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_TSC), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_TSC), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ BEGIN_NV04(push, NV50_CP(TSC_ADDRESS_HIGH), 3);
PUSH_DATAh(push, screen->txc->offset + 65536);
PUSH_DATA (push, screen->txc->offset + 65536);
PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_CODE_CB), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_CODE_CB), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(DMA_LOCAL), 1);
+ BEGIN_NV04(push, NV50_CP(DMA_LOCAL), 1);
PUSH_DATA (push, fifo->vram);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_ADDRESS_HIGH), 2);
+ BEGIN_NV04(push, NV50_CP(LOCAL_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->tls_bo->offset + 65536);
PUSH_DATA (push, screen->tls_bo->offset + 65536);
- BEGIN_NV04(push, NV50_COMPUTE(LOCAL_SIZE_LOG), 1);
+ BEGIN_NV04(push, NV50_CP(LOCAL_SIZE_LOG), 1);
PUSH_DATA (push, util_logbase2((screen->max_tls_space / ONE_TEMP_SIZE) * 2));
return 0;
}
-static bool
-nv50_compute_validate_program(struct nv50_context *nv50)
-{
- struct nv50_program *prog = nv50->compprog;
-
- if (prog->mem)
- return true;
-
- if (!prog->translated) {
- prog->translated = nv50_program_translate(
- prog, nv50->screen->base.device->chipset, &nv50->base.debug);
- if (!prog->translated)
- return false;
- }
- if (unlikely(!prog->code_size))
- return false;
-
- if (likely(prog->code_size)) {
- if (nv50_program_upload_code(nv50, prog)) {
- struct nouveau_pushbuf *push = nv50->base.pushbuf;
- BEGIN_NV04(push, NV50_COMPUTE(CODE_CB_FLUSH), 1);
- PUSH_DATA (push, 0);
- return true;
- }
- }
- return false;
-}
-
static void
nv50_compute_validate_globals(struct nv50_context *nv50)
{
@@ -198,26 +170,25 @@ nv50_compute_validate_globals(struct nv50_context *nv50)
}
}
+static struct nv50_state_validate
+validate_list_cp[] = {
+ { nv50_compprog_validate, NV50_NEW_CP_PROGRAM },
+ { nv50_compute_validate_globals, NV50_NEW_CP_GLOBALS },
+};
+
static bool
-nv50_compute_state_validate(struct nv50_context *nv50)
+nv50_state_validate_cp(struct nv50_context *nv50, uint32_t mask)
{
- if (!nv50_compute_validate_program(nv50))
- return false;
-
- if (nv50->dirty_cp & NV50_NEW_CP_GLOBALS)
- nv50_compute_validate_globals(nv50);
+ bool ret;
/* TODO: validate textures, samplers, surfaces */
+ ret = nv50_state_validate(nv50, mask, validate_list_cp,
+ ARRAY_SIZE(validate_list_cp), &nv50->dirty_cp,
+ nv50->bufctx_cp);
- nv50_bufctx_fence(nv50->bufctx_cp, false);
-
- nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_cp);
- if (unlikely(nouveau_pushbuf_validate(nv50->base.pushbuf)))
- return false;
if (unlikely(nv50->state.flushed))
nv50_bufctx_fence(nv50->bufctx_cp, true);
-
- return true;
+ return ret;
}
static void
@@ -227,7 +198,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
struct nouveau_pushbuf *push = screen->base.pushbuf;
unsigned size = align(nv50->compprog->parm_size, 0x4);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM_COUNT), 1);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
PUSH_DATA (push, (size / 4) << 8);
if (size) {
@@ -245,7 +216,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
nouveau_pushbuf_bufctx(push, nv50->bufctx);
nouveau_pushbuf_validate(push);
- BEGIN_NV04(push, NV50_COMPUTE(USER_PARAM(0)), size / 4);
+ BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
nouveau_pushbuf_data(push, bo, offset, size);
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
@@ -278,7 +249,7 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
struct nv50_program *cp = nv50->compprog;
bool ret;
- ret = !nv50_compute_state_validate(nv50);
+ ret = !nv50_state_validate_cp(nv50, ~0);
if (ret) {
NOUVEAU_ERR("Failed to launch grid !\n");
return;
@@ -286,33 +257,33 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
nv50_compute_upload_input(nv50, info->input);
- BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1);
+ BEGIN_NV04(push, NV50_CP(CP_START_ID), 1);
PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc));
- BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1);
+ BEGIN_NV04(push, NV50_CP(SHARED_SIZE), 1);
PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 0x10, 0x40));
- BEGIN_NV04(push, NV50_COMPUTE(CP_REG_ALLOC_TEMP), 1);
+ BEGIN_NV04(push, NV50_CP(CP_REG_ALLOC_TEMP), 1);
PUSH_DATA (push, cp->max_gpr);
/* grid/block setup */
- BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2);
+ BEGIN_NV04(push, NV50_CP(BLOCKDIM_XY), 2);
PUSH_DATA (push, info->block[1] << 16 | info->block[0]);
PUSH_DATA (push, info->block[2]);
- BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1);
+ BEGIN_NV04(push, NV50_CP(BLOCK_ALLOC), 1);
PUSH_DATA (push, 1 << 16 | block_size);
- BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1);
+ BEGIN_NV04(push, NV50_CP(BLOCKDIM_LATCH), 1);
PUSH_DATA (push, 1);
- BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1);
+ BEGIN_NV04(push, NV50_CP(GRIDDIM), 1);
PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]);
- BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1);
+ BEGIN_NV04(push, NV50_CP(GRIDID), 1);
PUSH_DATA (push, 1);
/* kernel launching */
- BEGIN_NV04(push, NV50_COMPUTE(LAUNCH), 1);
+ BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
PUSH_DATA (push, 0);
- BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
/* bind a compute shader clobbers fragment shader state */
- nv50->dirty |= NV50_NEW_FRAGPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 4874b77b1e1..61a52c4b366 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -176,8 +176,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
if (nv50->framebuffer.cbufs[i] &&
nv50->framebuffer.cbufs[i]->texture == res) {
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -186,8 +186,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
if (bind & PIPE_BIND_DEPTH_STENCIL) {
if (nv50->framebuffer.zsbuf &&
nv50->framebuffer.zsbuf->texture == res) {
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
if (!--ref)
return ref;
}
@@ -202,8 +202,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
for (i = 0; i < nv50->num_vtxbufs; ++i) {
if (nv50->vtxbuf[i].buffer == res) {
- nv50->dirty |= NV50_NEW_ARRAYS;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
if (!--ref)
return ref;
}
@@ -211,8 +211,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
if (nv50->idxbuf.buffer == res) {
/* Just rebind to the bufctx as there is no separate dirty bit */
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
- BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
+ BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(res), RD);
if (!--ref)
return ref;
}
@@ -222,8 +222,8 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
for (i = 0; i < nv50->num_textures[s]; ++i) {
if (nv50->textures[s][i] &&
nv50->textures[s][i]->texture == res) {
- nv50->dirty |= NV50_NEW_TEXTURES;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
if (!--ref)
return ref;
}
@@ -236,9 +236,9 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
continue;
if (!nv50->constbuf[s][i].user &&
nv50->constbuf[s][i].u.buf == res) {
- nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
nv50->constbuf_dirty[s] |= 1 << i;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
if (!--ref)
return ref;
}
@@ -345,10 +345,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->code);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->uniforms);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->txc);
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->stack_bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->code);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->uniforms);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->stack_bo);
if (screen->compute) {
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->code);
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->txc);
@@ -357,7 +357,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
- BCTX_REFN_bo(nv50->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo);
BCTX_REFN_bo(nv50->bufctx, FENCE, flags, screen->fence.bo);
if (screen->compute)
BCTX_REFN_bo(nv50->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2620d03b999..2317fa2ccf8 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -26,43 +26,43 @@
#include "nv50/nv50_3d.xml.h"
#include "nv50/nv50_2d.xml.h"
-#define NV50_NEW_BLEND (1 << 0)
-#define NV50_NEW_RASTERIZER (1 << 1)
-#define NV50_NEW_ZSA (1 << 2)
-#define NV50_NEW_VERTPROG (1 << 3)
-#define NV50_NEW_GMTYPROG (1 << 6)
-#define NV50_NEW_FRAGPROG (1 << 7)
-#define NV50_NEW_BLEND_COLOUR (1 << 8)
-#define NV50_NEW_STENCIL_REF (1 << 9)
-#define NV50_NEW_CLIP (1 << 10)
-#define NV50_NEW_SAMPLE_MASK (1 << 11)
-#define NV50_NEW_FRAMEBUFFER (1 << 12)
-#define NV50_NEW_STIPPLE (1 << 13)
-#define NV50_NEW_SCISSOR (1 << 14)
-#define NV50_NEW_VIEWPORT (1 << 15)
-#define NV50_NEW_ARRAYS (1 << 16)
-#define NV50_NEW_VERTEX (1 << 17)
-#define NV50_NEW_CONSTBUF (1 << 18)
-#define NV50_NEW_TEXTURES (1 << 19)
-#define NV50_NEW_SAMPLERS (1 << 20)
-#define NV50_NEW_STRMOUT (1 << 21)
-#define NV50_NEW_MIN_SAMPLES (1 << 22)
-#define NV50_NEW_CONTEXT (1 << 31)
+#define NV50_NEW_3D_BLEND (1 << 0)
+#define NV50_NEW_3D_RASTERIZER (1 << 1)
+#define NV50_NEW_3D_ZSA (1 << 2)
+#define NV50_NEW_3D_VERTPROG (1 << 3)
+#define NV50_NEW_3D_GMTYPROG (1 << 6)
+#define NV50_NEW_3D_FRAGPROG (1 << 7)
+#define NV50_NEW_3D_BLEND_COLOUR (1 << 8)
+#define NV50_NEW_3D_STENCIL_REF (1 << 9)
+#define NV50_NEW_3D_CLIP (1 << 10)
+#define NV50_NEW_3D_SAMPLE_MASK (1 << 11)
+#define NV50_NEW_3D_FRAMEBUFFER (1 << 12)
+#define NV50_NEW_3D_STIPPLE (1 << 13)
+#define NV50_NEW_3D_SCISSOR (1 << 14)
+#define NV50_NEW_3D_VIEWPORT (1 << 15)
+#define NV50_NEW_3D_ARRAYS (1 << 16)
+#define NV50_NEW_3D_VERTEX (1 << 17)
+#define NV50_NEW_3D_CONSTBUF (1 << 18)
+#define NV50_NEW_3D_TEXTURES (1 << 19)
+#define NV50_NEW_3D_SAMPLERS (1 << 20)
+#define NV50_NEW_3D_STRMOUT (1 << 21)
+#define NV50_NEW_3D_MIN_SAMPLES (1 << 22)
+#define NV50_NEW_3D_CONTEXT (1 << 31)
#define NV50_NEW_CP_PROGRAM (1 << 0)
#define NV50_NEW_CP_GLOBALS (1 << 1)
/* 3d bufctx (during draw_vbo, blit_3d) */
-#define NV50_BIND_FB 0
-#define NV50_BIND_VERTEX 1
-#define NV50_BIND_VERTEX_TMP 2
-#define NV50_BIND_INDEX 3
-#define NV50_BIND_TEXTURES 4
-#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
-#define NV50_BIND_SO 53
-#define NV50_BIND_SCREEN 54
-#define NV50_BIND_TLS 55
-#define NV50_BIND_3D_COUNT 56
+#define NV50_BIND_3D_FB 0
+#define NV50_BIND_3D_VERTEX 1
+#define NV50_BIND_3D_VERTEX_TMP 2
+#define NV50_BIND_3D_INDEX 3
+#define NV50_BIND_3D_TEXTURES 4
+#define NV50_BIND_3D_CB(s, i) (5 + 16 * (s) + (i))
+#define NV50_BIND_3D_SO 53
+#define NV50_BIND_3D_SCREEN 54
+#define NV50_BIND_3D_TLS 55
+#define NV50_BIND_3D_COUNT 56
/* compute bufctx (during launch_grid) */
#define NV50_BIND_CP_GLOBAL 0
@@ -115,7 +115,7 @@ struct nv50_context {
struct nouveau_bufctx *bufctx;
struct nouveau_bufctx *bufctx_cp;
- uint32_t dirty;
+ uint32_t dirty_3d; /* dirty flags for 3d state */
uint32_t dirty_cp; /* dirty flags for compute state */
bool cb_dirty;
@@ -221,6 +221,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
void nv50_vertprog_validate(struct nv50_context *);
void nv50_gmtyprog_validate(struct nv50_context *);
void nv50_fragprog_validate(struct nv50_context *);
+void nv50_compprog_validate(struct nv50_context *);
void nv50_fp_linkage_validate(struct nv50_context *);
void nv50_gp_linkage_validate(struct nv50_context *);
void nv50_constbufs_validate(struct nv50_context *);
@@ -231,7 +232,15 @@ void nv50_stream_output_validate(struct nv50_context *);
extern void nv50_init_state_functions(struct nv50_context *);
/* nv50_state_validate.c */
-bool nv50_state_validate(struct nv50_context *, uint32_t state_mask);
+struct nv50_state_validate {
+ void (*func)(struct nv50_context *);
+ uint32_t states;
+};
+
+bool nv50_state_validate(struct nv50_context *, uint32_t,
+ struct nv50_state_validate *, int, uint32_t *,
+ struct nouveau_bufctx *);
+bool nv50_state_validate_3d(struct nv50_context *, uint32_t);
/* nv50_surface.c */
extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index a67ef28abf8..3444b3110de 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -335,7 +335,6 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;
- info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
info->io.msInfoCBSlot = 15;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index be19c0fdc85..0a73090d78d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -202,10 +202,10 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
func = nv50_hw_sm_get_func(c);
/* configure and reset the counter(s) */
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
| cfg->ctr[i].unit | cfg->ctr[i].mode);
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_SET(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
PUSH_DATA (push, 0);
}
return true;
@@ -240,7 +240,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
PUSH_SPACE(push, 8);
for (c = 0; c < 4; c++) {
if (screen->pm.mp_counter[c]) {
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(c)), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(c)), 1);
PUSH_DATA (push, 0);
}
}
@@ -257,7 +257,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
hq->bo);
PUSH_SPACE(push, 2);
- BEGIN_NV04(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);
pipe->bind_compute_state(pipe, screen->pm.prog);
@@ -295,7 +295,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
mask |= 1 << hsq->ctr[i];
func = nv50_hw_sm_get_func(hsq->ctr[i]);
- BEGIN_NV04(push, NV50_COMPUTE(MP_PM_CONTROL(hsq->ctr[i])), 1);
+ BEGIN_NV04(push, NV50_CP(MP_PM_CONTROL(hsq->ctr[i])), 1);
PUSH_DATA (push, (cfg->ctr[i].sig << 24) | (func << 8)
| cfg->ctr[i].unit | cfg->ctr[i].mode);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 8e4b2b42bda..3d2ebfbcc46 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -29,6 +29,8 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
+#include "nv50/nv50_compute.xml.h"
+
void
nv50_constbufs_validate(struct nv50_context *nv50)
{
@@ -94,7 +96,7 @@ nv50_constbufs_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
- BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_CB(s, i), res, RD);
nv50->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
@@ -131,14 +133,14 @@ nv50_program_update_context_state(struct nv50_context *nv50,
if (prog && prog->tls_space) {
if (nv50->state.new_tls_space)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
if (!nv50->state.tls_required || nv50->state.new_tls_space)
- BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_TLS, flags, nv50->screen->tls_bo);
nv50->state.new_tls_space = false;
nv50->state.tls_required |= 1 << stage;
} else {
if (nv50->state.tls_required == (1 << stage))
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TLS);
nv50->state.tls_required &= ~(1 << stage);
}
}
@@ -181,7 +183,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
fp->fp.force_persample_interp = rast->force_persample_interp;
}
- if (fp->mem && !(nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_MIN_SAMPLES)))
+ if (fp->mem && !(nv50->dirty_3d & (NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_MIN_SAMPLES)))
return;
if (!nv50_program_validate(nv50, fp))
@@ -238,6 +240,19 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
/* GP_ENABLE is updated in linkage validation */
}
+void
+nv50_compprog_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_program *cp = nv50->compprog;
+
+ if (cp && !nv50_program_validate(nv50, cp))
+ return;
+
+ BEGIN_NV04(push, NV50_CP(CODE_CB_FLUSH), 1);
+ PUSH_DATA (push, 0);
+}
+
static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
@@ -309,7 +324,7 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
}
- if (nv50->dirty & NV50_NEW_FRAGPROG)
+ if (nv50->dirty_3d & NV50_NEW_3D_FRAGPROG)
return;
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
color = nv50->state.semantic_color & ~NV50_3D_SEMANTIC_COLOR_CLMP_EN;
@@ -378,9 +393,9 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
uint8_t map[64];
uint8_t so_map[64];
- if (!(nv50->dirty & (NV50_NEW_VERTPROG |
- NV50_NEW_FRAGPROG |
- NV50_NEW_GMTYPROG))) {
+ if (!(nv50->dirty_3d & (NV50_NEW_3D_VERTPROG |
+ NV50_NEW_3D_FRAGPROG |
+ NV50_NEW_3D_GMTYPROG))) {
uint8_t bfc, ffc;
ffc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK);
bfc = (nv50->state.semantic_color & NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK)
@@ -633,8 +648,6 @@ nv50_stream_output_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
PUSH_DATA (push, ctrl);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
-
for (i = 0; i < nv50->num_so_targets; ++i) {
struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
@@ -664,7 +677,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
prims = MIN2(prims, limit);
}
targ->stride = so->stride[i];
- BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_SO, buf, WR);
}
if (prims != ~0) {
BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 8504ba466cc..86e74d68b11 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -200,7 +200,7 @@ nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->blend = hwcso;
- nv50->dirty |= NV50_NEW_BLEND;
+ nv50->dirty_3d |= NV50_NEW_3D_BLEND;
}
static void
@@ -337,7 +337,7 @@ nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->rast = hwcso;
- nv50->dirty |= NV50_NEW_RASTERIZER;
+ nv50->dirty_3d |= NV50_NEW_3D_RASTERIZER;
}
static void
@@ -426,7 +426,7 @@ nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->zsa = hwcso;
- nv50->dirty |= NV50_NEW_ZSA;
+ nv50->dirty_3d |= NV50_NEW_3D_ZSA;
}
static void
@@ -605,7 +605,7 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
nv50->num_samplers[s] = nr;
- nv50->dirty |= NV50_NEW_SAMPLERS;
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLERS;
}
static void
@@ -698,9 +698,9 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
nv50->num_textures[s] = nr;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty |= NV50_NEW_TEXTURES;
+ nv50->dirty_3d |= NV50_NEW_3D_TEXTURES;
}
static void
@@ -776,7 +776,7 @@ nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->vertprog = hwcso;
- nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
}
static void *
@@ -792,7 +792,7 @@ nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->fragprog = hwcso;
- nv50->dirty |= NV50_NEW_FRAGPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
}
static void *
@@ -808,7 +808,7 @@ nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->gmtyprog = hwcso;
- nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
}
static void *
@@ -857,7 +857,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
nv50->constbuf[s][i].u.buf = NULL;
else
if (nv50->constbuf[s][i].u.buf)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_CB(s, i));
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_CB(s, i));
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
@@ -882,7 +882,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
}
nv50->constbuf_dirty[s] |= 1 << i;
- nv50->dirty |= NV50_NEW_CONSTBUF;
+ nv50->dirty_3d |= NV50_NEW_3D_CONSTBUF;
}
/* =============================================================================
@@ -895,7 +895,7 @@ nv50_set_blend_color(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->blend_colour = *bcol;
- nv50->dirty |= NV50_NEW_BLEND_COLOUR;
+ nv50->dirty_3d |= NV50_NEW_3D_BLEND_COLOUR;
}
static void
@@ -905,7 +905,7 @@ nv50_set_stencil_ref(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->stencil_ref = *sr;
- nv50->dirty |= NV50_NEW_STENCIL_REF;
+ nv50->dirty_3d |= NV50_NEW_3D_STENCIL_REF;
}
static void
@@ -916,7 +916,7 @@ nv50_set_clip_state(struct pipe_context *pipe,
memcpy(nv50->clip.ucp, clip->ucp, sizeof(clip->ucp));
- nv50->dirty |= NV50_NEW_CLIP;
+ nv50->dirty_3d |= NV50_NEW_3D_CLIP;
}
static void
@@ -925,7 +925,7 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->sample_mask = sample_mask;
- nv50->dirty |= NV50_NEW_SAMPLE_MASK;
+ nv50->dirty_3d |= NV50_NEW_3D_SAMPLE_MASK;
}
static void
@@ -935,7 +935,7 @@ nv50_set_min_samples(struct pipe_context *pipe, unsigned min_samples)
if (nv50->min_samples != min_samples) {
nv50->min_samples = min_samples;
- nv50->dirty |= NV50_NEW_MIN_SAMPLES;
+ nv50->dirty_3d |= NV50_NEW_3D_MIN_SAMPLES;
}
}
@@ -945,11 +945,11 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
util_copy_framebuffer_state(&nv50->framebuffer, fb);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
}
static void
@@ -959,7 +959,7 @@ nv50_set_polygon_stipple(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
nv50->stipple = *stipple;
- nv50->dirty |= NV50_NEW_STIPPLE;
+ nv50->dirty_3d |= NV50_NEW_3D_STIPPLE;
}
static void
@@ -977,7 +977,7 @@ nv50_set_scissor_states(struct pipe_context *pipe,
continue;
nv50->scissors[start_slot + i] = scissor[i];
nv50->scissors_dirty |= 1 << (start_slot + i);
- nv50->dirty |= NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_SCISSOR;
}
}
@@ -996,7 +996,7 @@ nv50_set_viewport_states(struct pipe_context *pipe,
continue;
nv50->viewports[start_slot + i] = vpt[i];
nv50->viewports_dirty |= 1 << (start_slot + i);
- nv50->dirty |= NV50_NEW_VIEWPORT;
+ nv50->dirty_3d |= NV50_NEW_3D_VIEWPORT;
}
}
@@ -1008,8 +1008,8 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
- nv50->dirty |= NV50_NEW_ARRAYS;
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX);
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
start_slot, count);
@@ -1051,14 +1051,14 @@ nv50_set_index_buffer(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
if (nv50->idxbuf.buffer)
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
if (ib) {
pipe_resource_reference(&nv50->idxbuf.buffer, ib->buffer);
nv50->idxbuf.index_size = ib->index_size;
if (ib->buffer) {
nv50->idxbuf.offset = ib->offset;
- BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(ib->buffer), RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, nv04_resource(ib->buffer), RD);
} else {
nv50->idxbuf.user_buffer = ib->user_buffer;
}
@@ -1073,7 +1073,7 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv50_context *nv50 = nv50_context(pipe);
nv50->vertex = hwcso;
- nv50->dirty |= NV50_NEW_VERTEX;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTEX;
}
static struct pipe_stream_output_target *
@@ -1180,8 +1180,10 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
}
nv50->num_so_targets = num_targets;
- if (nv50->so_targets_dirty)
- nv50->dirty |= NV50_NEW_STRMOUT;
+ if (nv50->so_targets_dirty) {
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_SO);
+ nv50->dirty_3d |= NV50_NEW_3D_STRMOUT;
+ }
}
static void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 55369781606..51204930031 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -25,7 +25,7 @@ nv50_validate_fb(struct nv50_context *nv50)
unsigned ms_mode = NV50_3D_MULTISAMPLE_MODE_MS1;
uint32_t array_size = 0xffff, array_mode = 0;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
@@ -90,7 +90,7 @@ nv50_validate_fb(struct nv50_context *nv50)
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
/* only register for writing, otherwise we'd always serialize here */
- BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
}
if (fb->zsbuf) {
@@ -118,7 +118,7 @@ nv50_validate_fb(struct nv50_context *nv50)
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nv50->bufctx_3d, FB, &mt->base, WR);
+ BCTX_REFN(nv50->bufctx_3d, 3D_FB, &mt->base, WR);
} else {
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -187,8 +187,8 @@ nv50_validate_scissor(struct nv50_context *nv50)
#ifdef NV50_SCISSORS_CLIPPING
int minx, maxx, miny, maxy, i;
- if (!(nv50->dirty &
- (NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT | NV50_NEW_FRAMEBUFFER)) &&
+ if (!(nv50->dirty_3d &
+ (NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT | NV50_NEW_3D_FRAMEBUFFER)) &&
nv50->state.scissor == nv50->rast->pipe.scissor)
return;
@@ -197,7 +197,7 @@ nv50_validate_scissor(struct nv50_context *nv50)
nv50->state.scissor = nv50->rast->pipe.scissor;
- if ((nv50->dirty & NV50_NEW_FRAMEBUFFER) && !nv50->state.scissor)
+ if ((nv50->dirty_3d & NV50_NEW_3D_FRAMEBUFFER) && !nv50->state.scissor)
nv50->scissors_dirty = (1 << NV50_MAX_VIEWPORTS) - 1;
for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
@@ -290,10 +290,10 @@ nv50_check_program_ucps(struct nv50_context *nv50,
vp->vp.clpd_nr = n;
if (likely(vp == nv50->vertprog)) {
- nv50->dirty |= NV50_NEW_VERTPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_VERTPROG;
nv50_vertprog_validate(nv50);
} else {
- nv50->dirty |= NV50_NEW_GMTYPROG;
+ nv50->dirty_3d |= NV50_NEW_3D_GMTYPROG;
nv50_gmtyprog_validate(nv50);
}
nv50_fp_linkage_validate(nv50);
@@ -342,7 +342,7 @@ nv50_validate_clip(struct nv50_context *nv50)
struct nv50_program *vp;
uint8_t clip_enable;
- if (nv50->dirty & NV50_NEW_CLIP) {
+ if (nv50->dirty_3d & NV50_NEW_3D_CLIP) {
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
PUSH_DATA (push, (NV50_CB_AUX_UCP_OFFSET << 8) | NV50_CB_AUX);
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), PIPE_MAX_CLIP_PLANES * 4);
@@ -436,7 +436,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
else
ctx_to->state = ctx_to->screen->save_state;
- ctx_to->dirty = ~0;
+ ctx_to->dirty_3d = ~0;
+ ctx_to->dirty_cp = ~0;
ctx_to->viewports_dirty = ~0;
ctx_to->scissors_dirty = ~0;
@@ -445,71 +446,71 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
ctx_to->constbuf_dirty[2] = (1 << NV50_MAX_PIPE_CONSTBUFS) - 1;
if (!ctx_to->vertex)
- ctx_to->dirty &= ~(NV50_NEW_VERTEX | NV50_NEW_ARRAYS);
+ ctx_to->dirty_3d &= ~(NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS);
if (!ctx_to->vertprog)
- ctx_to->dirty &= ~NV50_NEW_VERTPROG;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_VERTPROG;
if (!ctx_to->fragprog)
- ctx_to->dirty &= ~NV50_NEW_FRAGPROG;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_FRAGPROG;
if (!ctx_to->blend)
- ctx_to->dirty &= ~NV50_NEW_BLEND;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_BLEND;
if (!ctx_to->rast)
#ifdef NV50_SCISSORS_CLIPPING
- ctx_to->dirty &= ~(NV50_NEW_RASTERIZER | NV50_NEW_SCISSOR);
+ ctx_to->dirty_3d &= ~(NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_SCISSOR);
#else
- ctx_to->dirty &= ~NV50_NEW_RASTERIZER;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_RASTERIZER;
#endif
if (!ctx_to->zsa)
- ctx_to->dirty &= ~NV50_NEW_ZSA;
+ ctx_to->dirty_3d &= ~NV50_NEW_3D_ZSA;
ctx_to->screen->cur_ctx = ctx_to;
}
-static struct state_validate {
- void (*func)(struct nv50_context *);
- uint32_t states;
-} validate_list[] = {
- { nv50_validate_fb, NV50_NEW_FRAMEBUFFER },
- { nv50_validate_blend, NV50_NEW_BLEND },
- { nv50_validate_zsa, NV50_NEW_ZSA },
- { nv50_validate_sample_mask, NV50_NEW_SAMPLE_MASK },
- { nv50_validate_rasterizer, NV50_NEW_RASTERIZER },
- { nv50_validate_blend_colour, NV50_NEW_BLEND_COLOUR },
- { nv50_validate_stencil_ref, NV50_NEW_STENCIL_REF },
- { nv50_validate_stipple, NV50_NEW_STIPPLE },
+static struct nv50_state_validate
+validate_list_3d[] = {
+ { nv50_validate_fb, NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_blend, NV50_NEW_3D_BLEND },
+ { nv50_validate_zsa, NV50_NEW_3D_ZSA },
+ { nv50_validate_sample_mask, NV50_NEW_3D_SAMPLE_MASK },
+ { nv50_validate_rasterizer, NV50_NEW_3D_RASTERIZER },
+ { nv50_validate_blend_colour, NV50_NEW_3D_BLEND_COLOUR },
+ { nv50_validate_stencil_ref, NV50_NEW_3D_STENCIL_REF },
+ { nv50_validate_stipple, NV50_NEW_3D_STIPPLE },
#ifdef NV50_SCISSORS_CLIPPING
- { nv50_validate_scissor, NV50_NEW_SCISSOR | NV50_NEW_VIEWPORT |
- NV50_NEW_RASTERIZER |
- NV50_NEW_FRAMEBUFFER },
+ { nv50_validate_scissor, NV50_NEW_3D_SCISSOR | NV50_NEW_3D_VIEWPORT |
+ NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_FRAMEBUFFER },
#else
- { nv50_validate_scissor, NV50_NEW_SCISSOR },
+ { nv50_validate_scissor, NV50_NEW_3D_SCISSOR },
#endif
- { nv50_validate_viewport, NV50_NEW_VIEWPORT },
- { nv50_vertprog_validate, NV50_NEW_VERTPROG },
- { nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
- { nv50_fragprog_validate, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
- NV50_NEW_MIN_SAMPLES },
- { nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
- { nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
- { nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
- { nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
- { nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_constbufs_validate, NV50_NEW_CONSTBUF },
- { nv50_validate_textures, NV50_NEW_TEXTURES },
- { nv50_validate_samplers, NV50_NEW_SAMPLERS },
- { nv50_stream_output_validate, NV50_NEW_STRMOUT |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
- { nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
+ { nv50_validate_viewport, NV50_NEW_3D_VIEWPORT },
+ { nv50_vertprog_validate, NV50_NEW_3D_VERTPROG },
+ { nv50_gmtyprog_validate, NV50_NEW_3D_GMTYPROG },
+ { nv50_fragprog_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_MIN_SAMPLES },
+ { nv50_fp_linkage_validate, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_VERTPROG |
+ NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_VERTPROG },
+ { nv50_validate_derived_rs, NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_validate_derived_2, NV50_NEW_3D_ZSA | NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_derived_3, NV50_NEW_3D_BLEND | NV50_NEW_3D_FRAMEBUFFER },
+ { nv50_validate_clip, NV50_NEW_3D_CLIP | NV50_NEW_3D_RASTERIZER |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_constbufs_validate, NV50_NEW_3D_CONSTBUF },
+ { nv50_validate_textures, NV50_NEW_3D_TEXTURES },
+ { nv50_validate_samplers, NV50_NEW_3D_SAMPLERS },
+ { nv50_stream_output_validate, NV50_NEW_3D_STRMOUT |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG },
+ { nv50_vertex_arrays_validate, NV50_NEW_3D_VERTEX | NV50_NEW_3D_ARRAYS },
+ { nv50_validate_min_samples, NV50_NEW_3D_MIN_SAMPLES },
};
bool
-nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
+nv50_state_validate(struct nv50_context *nv50, uint32_t mask,
+ struct nv50_state_validate *validate_list, int size,
+ uint32_t *dirty, struct nouveau_bufctx *bufctx)
{
uint32_t state_mask;
int ret;
@@ -518,16 +519,16 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
if (nv50->screen->cur_ctx != nv50)
nv50_switch_pipe_context(nv50);
- state_mask = nv50->dirty & mask;
+ state_mask = *dirty & mask;
if (state_mask) {
- for (i = 0; i < ARRAY_SIZE(validate_list); ++i) {
- struct state_validate *validate = &validate_list[i];
+ for (i = 0; i < size; i++) {
+ struct nv50_state_validate *validate = &validate_list[i];
if (state_mask & validate->states)
validate->func(nv50);
}
- nv50->dirty &= ~state_mask;
+ *dirty &= ~state_mask;
if (nv50->state.rt_serialize) {
nv50->state.rt_serialize = false;
@@ -535,14 +536,26 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask)
PUSH_DATA (nv50->base.pushbuf, 0);
}
- nv50_bufctx_fence(nv50->bufctx_3d, false);
+ nv50_bufctx_fence(bufctx, false);
}
- nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
+ nouveau_pushbuf_bufctx(nv50->base.pushbuf, bufctx);
ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
+ return !ret;
+}
+
+bool
+nv50_state_validate_3d(struct nv50_context *nv50, uint32_t mask)
+{
+ bool ret;
+
+ ret = nv50_state_validate(nv50, mask, validate_list_3d,
+ ARRAY_SIZE(validate_list_3d), &nv50->dirty_3d,
+ nv50->bufctx_3d);
+
if (unlikely(nv50->state.flushed)) {
nv50->state.flushed = false;
nv50_bufctx_fence(nv50->bufctx_3d, true);
}
- return !ret;
+ return ret;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 84646f6adb1..68b0e18ef8f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -353,7 +353,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
static void
@@ -436,7 +436,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
PUSH_DATA (push, nv50->cond_condmode);
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
void
@@ -525,7 +525,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
uint32_t mode = 0;
/* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
- if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER))
+ if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER))
return;
/* We have to clear ALL of the layers, not up to the min number of layers
@@ -798,7 +798,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
data, data_size);
}
- nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
}
/* =============================== BLIT CODE ===================================
@@ -834,7 +834,7 @@ struct nv50_blitctx
struct pipe_sampler_view *texture[2];
struct nv50_tsc_entry *sampler[2];
unsigned min_samples;
- uint32_t dirty;
+ uint32_t dirty_3d;
} saved;
struct nv50_rasterizer_stateobj rast;
};
@@ -1253,15 +1253,15 @@ nv50_blitctx_pre_blit(struct nv50_blitctx *ctx)
nv50->min_samples = 1;
- ctx->saved.dirty = nv50->dirty;
+ ctx->saved.dirty_3d = nv50->dirty_3d;
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty =
- NV50_NEW_FRAMEBUFFER | NV50_NEW_MIN_SAMPLES |
- NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | NV50_NEW_GMTYPROG |
- NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS;
+ nv50->dirty_3d =
+ NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_MIN_SAMPLES |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_FRAGPROG | NV50_NEW_3D_GMTYPROG |
+ NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS;
}
static void
@@ -1302,14 +1302,14 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit)
nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
nv50->cond_cond, nv50->cond_mode);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB);
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TEXTURES);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_FB);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_TEXTURES);
- nv50->dirty = blit->saved.dirty |
- (NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR | NV50_NEW_SAMPLE_MASK |
- NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_BLEND |
- NV50_NEW_TEXTURES | NV50_NEW_SAMPLERS |
- NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG | NV50_NEW_FRAGPROG);
+ nv50->dirty_3d = blit->saved.dirty_3d |
+ (NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR | NV50_NEW_3D_SAMPLE_MASK |
+ NV50_NEW_3D_RASTERIZER | NV50_NEW_3D_ZSA | NV50_NEW_3D_BLEND |
+ NV50_NEW_3D_TEXTURES | NV50_NEW_3D_SAMPLERS |
+ NV50_NEW_3D_VERTPROG | NV50_NEW_3D_GMTYPROG | NV50_NEW_3D_FRAGPROG);
nv50->scissors_dirty |= 1;
nv50->base.pipe.set_min_samples(&nv50->base.pipe, blit->saved.min_samples);
@@ -1344,7 +1344,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
nv50_blitctx_prepare_state(blit);
- nv50_state_validate(nv50, ~0);
+ nv50_state_validate_3d(nv50, ~0);
x_range = (float)info->src.box.width / (float)info->dst.box.width;
y_range = (float)info->src.box.height / (float)info->dst.box.height;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index 4b69c3bd504..414d326eeed 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -299,7 +299,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
- BCTX_REFN(nv50->bufctx_3d, TEXTURES, res, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_TEXTURES, res, RD);
BEGIN_NV04(push, NV50_3D(BIND_TIC(s)), 1);
PUSH_DATA (push, (tic->id << 9) | (i << 1) | 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 6f60445d8d2..a11cdf847b1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -230,7 +230,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50,
addrs[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer, base, size,
&bo);
if (addrs[b])
- BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, NOUVEAU_BO_GART |
NOUVEAU_BO_RD, bo);
}
nv50->base.vbo_dirty = true;
@@ -269,7 +269,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
address[b] = nouveau_scratch_data(&nv50->base, vb->user_buffer,
base, size, &bo);
if (address[b])
- BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, bo_flags, bo);
+ BCTX_REFN_bo(nv50->bufctx_3d, 3D_VERTEX_TMP, bo_flags, bo);
}
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
@@ -286,7 +286,7 @@ static inline void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
if (nv50->vbo_user) {
- nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX_TMP);
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_VERTEX_TMP);
nouveau_scratch_done(&nv50->base);
}
}
@@ -394,7 +394,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
struct nv04_resource *buf = nv04_resource(vb->buffer);
if (!(refd & (1 << b))) {
refd |= 1 << b;
- BCTX_REFN(nv50->bufctx_3d, VERTEX, buf, RD);
+ BCTX_REFN(nv50->bufctx_3d, 3D_VERTEX, buf, RD);
}
address = buf->address + vb->buffer_offset + ve->pipe.src_offset;
limit = buf->address + buf->base.width0 - 1;
@@ -779,9 +779,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50->vbo_push_hint = /* the 64 is heuristic */
!(info->indexed && ((nv50->vb_elt_limit + 64) < info->count));
- if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_ARRAYS | NV50_NEW_VERTEX))) {
+ if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) {
if (!!nv50->vbo_fifo != nv50->vbo_push_hint)
- nv50->dirty |= NV50_NEW_ARRAYS;
+ nv50->dirty_3d |= NV50_NEW_3D_ARRAYS;
else
if (!nv50->vbo_fifo)
nv50_update_user_vbufs(nv50);
@@ -790,7 +790,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
- nv50_state_validate(nv50, ~0);
+ nv50_state_validate_3d(nv50, ~0);
push->kick_notify = nv50_draw_vbo_kick_notify;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index 68002305d72..7056258d1bf 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -58,8 +58,8 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_M2MF(m) 5, (m)
#define NV50_M2MF(n) SUBC_M2MF(NV50_M2MF_##n)
-#define SUBC_COMPUTE(m) 6, (m)
-#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
+#define SUBC_CP(m) 6, (m)
+#define NV50_CP(n) SUBC_CP(NV50_COMPUTE_##n)
static inline uint32_t
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index ffbb16f79de..6aaa7ce1aaf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -153,7 +153,7 @@ nvc0_compute_validate_constbufs(struct nvc0_context *nvc0)
if (nvc0->constbuf[s][i].user) {
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
- const unsigned base = s << 16;
+ const unsigned base = NVC0_CB_USR_INFO(s);
const unsigned size = nvc0->constbuf[s][0].size;
assert(i == 0); /* we really only want OpenGL uniforms here */
assert(nvc0->constbuf[s][0].u.data);
@@ -207,8 +207,8 @@ nvc0_compute_validate_driverconst(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (5 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1);
PUSH_DATA (push, (15 << 8) | 1);
@@ -219,15 +219,16 @@ static void
nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
const int s = 5;
int i;
BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
if (nvc0->buffers[s][i].buffer) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 54afe887ebd..31e1272aeed 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -98,6 +98,31 @@
#define NVC0_BIND_M2MF 0
#define NVC0_BIND_FENCE 1
+/* 6 user uniform buffers, at 64K each */
+#define NVC0_CB_USR_INFO(s) (s << 16)
+#define NVC0_CB_USR_SIZE (6 << 16)
+/* 6 driver constbuts, at 1K each */
+#define NVC0_CB_AUX_INFO(s) NVC0_CB_USR_SIZE + (s << 10)
+#define NVC0_CB_AUX_SIZE (6 << 10)
+/* XXX: Figure out what this UNK data is. */
+#define NVC0_CB_AUX_UNK_INFO 0x000
+#define NVC0_CB_AUX_UNK_SIZE (8 * 4)
+/* 32 textures handles, at 1 32-bits integer each */
+#define NVC0_CB_AUX_TEX_INFO(i) 0x020 + (i) * 4
+#define NVC0_CB_AUX_TEX_SIZE (32 * 4)
+/* 8 user clip planes, at 4 32-bits floats each */
+#define NVC0_CB_AUX_UCP_INFO 0x100
+#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
+/* 8 sets of 32-bits integer pairs sample offsets */
+#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
+#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
+/* draw parameters (index bais, base instance, drawid) */
+#define NVC0_CB_AUX_DRAW_INFO 0x180 /* VP */
+/* 32 user buffers, at 4 32-bits integers each */
+#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
+/* 4 32-bits floats for the vertex runout, put at the end */
+#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
struct nvc0_blitctx;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index bc884d6c08f..b7c6faf9cde 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -535,29 +535,27 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.genUserClip = prog->vp.num_ucps;
info->io.auxCBSlot = 15;
- info->io.ucpBase = 256;
- info->io.drawInfoBase = 256 + 128;
+ info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
+ info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
- info->io.resInfoCBSlot = 0;
+ info->io.auxCBSlot = 0;
info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
} else {
- info->io.resInfoCBSlot = 15;
- info->io.suInfoBase = 512;
+ info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
}
info->io.msInfoCBSlot = 0;
info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
} else {
if (chipset >= NVISA_GK104_CHIPSET) {
- info->io.texBindBase = 0x20;
+ info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->io.suInfoBase = 0; /* TODO */
}
- info->io.resInfoCBSlot = 15;
- info->io.sampleInfoBase = 256 + 128;
- info->io.suInfoBase = 512;
+ info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
+ info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 3c5b1da2063..553c001cd2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -922,14 +922,14 @@ nvc0_screen_create(struct nouveau_device *dev)
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
unsigned j;
BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, NVC0_CB_AUX_UNK_INFO);
for (j = 0; j < 8; ++j)
PUSH_DATA(push, j);
} else {
@@ -943,8 +943,8 @@ nvc0_screen_create(struct nouveau_device *dev)
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
@@ -952,8 +952,8 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index 8487abcf999..46b692df2e3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -66,7 +66,7 @@ struct nvc0_screen {
struct nouveau_bo *text;
struct nouveau_bo *parm; /* for COMPUTE */
- struct nouveau_bo *uniform_bo; /* for 3D */
+ struct nouveau_bo *uniform_bo;
struct nouveau_bo *tls;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
struct nouveau_bo *poly_cache;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 090a0395432..a100fc4c478 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -413,7 +413,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
{
unsigned s, i;
- for (s = 0; s < 5; ++s)
+ for (s = 0; s < 6; ++s)
for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
if (nvc0_context(pipe)->samplers[s][i] == hwcso)
nvc0_context(pipe)->samplers[s][i] = NULL;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c0ed5c0043d..9c64482f2e2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -72,6 +72,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nvc0_screen *screen = nvc0->screen;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
bool serialize = false;
@@ -183,10 +184,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
for (i = 0; i < ms; i++) {
float xy[2];
nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
@@ -313,14 +314,14 @@ static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ struct nvc0_screen *screen = nvc0->screen;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
- PUSH_DATA (push, 256);
+ PUSH_DATA (push, NVC0_CB_AUX_UCP_INFO);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
@@ -424,7 +425,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
if (nvc0->constbuf[s][i].user) {
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
- const unsigned base = s << 16;
+ const unsigned base = NVC0_CB_USR_INFO(s);
const unsigned size = nvc0->constbuf[s][0].size;
assert(i == 0); /* we really only want OpenGL uniforms here */
assert(nvc0->constbuf[s][0].u.data);
@@ -478,15 +479,16 @@ static void
nvc0_validate_buffers(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
int i, s;
for (s = 0; s < 5; s++) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, NVC0_CB_AUX_BUF_INFO(0));
for (i = 0; i < NVC0_MAX_BUFFERS; i++) {
if (nvc0->buffers[s][i].buffer) {
struct nv04_resource *res =
@@ -550,8 +552,8 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0)
for (i = 0; i < 5; ++i) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
- PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 53332400a4f..ce6a6dce39c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -707,21 +707,20 @@ void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- uint64_t address;
+ struct nvc0_screen *screen = nvc0->screen;
unsigned s;
if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
return;
- address = nvc0->screen->uniform_bo->offset + (6 << 16);
- for (s = 0; s < 5; ++s, address += (1 << 10)) {
+ for (s = 0; s < 5; ++s) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 1024);
- PUSH_DATAh(push, address);
- PUSH_DATA (push, address);
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
do {
int i = ffs(dirty) - 1;
dirty &= ~(1 << i);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index e0e0ad2a0f7..4d9cd5752b5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -820,6 +820,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct nv04_resource *buf_count = nv04_resource(info->indirect_params);
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
+ struct nvc0_screen *screen = nvc0->screen;
PUSH_SPACE(push, 7);
@@ -833,10 +834,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
/* Queue things up to let the macros write params to the driver constbuf */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
@@ -934,6 +935,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
int s;
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
@@ -975,11 +977,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
+ PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
if (!info->indirect) {
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
- PUSH_DATA (push, 256 + 128);
+ PUSH_DATA (push, NVC0_CB_AUX_DRAW_INFO);
PUSH_DATA (push, info->index_bias);
PUSH_DATA (push, info->start_instance);
PUSH_DATA (push, info->drawid);
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 6fa892089ec..d100a9df55b 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -385,7 +385,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
if (!r300->ctx)
goto fail;
- r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL);
+ r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300);
if (r300->cs == NULL)
goto fail;
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 7a75b43a53e..63182cba2b2 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -53,7 +53,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags,
}
r300->flush_counter++;
- r300->rws->cs_flush(r300->cs, flags, fence, 0);
+ r300->rws->cs_flush(r300->cs, flags, fence);
r300->dirty_hw = 0;
/* New kitchen sink, baby. */
@@ -88,11 +88,11 @@ void r300_flush(struct pipe_context *pipe,
* and we cannot emit an empty CS. Let's write to some reg. */
CS_LOCALS(r300);
OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
- r300->rws->cs_flush(r300->cs, flags, fence, 0);
+ r300->rws->cs_flush(r300->cs, flags, fence);
} else {
/* Even if hw is not dirty, we should at least reset the CS in case
* the space checking failed for the first draw operation. */
- r300->rws->cs_flush(r300->cs, flags, NULL, 0);
+ r300->rws->cs_flush(r300->cs, flags, NULL);
}
}
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 57456c6d867..709345a492e 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -981,8 +981,8 @@ boolean r300_resource_get_handle(struct pipe_screen* screen,
return FALSE;
}
- return rws->buffer_get_handle(tex->buf,
- tex->tex.stride_in_bytes[0], whandle);
+ return rws->buffer_get_handle(tex->buf, tex->tex.stride_in_bytes[0],
+ 0, 0, whandle);
}
static const struct u_resource_vtbl r300_texture_vtbl =
@@ -1116,7 +1116,7 @@ struct pipe_resource *r300_texture_from_handle(struct pipe_screen *screen,
return NULL;
}
- buffer = rws->buffer_from_handle(rws, whandle, &stride);
+ buffer = rws->buffer_from_handle(rws, whandle, &stride, NULL);
if (!buffer)
return NULL;
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index 8317da727a2..f3bb03e54be 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -21,14 +21,6 @@ AM_CFLAGS += \
$(LLVM_CFLAGS) \
-I$(top_srcdir)/src/gallium/drivers/radeon/
-libr600_la_SOURCES += \
- $(LLVM_C_SOURCES)
-
-endif
-
-if USE_R600_LLVM_COMPILER
-AM_CFLAGS += \
- -DR600_USE_LLVM
endif
if HAVE_GALLIUM_COMPUTE
diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources
index 024dea3a002..8bf8083bbab 100644
--- a/src/gallium/drivers/r600/Makefile.sources
+++ b/src/gallium/drivers/r600/Makefile.sources
@@ -64,7 +64,3 @@ CXX_SOURCES = \
sb/sb_shader.h \
sb/sb_ssa_builder.cpp \
sb/sb_valtable.cpp
-
-LLVM_C_SOURCES = \
- r600_llvm.c \
- r600_llvm.h
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 2a1b2519ec7..f4b669000dc 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -192,6 +192,69 @@ static const struct u_resource_vtbl r600_global_buffer_vtbl =
r600_compute_global_transfer_inline_write /* transfer_inline_write */
};
+/* We need to define these R600 registers here, because we can't include
+ * evergreend.h and r600d.h.
+ */
+#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
+#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
+
+#ifdef HAVE_OPENCL
+
+static void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
+ struct r600_bytecode *bc,
+ uint64_t symbol_offset,
+ boolean *use_kill)
+{
+ unsigned i;
+ const unsigned char *config =
+ radeon_shader_binary_config_start(binary, symbol_offset);
+
+ for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+ unsigned reg =
+ util_le32_to_cpu(*(uint32_t*)(config + i));
+ unsigned value =
+ util_le32_to_cpu(*(uint32_t*)(config + i + 4));
+ switch (reg) {
+ /* R600 / R700 */
+ case R_028850_SQ_PGM_RESOURCES_PS:
+ case R_028868_SQ_PGM_RESOURCES_VS:
+ /* Evergreen / Northern Islands */
+ case R_028844_SQ_PGM_RESOURCES_PS:
+ case R_028860_SQ_PGM_RESOURCES_VS:
+ case R_0288D4_SQ_PGM_RESOURCES_LS:
+ bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
+ bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
+ break;
+ case R_02880C_DB_SHADER_CONTROL:
+ *use_kill = G_02880C_KILL_ENABLE(value);
+ break;
+ case R_0288E8_SQ_LDS_ALLOC:
+ bc->nlds_dw = value;
+ break;
+ }
+ }
+}
+
+static unsigned r600_create_shader(struct r600_bytecode *bc,
+ const struct radeon_shader_binary *binary,
+ boolean *use_kill)
+
+{
+ assert(binary->code_size % 4 == 0);
+ bc->bytecode = CALLOC(1, binary->code_size);
+ memcpy(bc->bytecode, binary->code, binary->code_size);
+ bc->ndw = binary->code_size / 4;
+
+ r600_shader_binary_read_config(binary, bc, 0, use_kill);
+ return 0;
+}
+
+#endif
+
+static void r600_destroy_shader(struct r600_bytecode *bc)
+{
+ FREE(bc->bytecode);
+}
void *evergreen_create_compute_state(
struct pipe_context *ctx_,
@@ -236,13 +299,11 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
if (!shader)
return;
-#ifdef HAVE_OPENCL
radeon_shader_binary_clean(&shader->binary);
r600_destroy_shader(&shader->bc);
/* TODO destroy shader->code_bo, shader->const_bo
* we'll need something like r600_buffer_free */
-#endif
FREE(shader);
}
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h
index c8998d00f5a..e6ff7609aea 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.h
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.h
@@ -26,6 +26,10 @@
#define EVERGREEN_COMPUTE_INTERNAL_H
#include "r600_asm.h"
+#ifdef HAVE_OPENCL
+#include "radeon/radeon_llvm.h"
+#include <llvm-c/Core.h>
+#endif
struct r600_pipe_compute {
struct r600_context *ctx;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 4951297df42..7a6f957945b 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -57,18 +57,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
/* The number of dwords all the dirty states would take. */
mask = ctx->dirty_atoms;
- while (mask != 0) {
+ while (mask != 0)
num_dw += ctx->atoms[u_bit_scan64(&mask)]->num_dw;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
- }
/* The upper-bound of how much space a draw command would take. */
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
}
/* Count in queries_suspend. */
@@ -273,7 +266,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
/* Flush the CS. */
- ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
+ ctx->b.ws->cs_flush(cs, flags, fence);
r600_begin_new_cs(ctx);
}
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
deleted file mode 100644
index 7eab29c6eb4..00000000000
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ /dev/null
@@ -1,943 +0,0 @@
-#include "r600_llvm.h"
-
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_intr.h"
-#include "gallivm/lp_bld_gather.h"
-#include "tgsi/tgsi_parse.h"
-#include "util/list.h"
-#include "util/u_memory.h"
-
-#include "evergreend.h"
-#include "r600_asm.h"
-#include "r600_sq.h"
-#include "r600_opcodes.h"
-#include "r600_shader.h"
-#include "r600_pipe.h"
-#include "radeon_llvm.h"
-#include "radeon_llvm_emit.h"
-#include "radeon_elf_util.h"
-
-#include <stdio.h>
-
-#if defined R600_USE_LLVM || defined HAVE_OPENCL
-
-#define CONSTANT_BUFFER_0_ADDR_SPACE 8
-#define CONSTANT_BUFFER_1_ADDR_SPACE (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
-#define LLVM_R600_BUFFER_INFO_CONST_BUFFER \
- (CONSTANT_BUFFER_0_ADDR_SPACE + R600_BUFFER_INFO_CONST_BUFFER)
-
-static LLVMValueRef llvm_load_const_buffer(
- struct lp_build_tgsi_context * bld_base,
- LLVMValueRef OffsetValue,
- unsigned ConstantAddressSpace)
-{
- LLVMValueRef offset[2] = {
- LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false),
- OffsetValue
- };
-
- LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024),
- ConstantAddressSpace);
- LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, "");
- LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
- return LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
-}
-
-static LLVMValueRef llvm_fetch_const(
- struct lp_build_tgsi_context * bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle)
-{
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg->Register.Index);
- if (reg->Register.Indirect) {
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- LLVMValueRef index = LLVMBuildLoad(bld_base->base.gallivm->builder, bld->addr[reg->Indirect.Index][reg->Indirect.Swizzle], "");
- offset = LLVMBuildAdd(bld_base->base.gallivm->builder, offset, index, "");
- }
- unsigned ConstantAddressSpace = CONSTANT_BUFFER_0_ADDR_SPACE ;
- if (reg->Register.Dimension) {
- ConstantAddressSpace += reg->Dimension.Index;
- }
- LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, ConstantAddressSpace);
- LLVMValueRef cval = LLVMBuildExtractElement(bld_base->base.gallivm->builder, cvecval, lp_build_const_int32(bld_base->base.gallivm, swizzle), "");
- return bitcast(bld_base, type, cval);
-}
-
-static void llvm_load_system_value(
- struct radeon_llvm_context * ctx,
- unsigned index,
- const struct tgsi_full_declaration *decl)
-{
- unsigned chan;
-
- switch (decl->Semantic.Name) {
- case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
- case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
- default: assert(!"unknown system value");
- }
-
- ctx->system_values[index] = LLVMBuildExtractElement(ctx->gallivm.builder,
- LLVMGetParam(ctx->main_fn, 0), lp_build_const_int32(&(ctx->gallivm), chan),
- "");
-}
-
-static LLVMValueRef
-llvm_load_input_vector(
- struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
- boolean interp)
-{
- LLVMTypeRef VecType;
- LLVMValueRef Args[3] = {
- lp_build_const_int32(&(ctx->gallivm), location)
- };
- unsigned ArgCount = 1;
- if (interp) {
- VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2);
- LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2);
- Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
- lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), "");
- Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
- lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
- LLVMValueRef HalfVec[2] = {
- lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
- VecType, Args, ArgCount, LLVMReadNoneAttribute),
- lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
- VecType, Args, ArgCount, LLVMReadNoneAttribute)
- };
- LLVMValueRef MaskInputs[4] = {
- lp_build_const_int32(&(ctx->gallivm), 0),
- lp_build_const_int32(&(ctx->gallivm), 1),
- lp_build_const_int32(&(ctx->gallivm), 2),
- lp_build_const_int32(&(ctx->gallivm), 3)
- };
- LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
- return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1],
- Mask, "");
- } else {
- VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
- return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
- VecType, Args, ArgCount, LLVMReadNoneAttribute);
- }
-}
-
-static LLVMValueRef
-llvm_face_select_helper(
- struct radeon_llvm_context * ctx,
- LLVMValueRef face, LLVMValueRef front_color, LLVMValueRef back_color)
-{
- const struct lp_build_context * bb = &ctx->soa.bld_base.base;
- LLVMValueRef is_front = LLVMBuildFCmp(
- bb->gallivm->builder, LLVMRealUGT, face,
- lp_build_const_float(bb->gallivm, 0.0f), "");
- return LLVMBuildSelect(bb->gallivm->builder, is_front,
- front_color, back_color, "");
-}
-
-static void llvm_load_input(
- struct radeon_llvm_context * ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl)
-{
- const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
- unsigned chan;
- int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
- LLVMValueRef v;
- boolean require_interp_intrinsic = ctx->chip_class >= EVERGREEN &&
- ctx->type == TGSI_PROCESSOR_FRAGMENT;
-
- if (require_interp_intrinsic && input->spi_sid) {
- v = llvm_load_input_vector(ctx, input->lds_pos, input->ij_index,
- (input->interpolate > 0));
- } else
- v = LLVMGetParam(ctx->main_fn, input->gpr);
-
- if (two_side) {
- struct r600_shader_io * back_input =
- &ctx->r600_inputs[input->back_color_input];
- LLVMValueRef v2;
- LLVMValueRef face = LLVMGetParam(ctx->main_fn, ctx->face_gpr);
- face = LLVMBuildExtractElement(ctx->gallivm.builder, face,
- lp_build_const_int32(&(ctx->gallivm), 0), "");
-
- if (require_interp_intrinsic && back_input->spi_sid)
- v2 = llvm_load_input_vector(ctx, back_input->lds_pos,
- back_input->ij_index, (back_input->interpolate > 0));
- else
- v2 = LLVMGetParam(ctx->main_fn, back_input->gpr);
- v = llvm_face_select_helper(ctx, face, v, v2);
- }
-
- for (chan = 0; chan < 4; chan++) {
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
-
- ctx->inputs[soa_index] = LLVMBuildExtractElement(ctx->gallivm.builder, v,
- lp_build_const_int32(&(ctx->gallivm), chan), "");
-
- if (input->name == TGSI_SEMANTIC_POSITION &&
- ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
- /* RCP for fragcoord.w */
- ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder,
- lp_build_const_float(&(ctx->gallivm), 1.0f),
- ctx->inputs[soa_index], "");
- }
- }
-}
-
-static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
-{
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- radeon_llvm_shader_type(ctx->main_fn, ctx->type);
-
-}
-
-static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
-{
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- struct lp_build_context * base = &bld_base->base;
- struct pipe_stream_output_info * so = ctx->stream_outputs;
- unsigned i;
- unsigned next_pos = 60;
- unsigned next_param = 0;
-
- unsigned color_count = 0;
- boolean has_color = false;
-
- if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) {
- for (i = 0; i < so->num_outputs; i++) {
- unsigned register_index = so->output[i].register_index;
- unsigned start_component = so->output[i].start_component;
- unsigned num_components = so->output[i].num_components;
- unsigned dst_offset = so->output[i].dst_offset;
- unsigned chan;
- LLVMValueRef elements[4];
- if (dst_offset < start_component) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], "");
- }
- start_component = 0;
- } else {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[register_index][chan], "");
- }
- }
- LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
- LLVMValueRef args[4];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component);
- args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
- args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
- lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
- LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
- }
- }
-
- /* Add the necessary export instructions */
- for (i = 0; i < ctx->output_reg_count; i++) {
- unsigned chan;
- LLVMValueRef elements[4];
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- elements[chan] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[i][chan], "");
- }
- if (ctx->alpha_to_one && ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->r600_outputs[i].name == TGSI_SEMANTIC_COLOR)
- elements[3] = lp_build_const_float(base->gallivm, 1.0f);
- LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
-
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- switch (ctx->r600_outputs[i].name) {
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE: {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- case TGSI_SEMANTIC_CLIPVERTEX: {
- LLVMValueRef args[3];
- unsigned reg_index;
- LLVMValueRef adjusted_elements[4];
- for (reg_index = 0; reg_index < 2; reg_index ++) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan);
- LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
- args[0] = output;
- args[1] = base_vector;
- adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
- "llvm.AMDGPU.dp4", bld_base->base.elem_type,
- args, 2, LLVMReadNoneAttribute);
- }
- args[0] = lp_build_gather_values(base->gallivm,
- adjusted_elements, 4);
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- break;
- }
- case TGSI_SEMANTIC_CLIPDIST : {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_pos++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- case TGSI_SEMANTIC_FOG: {
- elements[0] = LLVMBuildLoad(base->gallivm->builder,
- ctx->soa.outputs[i][0], "");
- elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f);
- elements[3] = lp_build_const_float(base->gallivm, 1.0f);
-
- LLVMValueRef args[3];
- args[0] = lp_build_gather_values(base->gallivm, elements, 4);
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- default: {
- LLVMValueRef args[3];
- args[0] = output;
- args[1] = lp_build_const_int32(base->gallivm, next_param++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- break;
- }
- }
- } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- switch (ctx->r600_outputs[i].name) {
- case TGSI_SEMANTIC_COLOR:
- has_color = true;
- if ( color_count < ctx->color_buffer_count) {
- LLVMValueRef args[3];
- args[0] = output;
- if (ctx->fs_color_all) {
- for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
- args[1] = lp_build_const_int32(base->gallivm, j);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- } else {
- args[1] = lp_build_const_int32(base->gallivm, color_count++);
- args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- lp_build_intrinsic(
- base->gallivm->builder,
- "llvm.R600.store.swizzle",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 3, 0);
- }
- }
- break;
- case TGSI_SEMANTIC_POSITION:
- lp_build_intrinsic_unary(
- base->gallivm->builder,
- "llvm.R600.store.pixel.depth",
- LLVMVoidTypeInContext(base->gallivm->context),
- LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], ""));
- break;
- case TGSI_SEMANTIC_STENCIL:
- lp_build_intrinsic_unary(
- base->gallivm->builder,
- "llvm.R600.store.pixel.stencil",
- LLVMVoidTypeInContext(base->gallivm->context),
- LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], ""));
- break;
- }
- }
- }
- // Add dummy exports
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- if (!next_param) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM));
- }
- if (!(next_pos-60)) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS));
- }
- }
- if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- if (!has_color) {
- lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
- LLVMVoidTypeInContext(base->gallivm->context),
- lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL));
- }
- }
-
-}
-
-static void llvm_emit_tex(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct gallivm_state * gallivm = bld_base->base.gallivm;
- LLVMValueRef args[7];
- unsigned c, sampler_src;
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
-
- if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_TXQ: {
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- ctx->uses_tex_buffers = true;
- bool isEgPlus = (ctx->chip_class >= EVERGREEN);
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
- isEgPlus ? 0 : 1);
- LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- if (!isEgPlus) {
- LLVMValueRef maskval[4] = {
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 2),
- lp_build_const_int32(gallivm, 3),
- lp_build_const_int32(gallivm, 0),
- };
- LLVMValueRef mask = LLVMConstVector(maskval, 4);
- cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
- mask, "");
- }
- emit_data->output[0] = cvecval;
- return;
- }
- case TGSI_OPCODE_TXF: {
- args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
- args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
- emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
- "llvm.R600.load.texbuf",
- emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
- if (ctx->chip_class >= EVERGREEN)
- return;
- ctx->uses_tex_buffers = true;
- LLVMDumpValue(emit_data->output[0]);
- emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
- emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
- "");
- LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
- lp_build_const_int32(gallivm, 0),
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- Mask = LLVMBuildBitCast(gallivm->builder, Mask,
- LLVMVectorType(bld_base->base.int_elem_type, 4), "");
- emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
- emit_data->output[0],
- Mask);
- LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
- emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
- Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
- LLVM_R600_BUFFER_INFO_CONST_BUFFER);
- Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
- lp_build_const_int32(gallivm, 0), "");
- Mask = LLVMBuildBitCast(gallivm->builder, Mask,
- bld_base->base.int_elem_type, "");
- WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
- WComponent, Mask);
- emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
- emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
- emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
- emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
- }
- return;
- default:
- break;
- }
- }
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
- emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
- LLVMValueRef Vector[4] = {
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 0), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 1), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 2), ""),
- LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
- lp_build_const_int32(gallivm, 3), ""),
- };
- switch (emit_data->inst->Texture.Texture) {
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
- Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
- break;
- case TGSI_TEXTURE_1D:
- Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
- break;
- default:
- break;
- }
- args[0] = lp_build_gather_values(gallivm, Vector, 4);
- } else {
- args[0] = emit_data->args[0];
- }
-
- assert(emit_data->arg_count + 2 <= Elements(args));
-
- for (c = 1; c < emit_data->arg_count; ++c)
- args[c] = emit_data->args[c];
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
- args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
- args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
- args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
- }
-
- sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;
-
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Src[sampler_src].Register.Index);
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Texture.Texture);
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
- (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
- emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
-
- switch (emit_data->inst->Texture.Texture) {
- case TGSI_TEXTURE_2D_MSAA:
- args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
- break;
- case TGSI_TEXTURE_2D_ARRAY_MSAA:
- args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
- break;
- default:
- break;
- }
-
- if (ctx->has_compressed_msaa_texturing) {
- LLVMValueRef ldptr_args[10] = {
- args[0], // Coord
- args[1], // Offset X
- args[2], // Offset Y
- args[3], // Offset Z
- args[4],
- args[5],
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1),
- lp_build_const_int32(gallivm, 1)
- };
- LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
- "llvm.R600.ldptr",
- emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
- LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
- lp_build_const_int32(gallivm, 3), "");
- Tmp = LLVMBuildMul(gallivm->builder, Tmp,
- lp_build_const_int32(gallivm, 4), "");
- LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
- lp_build_const_int32(gallivm, 0), "");
- ResX = LLVMBuildBitCast(gallivm->builder, ResX,
- bld_base->base.int_elem_type, "");
- Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
- Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
- lp_build_const_int32(gallivm, 0xF), "");
- args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
- lp_build_const_int32(gallivm, 3), "");
- args[c++] = lp_build_const_int32(gallivm,
- emit_data->inst->Texture.Texture);
- }
- }
-
- emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
- action->intr_name,
- emit_data->dst_type, args, c, LLVMReadNoneAttribute);
-
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
- ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
- if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
- LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
- LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
- llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER),
- lp_build_const_int32(gallivm, 0), "");
-
- emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
- struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
- ctx->has_txq_cube_array_z_comp = true;
- }
-}
-
-static void emit_cndlt(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef float_zero = lp_build_const_float(
- bld_base->base.gallivm, 0.0f);
- LLVMValueRef cmp = LLVMBuildFCmp(
- builder, LLVMRealULT, emit_data->args[0], float_zero, "");
- emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
- cmp, emit_data->args[1], emit_data->args[2], "");
-}
-
-static void dp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct lp_build_context * base = &bld_base->base;
- unsigned chan;
- LLVMValueRef elements[2][4];
- unsigned opcode = emit_data->inst->Instruction.Opcode;
- unsigned dp_components = (opcode == TGSI_OPCODE_DP2 ? 2 :
- (opcode == TGSI_OPCODE_DP3 ? 3 : 4));
- for (chan = 0 ; chan < dp_components; chan++) {
- elements[0][chan] = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- elements[1][chan] = lp_build_emit_fetch(bld_base,
- emit_data->inst, 1, chan);
- }
-
- for ( ; chan < 4; chan++) {
- elements[0][chan] = base->zero;
- elements[1][chan] = base->zero;
- }
-
- /* Fix up for DPH */
- if (opcode == TGSI_OPCODE_DPH) {
- elements[0][TGSI_CHAN_W] = base->one;
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- elements[0], 4);
- emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm,
- elements[1], 4);
- emit_data->arg_count = 2;
-
- emit_data->dst_type = base->elem_type;
-}
-
-static struct lp_build_tgsi_action dot_action = {
- .fetch_args = dp_fetch_args,
- .emit = build_tgsi_intrinsic_nomem,
- .intr_name = "llvm.AMDGPU.dp4"
-};
-
-static void txd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[4];
- unsigned chan, src;
- for (src = 0; src < 3; src++) {
- for (chan = 0; chan < 4; chan++)
- coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
-
- emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- }
- emit_data->arg_count = 3;
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-
-static void txp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- LLVMValueRef src_w;
- unsigned chan;
- LLVMValueRef coords[5];
-
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
- for (chan = 0; chan < 3; chan++ ) {
- LLVMValueRef arg = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV, arg, src_w);
- }
- coords[3] = bld_base->base.one;
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->arg_count = 1;
-}
-
-static void tex_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[5];
- unsigned chan;
- for (chan = 0; chan < 4; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
- /* These instructions have additional operand that should be packed
- * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
- * That operand should be passed as a float value in the args array
- * right after the coord vector. After packing it's not used anymore,
- * that's why arg_count is not increased */
- coords[4] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
- }
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
- }
-
- emit_data->arg_count = 1;
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-static void txf_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- const struct tgsi_texture_offset * off = inst->TexOffsets;
- LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
-
- /* fetch tex coords */
- tex_fetch_args(bld_base, emit_data);
-
- /* fetch tex offsets */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- emit_data->args[1] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleX],
- offset_type);
- emit_data->args[2] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleY],
- offset_type);
- emit_data->args[3] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleZ],
- offset_type);
- } else {
- emit_data->args[1] = bld_base->int_bld.zero;
- emit_data->args[2] = bld_base->int_bld.zero;
- emit_data->args[3] = bld_base->int_bld.zero;
- }
-
- emit_data->arg_count = 4;
-}
-
-LLVMModuleRef r600_tgsi_llvm(
- struct radeon_llvm_context * ctx,
- const struct tgsi_token * tokens)
-{
- struct tgsi_shader_info shader_info;
- struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- radeon_llvm_context_init(ctx, "r600--");
- LLVMTypeRef Arguments[32];
- unsigned ArgumentsCount = 0;
- for (unsigned i = 0; i < ctx->inputs_count; i++)
- Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4);
- radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount);
- for (unsigned i = 0; i < ctx->inputs_count; i++) {
- LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
- LLVMAddAttribute(P, LLVMInRegAttribute);
- }
- tgsi_scan_shader(tokens, &shader_info);
-
- bld_base->info = &shader_info;
- bld_base->userdata = ctx;
- bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
- bld_base->emit_prologue = llvm_emit_prologue;
- bld_base->emit_epilogue = llvm_emit_epilogue;
- ctx->load_input = llvm_load_input;
- ctx->load_system_value = llvm_load_system_value;
-
- bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
- bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
- bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
- bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
- bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
- bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
-
- lp_build_tgsi_llvm(bld_base, tokens);
-
- LLVMBuildRetVoid(bld_base->base.gallivm->builder);
- radeon_llvm_finalize_module(ctx);
-
- return ctx->gallivm.module;
-}
-
-/* We need to define these R600 registers here, because we can't include
- * evergreend.h and r600d.h.
- */
-#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
-#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
-
-void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
- struct r600_bytecode *bc,
- uint64_t symbol_offset,
- boolean *use_kill)
-{
- unsigned i;
- const unsigned char *config =
- radeon_shader_binary_config_start(binary, symbol_offset);
-
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
- unsigned reg =
- util_le32_to_cpu(*(uint32_t*)(config + i));
- unsigned value =
- util_le32_to_cpu(*(uint32_t*)(config + i + 4));
- switch (reg) {
- /* R600 / R700 */
- case R_028850_SQ_PGM_RESOURCES_PS:
- case R_028868_SQ_PGM_RESOURCES_VS:
- /* Evergreen / Northern Islands */
- case R_028844_SQ_PGM_RESOURCES_PS:
- case R_028860_SQ_PGM_RESOURCES_VS:
- case R_0288D4_SQ_PGM_RESOURCES_LS:
- bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
- bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
- break;
- case R_02880C_DB_SHADER_CONTROL:
- *use_kill = G_02880C_KILL_ENABLE(value);
- break;
- case R_0288E8_SQ_LDS_ALLOC:
- bc->nlds_dw = value;
- break;
- }
- }
-
-}
-
-unsigned r600_create_shader(struct r600_bytecode *bc,
- const struct radeon_shader_binary *binary,
- boolean *use_kill)
-
-{
- assert(binary->code_size % 4 == 0);
- bc->bytecode = CALLOC(1, binary->code_size);
- memcpy(bc->bytecode, binary->code, binary->code_size);
- bc->ndw = binary->code_size / 4;
-
- r600_shader_binary_read_config(binary, bc, 0, use_kill);
-
- return 0;
-}
-
-void r600_destroy_shader(struct r600_bytecode *bc)
-{
- FREE(bc->bytecode);
-}
-
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump,
- struct pipe_debug_callback *debug)
-{
- unsigned r;
- struct radeon_shader_binary binary;
- const char * gpu_family = r600_get_llvm_processor_name(family);
-
- radeon_shader_binary_init(&binary);
- if (dump)
- LLVMDumpModule(mod);
- r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
-
- r = r600_create_shader(bc, &binary, use_kill);
-
- radeon_shader_binary_clean(&binary);
-
- return r;
-}
-
-#endif
diff --git a/src/gallium/drivers/r600/r600_llvm.h b/src/gallium/drivers/r600/r600_llvm.h
deleted file mode 100644
index 3f7fc4bef7e..00000000000
--- a/src/gallium/drivers/r600/r600_llvm.h
+++ /dev/null
@@ -1,42 +0,0 @@
-
-#ifndef R600_LLVM_H
-#define R600_LLVM_H
-
-#if defined R600_USE_LLVM || defined HAVE_OPENCL
-
-#include "radeon/radeon_llvm.h"
-#include <llvm-c/Core.h>
-
-struct pipe_debug_callback;
-struct r600_bytecode;
-struct r600_shader_ctx;
-struct radeon_llvm_context;
-struct radeon_shader_binary;
-enum radeon_family;
-
-LLVMModuleRef r600_tgsi_llvm(
- struct radeon_llvm_context * ctx,
- const struct tgsi_token * tokens);
-
-unsigned r600_llvm_compile(
- LLVMModuleRef mod,
- enum radeon_family family,
- struct r600_bytecode *bc,
- boolean *use_kill,
- unsigned dump,
- struct pipe_debug_callback *debug);
-
-unsigned r600_create_shader(struct r600_bytecode *bc,
- const struct radeon_shader_binary *binary,
- boolean *use_kill);
-
-void r600_destroy_shader(struct r600_bytecode *bc);
-
-void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
- struct r600_bytecode *bc,
- uint64_t symbol_offset,
- boolean *use_kill);
-
-#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
-
-#endif /* R600_LLVM_H */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 7018088d204..b8011917907 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -43,9 +43,6 @@
static const struct debug_named_value r600_debug_options[] = {
/* features */
-#if defined(R600_USE_LLVM)
- { "llvm", DBG_LLVM, "Enable the LLVM shader compiler" },
-#endif
{ "nocpdma", DBG_NO_CP_DMA, "Disable CP DMA" },
/* shader backend */
@@ -187,9 +184,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
}
rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
- r600_context_gfx_flush, rctx,
- rscreen->b.trace_bo ?
- rscreen->b.trace_bo->buf : NULL);
+ r600_context_gfx_flush, rctx);
rctx->b.gfx.flush = r600_context_gfx_flush;
rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
@@ -622,8 +617,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS | DBG_TCS | DBG_TES;
if (!debug_get_bool_option("R600_HYPERZ", TRUE))
rscreen->b.debug_flags |= DBG_NO_HYPERZ;
- if (debug_get_bool_option("R600_LLVM", FALSE))
- rscreen->b.debug_flags |= DBG_LLVM;
if (rscreen->b.family == CHIP_UNKNOWN) {
fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index f8a20398355..cd0052a519f 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -28,8 +28,6 @@
#include "radeon/r600_pipe_common.h"
#include "radeon/r600_cs.h"
-
-#include "r600_llvm.h"
#include "r600_public.h"
#include "util/u_suballoc.h"
@@ -60,7 +58,6 @@
/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 16
#define R600_MAX_DRAW_CS_DWORDS 58
-#define R600_TRACE_CS_DWORDS 7
#define R600_MAX_USER_CONST_BUFFERS 13
#define R600_MAX_DRIVER_CONST_BUFFERS 3
@@ -244,7 +241,6 @@ struct r600_gs_rings_state {
/* This must start from 16. */
/* features */
-#define DBG_LLVM (1 << 29)
#define DBG_NO_CP_DMA (1 << 30)
/* shader backend */
#define DBG_NO_SB (1 << 21)
@@ -571,15 +567,10 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
r600_set_atom_dirty(rctx, atom, true);
}
-void r600_trace_emit(struct r600_context *rctx);
-
static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
r600_set_atom_dirty(rctx, atom, false);
- if (rctx->screen->b.trace_bo) {
- r600_trace_emit(rctx);
- }
}
static inline void r600_set_cso_state(struct r600_context *rctx,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index df40f94bdcf..77658f53551 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -21,7 +21,6 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "r600_sq.h"
-#include "r600_llvm.h"
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
@@ -194,10 +193,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
/* disable SB for shaders using doubles */
use_sb &= !shader->shader.uses_doubles;
- /* Check if the bytecode has already been built. When using the llvm
- * backend, r600_shader_from_tgsi() will take care of building the
- * bytecode.
- */
+ /* Check if the bytecode has already been built. */
if (!shader->shader.bc.bytecode) {
r = r600_bytecode_build(&shader->shader.bc);
if (r) {
@@ -332,7 +328,6 @@ struct r600_shader_ctx {
uint32_t *literals;
uint32_t nliterals;
uint32_t max_driver_temp_used;
- boolean use_llvm;
/* needed for evergreen interpolation */
struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
/* evergreen/cayman also store sample mask in face register */
@@ -661,11 +656,9 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
if (ctx->shader->input[index].interpolate > 0) {
evergreen_interp_assign_ij_index(ctx, index);
- if (!ctx->use_llvm)
- r = evergreen_interp_alu(ctx, index);
+ r = evergreen_interp_alu(ctx, index);
} else {
- if (!ctx->use_llvm)
- r = evergreen_interp_flat(ctx, index);
+ r = evergreen_interp_flat(ctx, index);
}
}
return r;
@@ -2936,22 +2929,16 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
int i, j, k, r = 0;
int next_param_base = 0, next_clip_base;
int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
- /* Declarations used by llvm code */
- bool use_llvm = false;
bool indirect_gprs;
bool ring_outputs = false;
bool lds_outputs = false;
bool lds_inputs = false;
bool pos_emitted = false;
-#ifdef R600_USE_LLVM
- use_llvm = rscreen->b.debug_flags & DBG_LLVM;
-#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.native_integers = true;
-
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
@@ -3043,19 +3030,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[i] = 0;
}
-#ifdef R600_USE_LLVM
- if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) {
- fprintf(stderr, "Warning: R600 LLVM backend does not support "
- "indirect adressing. Falling back to TGSI "
- "backend.\n");
- use_llvm = 0;
- }
-#endif
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
ctx.file_offset[TGSI_FILE_INPUT] = 1;
- if (!use_llvm) {
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
- }
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
}
if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
if (ctx.bc->chip_class >= EVERGREEN)
@@ -3085,16 +3062,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (add_tess_inout)
ctx.file_offset[TGSI_FILE_INPUT]+=2;
}
- ctx.use_llvm = use_llvm;
- if (use_llvm) {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
- ctx.file_offset[TGSI_FILE_INPUT];
- } else {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
+ ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_max[TGSI_FILE_INPUT] + 1;
- }
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
@@ -3234,71 +3205,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
}
-/* LLVM backend setup */
-#ifdef R600_USE_LLVM
- if (use_llvm) {
- struct radeon_llvm_context radeon_llvm_ctx;
- LLVMModuleRef mod;
- bool dump = r600_can_dump_shader(&rscreen->b,
- tgsi_get_processor_type(tokens));
- boolean use_kill = false;
-
- memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
- radeon_llvm_ctx.type = ctx.type;
- radeon_llvm_ctx.two_side = shader->two_side;
- radeon_llvm_ctx.face_gpr = ctx.face_gpr;
- radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1;
- radeon_llvm_ctx.r600_inputs = ctx.shader->input;
- radeon_llvm_ctx.r600_outputs = ctx.shader->output;
- radeon_llvm_ctx.color_buffer_count = max_color_exports;
- radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
- radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
- radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
- radeon_llvm_ctx.has_compressed_msaa_texturing =
- ctx.bc->has_compressed_msaa_texturing;
- mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
- ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
- ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;
-
- if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill,
- dump, &rctx->b.debug)) {
- radeon_llvm_dispose(&radeon_llvm_ctx);
- use_llvm = 0;
- fprintf(stderr, "R600 LLVM backend failed to compile "
- "shader. Falling back to TGSI\n");
- } else {
- ctx.file_offset[TGSI_FILE_OUTPUT] =
- ctx.file_offset[TGSI_FILE_INPUT];
- }
- if (use_kill)
- ctx.shader->uses_kill = use_kill;
- radeon_llvm_dispose(&radeon_llvm_ctx);
- }
-#endif
-/* End of LLVM backend setup */
-
if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
- if (!use_llvm) {
- if (ctx.fragcoord_input >= 0) {
- if (ctx.bc->chip_class == CAYMAN) {
- for (j = 0 ; j < 4; j++) {
- struct r600_bytecode_alu alu;
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_RECIP_IEEE;
- alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
- alu.src[0].chan = 3;
-
- alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
- alu.dst.chan = j;
- alu.dst.write = (j == 3);
- alu.last = 1;
- if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
- return r;
- }
- } else {
+ if (ctx.fragcoord_input >= 0) {
+ if (ctx.bc->chip_class == CAYMAN) {
+ for (j = 0 ; j < 4; j++) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_RECIP_IEEE;
@@ -3306,87 +3218,100 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
alu.src[0].chan = 3;
alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
- alu.dst.chan = 3;
- alu.dst.write = 1;
+ alu.dst.chan = j;
+ alu.dst.write = (j == 3);
alu.last = 1;
if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
return r;
}
- }
-
- if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ } else {
struct r600_bytecode_alu alu;
- int r;
-
- /* GS thread with no output workaround - emit a cut at start of GS */
- if (ctx.bc->chip_class == R600)
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_RECIP_IEEE;
+ alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
+ alu.src[0].chan = 3;
- for (j = 0; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[0].value = 0;
- alu.dst.sel = ctx.gs_export_gpr_tregs[j];
- alu.dst.write = 1;
- alu.last = 1;
- r = r600_bytecode_add_alu(ctx.bc, &alu);
- if (r)
- return r;
- }
+ alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+ if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+ return r;
}
+ }
+
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ struct r600_bytecode_alu alu;
+ int r;
- if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
- r600_fetch_tess_io_info(&ctx);
+ /* GS thread with no output workaround - emit a cut at start of GS */
+ if (ctx.bc->chip_class == R600)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
- if (shader->two_side && ctx.colors_used) {
- if ((r = process_twoside_color_inputs(&ctx)))
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_tregs[j];
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
return r;
}
+ }
- tgsi_parse_init(&ctx.parse, tokens);
- while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
- tgsi_parse_token(&ctx.parse);
- switch (ctx.parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- r = tgsi_is_supported(&ctx);
- if (r)
- goto out_err;
- ctx.max_driver_temp_used = 0;
- /* reserve first tmp for everyone */
- r600_get_temp(&ctx);
+ if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
+ r600_fetch_tess_io_info(&ctx);
+
+ if (shader->two_side && ctx.colors_used) {
+ if ((r = process_twoside_color_inputs(&ctx)))
+ return r;
+ }
- opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
- if ((r = tgsi_split_constant(&ctx)))
+ tgsi_parse_init(&ctx.parse, tokens);
+ while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
+ tgsi_parse_token(&ctx.parse);
+ switch (ctx.parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ r = tgsi_is_supported(&ctx);
+ if (r)
+ goto out_err;
+ ctx.max_driver_temp_used = 0;
+ /* reserve first tmp for everyone */
+ r600_get_temp(&ctx);
+
+ opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ if ((r = tgsi_split_gs_inputs(&ctx)))
goto out_err;
- if ((r = tgsi_split_literal_constant(&ctx)))
+ } else if (lds_inputs) {
+ if ((r = tgsi_split_lds_inputs(&ctx)))
goto out_err;
- if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- if ((r = tgsi_split_gs_inputs(&ctx)))
- goto out_err;
- } else if (lds_inputs) {
- if ((r = tgsi_split_lds_inputs(&ctx)))
- goto out_err;
- }
- if (ctx.bc->chip_class == CAYMAN)
- ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
- else if (ctx.bc->chip_class >= EVERGREEN)
- ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
- else
- ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
- r = ctx.inst_info->process(&ctx);
+ }
+ if (ctx.bc->chip_class == CAYMAN)
+ ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+ else if (ctx.bc->chip_class >= EVERGREEN)
+ ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
+ else
+ ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
+ r = ctx.inst_info->process(&ctx);
+ if (r)
+ goto out_err;
+
+ if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) {
+ r = r600_store_tcs_output(&ctx);
if (r)
goto out_err;
-
- if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) {
- r = r600_store_tcs_output(&ctx);
- if (r)
- goto out_err;
- }
- break;
- default:
- break;
}
+ break;
+ default:
+ break;
}
}
@@ -3437,8 +3362,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
alu.dst.write = (j == ochan);
if (j == 3)
alu.last = 1;
- if (!use_llvm)
- r = r600_bytecode_add_alu(ctx.bc, &alu);
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
if (r)
return r;
}
@@ -3446,7 +3370,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
/* Add stream outputs. */
- if (!use_llvm && so.num_outputs) {
+ if (so.num_outputs) {
bool emit = false;
if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX)
emit = true;
@@ -3709,31 +3633,27 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
}
/* add output to bytecode */
- if (!use_llvm) {
- for (i = 0; i < noutput; i++) {
- r = r600_bytecode_add_output(ctx.bc, &output[i]);
- if (r)
- goto out_err;
- }
+ for (i = 0; i < noutput; i++) {
+ r = r600_bytecode_add_output(ctx.bc, &output[i]);
+ if (r)
+ goto out_err;
}
}
/* add program end */
- if (!use_llvm) {
- if (ctx.bc->chip_class == CAYMAN)
- cm_bytecode_add_cf_end(ctx.bc);
- else {
- const struct cf_op_info *last = NULL;
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ const struct cf_op_info *last = NULL;
- if (ctx.bc->cf_last)
- last = r600_isa_cf(ctx.bc->cf_last->op);
+ if (ctx.bc->cf_last)
+ last = r600_isa_cf(ctx.bc->cf_last->op);
- /* alu clause instructions don't have EOP bit, so add NOP */
- if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+ /* alu clause instructions don't have EOP bit, so add NOP */
+ if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
- ctx.bc->cf_last->end_of_program = 1;
- }
+ ctx.bc->cf_last->end_of_program = 1;
}
/* check GPR limit - we have 124 = 128 - 4
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 2211e07ceba..df41d3f028d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2029,10 +2029,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT);
}
- if (rctx->screen->b.trace_bo) {
- r600_trace_emit(rctx);
- }
-
/* Set the depth buffer as dirty. */
if (rctx->framebuffer.state.zsbuf) {
struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
@@ -2927,22 +2923,3 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->b.set_occlusion_query_state = r600_set_occlusion_query_state;
rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;
}
-
-void r600_trace_emit(struct r600_context *rctx)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
- uint64_t va;
- uint32_t reloc;
-
- va = rscreen->b.trace_bo->gpu_address;
- reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo,
- RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
- radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
- radeon_emit(cs, va & 0xFFFFFFFFUL);
- radeon_emit(cs, (va >> 32UL) & 0xFFUL);
- radeon_emit(cs, cs->cdw);
- radeon_emit(cs, rscreen->b.cs_count);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, reloc);
-}
diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 556a05da395..3dd3a4815ba 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -598,9 +598,13 @@ bool expr_handler::fold_assoc(alu_node *n) {
unsigned op = n->bc.op;
bool allow_neg = false, cur_neg = false;
+ bool distribute_neg = false;
switch(op) {
case ALU_OP2_ADD:
+ distribute_neg = true;
+ allow_neg = true;
+ break;
case ALU_OP2_MUL:
case ALU_OP2_MUL_IEEE:
allow_neg = true;
@@ -632,7 +636,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
if (v1->is_const()) {
literal arg = v1->get_const_value();
apply_alu_src_mod(a->bc, 1, arg);
- if (cur_neg)
+ if (cur_neg && distribute_neg)
arg.f = -arg.f;
if (a == n)
@@ -660,7 +664,7 @@ bool expr_handler::fold_assoc(alu_node *n) {
if (v0->is_const()) {
literal arg = v0->get_const_value();
apply_alu_src_mod(a->bc, 0, arg);
- if (cur_neg)
+ if (cur_neg && distribute_neg)
arg.f = -arg.f;
if (last_arg == 0) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index ea028272ccd..eed9d83ee49 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -229,7 +229,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct radeon_winsys_cs *cs = rctx->dma.cs;
if (cs->cdw)
- rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
+ rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
if (fence)
rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
}
@@ -318,7 +318,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
- rctx, NULL);
+ rctx);
rctx->dma.flush = r600_flush_dma_ring;
}
@@ -379,7 +379,6 @@ static const struct debug_named_value common_debug_options[] = {
{ "tex", DBG_TEX, "Print texture info" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
- { "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
{ "info", DBG_INFO, "Print driver information" },
/* shaders */
@@ -893,19 +892,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
- if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
- rscreen->info.drm_major == 3) &&
- (rscreen->debug_flags & DBG_TRACE_CS)) {
- rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
- PIPE_BIND_CUSTOM,
- PIPE_USAGE_STAGING,
- 4096);
- if (rscreen->trace_bo) {
- rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->buf, NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED);
- }
- }
-
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
printf("family = %i (%s)\n", rscreen->info.family,
@@ -951,9 +937,6 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
- if (rscreen->trace_bo)
- pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
-
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index cf8dcf7ea88..381ad21a4e3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -61,7 +61,7 @@
/* gap - reuse */
#define DBG_COMPUTE (1 << 2)
#define DBG_VM (1 << 3)
-#define DBG_TRACE_CS (1 << 4)
+/* gap - reuse */
/* shader logging */
#define DBG_FS (1 << 5)
#define DBG_VS (1 << 6)
@@ -303,10 +303,6 @@ struct r600_common_screen {
struct pipe_context *aux_context;
pipe_mutex aux_context_lock;
- struct r600_resource *trace_bo;
- uint32_t *trace_ptr;
- unsigned cs_count;
-
/* This must be in the screen, because UE4 uses one context for
* compilation and another one for rendering.
*/
@@ -610,6 +606,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct r600_atom *fb_state,
unsigned *buffers, unsigned *dirty_cbufs,
const union pipe_color_union *color);
+void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 115c7289c4c..7322f3ee985 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -201,9 +201,11 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
static int r600_setup_surface(struct pipe_screen *screen,
struct r600_texture *rtex,
- unsigned pitch_in_bytes_override)
+ unsigned pitch_in_bytes_override,
+ unsigned offset)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ unsigned i;
int r;
r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
@@ -225,6 +227,11 @@ static int r600_setup_surface(struct pipe_screen *screen,
rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
}
}
+
+ if (offset) {
+ for (i = 0; i < Elements(rtex->surface.level); ++i)
+ rtex->surface.level[i].offset += offset;
+ }
return 0;
}
@@ -290,8 +297,8 @@ static void r600_texture_disable_cmask(struct r600_common_screen *rscreen,
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
-static void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
+void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+ struct r600_texture *rtex)
{
struct r600_common_context *rctx =
(struct r600_common_context *)rscreen->aux_context;
@@ -366,6 +373,8 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
return rscreen->ws->buffer_get_handle(res->buf,
rtex->surface.level[0].pitch_bytes,
+ rtex->surface.level[0].offset,
+ rtex->surface.level[0].slice_size,
whandle);
}
@@ -629,8 +638,14 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
- /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
- if (rscreen->family == CHIP_STONEY)
+ /* Overalign HTILE on P2 configs to work around GPU hangs in
+ * piglit/depthstencil-render-miplevels 585.
+ *
+ * This has been confirmed to help Kabini & Stoney, where the hangs
+ * are always reproducible. I think I have seen the test hang
+ * on Carrizo too, though it was very rare there.
+ */
+ if (rscreen->chip_class >= CIK && num_pipes < 4)
num_pipes = 4;
switch (num_pipes) {
@@ -791,6 +806,7 @@ static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
unsigned pitch_in_bytes_override,
+ unsigned offset,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
@@ -812,7 +828,7 @@ r600_texture_create_object(struct pipe_screen *screen,
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
rtex->surface = *surface;
- if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
+ if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) {
FREE(rtex);
return NULL;
}
@@ -979,7 +995,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
if (r) {
return NULL;
}
- return (struct pipe_resource *)r600_texture_create_object(screen, templ,
+ return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0,
0, NULL, &surface);
}
@@ -990,7 +1006,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
- unsigned stride = 0;
+ unsigned stride = 0, offset = 0;
unsigned array_mode;
struct radeon_surf surface;
int r;
@@ -1002,7 +1018,7 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
- buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride);
+ buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
if (!buf)
return NULL;
@@ -1029,8 +1045,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
if (metadata.scanout)
surface.flags |= RADEON_SURF_SCANOUT;
- rtex = r600_texture_create_object(screen, templ,
- stride, buf, &surface);
+ rtex = r600_texture_create_object(screen, templ, stride,
+ offset, buf, &surface);
if (!rtex)
return NULL;
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index bdee2f8020a..0a164bba307 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -51,24 +51,8 @@ struct radeon_llvm_loop {
};
struct radeon_llvm_context {
-
struct lp_build_tgsi_soa_context soa;
- unsigned chip_class;
- unsigned type;
- unsigned face_gpr;
- unsigned two_side;
- unsigned inputs_count;
- struct r600_shader_io * r600_inputs;
- struct r600_shader_io * r600_outputs;
- struct pipe_stream_output_info *stream_outputs;
- unsigned color_buffer_count;
- unsigned fs_color_all;
- unsigned alpha_to_one;
- unsigned has_txq_cube_array_z_comp;
- unsigned uses_tex_buffers;
- unsigned has_compressed_msaa_texturing;
-
/*=== Front end configuration ===*/
/* Instructions that are not described by any of the TGSI opcodes. */
@@ -90,7 +74,6 @@ struct radeon_llvm_context {
*/
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
- unsigned output_reg_count;
/** This pointer is used to contain the temporary values.
* The amount of temporary used in tgsi can't be bound to a max value and
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c74397fb5c9..fb883cb585e 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -363,9 +363,6 @@ static void emit_declaration(
ctx->soa.bld_base.base.elem_type, "");
}
}
-
- ctx->output_reg_count = MAX2(ctx->output_reg_count,
- decl->Range.Last + 1);
break;
}
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index b8efc58eaab..233f46091a4 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -92,7 +92,7 @@ struct ruvd_decoder {
/* flush IB to the hardware */
static void flush(struct ruvd_decoder *dec)
{
- dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL);
}
/* add a new set register command to the IB */
@@ -1142,7 +1142,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL);
+ dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 087d9422c04..2ab74e9eb6c 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -56,7 +56,7 @@
*/
static void flush(struct rvce_encoder *enc)
{
- enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL);
enc->task_info_idx = 0;
enc->bs_idx = 0;
}
@@ -429,7 +429,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL);
+ enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index b8a065957a7..d35e963133e 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -515,7 +515,7 @@ struct radeon_winsys {
*/
struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
struct winsys_handle *whandle,
- unsigned *stride);
+ unsigned *stride, unsigned *offset);
/**
* Get a winsys buffer from a user pointer. The resulting buffer can't
@@ -546,7 +546,8 @@ struct radeon_winsys {
* \return TRUE on success.
*/
boolean (*buffer_get_handle)(struct pb_buffer *buf,
- unsigned stride,
+ unsigned stride, unsigned offset,
+ unsigned slice_size,
struct winsys_handle *whandle);
/**
@@ -592,14 +593,12 @@ struct radeon_winsys {
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param user User pointer that will be passed to the flush callback.
- * \param trace_buf Trace buffer when tracing is enabled
*/
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
- void *flush_ctx,
- struct pb_buffer *trace_buf);
+ void *flush_ctx);
/**
* Destroy a command stream.
@@ -672,12 +671,10 @@ struct radeon_winsys {
* \param flags, RADEON_FLUSH_ASYNC or 0.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted
* after the CS and is returned through this parameter.
- * \param cs_trace_id A unique identifier of the cs, used for tracing.
*/
void (*cs_flush)(struct radeon_winsys_cs *cs,
unsigned flags,
- struct pipe_fence_handle **fence,
- uint32_t cs_trace_id);
+ struct pipe_fence_handle **fence);
/**
* Return TRUE if a buffer is referenced by a command stream.
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index f9a6de48f6b..e0dbec5fb79 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -325,8 +325,8 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
}
static void
-si_decompress_color_textures(struct si_context *sctx,
- struct si_textures_info *textures)
+si_decompress_sampler_color_textures(struct si_context *sctx,
+ struct si_textures_info *textures)
{
unsigned i;
unsigned mask = textures->compressed_colortex_mask;
@@ -350,6 +350,33 @@ si_decompress_color_textures(struct si_context *sctx,
}
}
+static void
+si_decompress_image_color_textures(struct si_context *sctx,
+ struct si_images_info *images)
+{
+ unsigned i;
+ unsigned mask = images->compressed_colortex_mask;
+
+ while (mask) {
+ const struct pipe_image_view *view;
+ struct r600_texture *tex;
+
+ i = u_bit_scan(&mask);
+
+ view = &images->views[i];
+ assert(view->resource->target != PIPE_BUFFER);
+
+ tex = (struct r600_texture *)view->resource;
+ if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
+ continue;
+
+ si_blit_decompress_color(&sctx->b.b, tex,
+ view->u.tex.level, view->u.tex.level,
+ 0, util_max_layer(&tex->resource.b.b, view->u.tex.level),
+ false);
+ }
+}
+
void si_decompress_textures(struct si_context *sctx)
{
unsigned compressed_colortex_counter;
@@ -370,7 +397,10 @@ void si_decompress_textures(struct si_context *sctx)
si_flush_depth_textures(sctx, &sctx->samplers[i]);
}
if (sctx->samplers[i].compressed_colortex_mask) {
- si_decompress_color_textures(sctx, &sctx->samplers[i]);
+ si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
+ }
+ if (sctx->images[i].compressed_colortex_mask) {
+ si_decompress_image_color_textures(sctx, &sctx->images[i]);
}
}
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d12b3e6b28a..815b87bbd7e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -64,7 +64,8 @@
#include "util/u_upload_mgr.h"
-/* NULL image and buffer descriptor.
+/* NULL image and buffer descriptor for textures (alpha = 1) and images
+ * (alpha = 0).
*
* For images, all fields must be zero except for the swizzle, which
* supports arbitrary combinations of 0s and 1s. The texture type must be
@@ -74,7 +75,7 @@
*
* This is the only reason why the buffer descriptor must be in words [4:7].
*/
-static uint32_t null_descriptor[8] = {
+static uint32_t null_texture_descriptor[8] = {
0,
0,
0,
@@ -84,10 +85,20 @@ static uint32_t null_descriptor[8] = {
* descriptor */
};
+static uint32_t null_image_descriptor[8] = {
+ 0,
+ 0,
+ 0,
+ S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
+ /* the rest must contain zeros, which is also used by the buffer
+ * descriptor */
+};
+
static void si_init_descriptors(struct si_descriptors *desc,
unsigned shader_userdata_index,
unsigned element_dw_size,
- unsigned num_elements)
+ unsigned num_elements,
+ const uint32_t *null_descriptor)
{
int i;
@@ -100,10 +111,12 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->shader_userdata_offset = shader_userdata_index * 4;
/* Initialize the array to NULL descriptors if the element size is 8. */
- if (element_dw_size % 8 == 0)
+ if (null_descriptor) {
+ assert(element_dw_size % 8 == 0);
for (i = 0; i < num_elements * element_dw_size / 8; i++)
- memcpy(desc->list + i*8, null_descriptor,
- sizeof(null_descriptor));
+ memcpy(desc->list + i * 8, null_descriptor,
+ 8 * 4);
+ }
}
static void si_release_descriptors(struct si_descriptors *desc)
@@ -210,7 +223,7 @@ static void si_set_sampler_view(struct si_context *sctx,
} else {
/* Disable FMASK and bind sampler state in [12:15]. */
memcpy(views->desc.list + slot*16 + 8,
- null_descriptor, 4*4);
+ null_texture_descriptor, 4*4);
if (views->sampler_states[slot])
memcpy(views->desc.list + slot*16 + 12,
@@ -220,9 +233,9 @@ static void si_set_sampler_view(struct si_context *sctx,
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- memcpy(views->desc.list + slot*16, null_descriptor, 8*4);
+ memcpy(views->desc.list + slot*16, null_texture_descriptor, 8*4);
/* Only clear the lower dwords of FMASK. */
- memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4);
+ memcpy(views->desc.list + slot*16 + 8, null_texture_descriptor, 4*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
@@ -301,6 +314,160 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers)
}
}
+/* IMAGE VIEWS */
+
+static void
+si_release_image_views(struct si_images_info *images)
+{
+ unsigned i;
+
+ for (i = 0; i < SI_NUM_IMAGES; ++i) {
+ struct pipe_image_view *view = &images->views[i];
+
+ pipe_resource_reference(&view->resource, NULL);
+ }
+
+ si_release_descriptors(&images->desc);
+}
+
+static void
+si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *images)
+{
+ uint mask = images->desc.enabled_mask;
+
+ /* Add buffers to the CS. */
+ while (mask) {
+ int i = u_bit_scan(&mask);
+ struct pipe_image_view *view = &images->views[i];
+
+ assert(view->resource);
+
+ si_sampler_view_add_buffer(sctx, view->resource);
+ }
+
+ if (images->desc.buffer) {
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ images->desc.buffer,
+ RADEON_USAGE_READ,
+ RADEON_PRIO_DESCRIPTORS);
+ }
+}
+
+static void
+si_disable_shader_image(struct si_images_info *images, unsigned slot)
+{
+ if (images->desc.enabled_mask & (1llu << slot)) {
+ pipe_resource_reference(&images->views[slot].resource, NULL);
+ images->compressed_colortex_mask &= ~(1 << slot);
+
+ memcpy(images->desc.list + slot*8, null_image_descriptor, 8*4);
+ images->desc.enabled_mask &= ~(1llu << slot);
+ images->desc.list_dirty = true;
+ }
+}
+
+static void
+si_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_image_view *views)
+{
+ struct si_context *ctx = (struct si_context *)pipe;
+ struct si_screen *screen = ctx->screen;
+ struct si_images_info *images = &ctx->images[shader];
+ unsigned i, slot;
+
+ assert(shader < SI_NUM_SHADERS);
+
+ if (!count)
+ return;
+
+ assert(start_slot + count <= SI_NUM_IMAGES);
+
+ for (i = 0, slot = start_slot; i < count; ++i, ++slot) {
+ struct r600_resource *res;
+
+ if (!views || !views[i].resource) {
+ si_disable_shader_image(images, slot);
+ continue;
+ }
+
+ res = (struct r600_resource *)views[i].resource;
+ util_copy_image_view(&images->views[slot], &views[i]);
+
+ si_sampler_view_add_buffer(ctx, &res->b.b);
+
+ if (res->b.b.target == PIPE_BUFFER) {
+ si_make_buffer_descriptor(screen, res,
+ views[i].format,
+ views[i].u.buf.first_element,
+ views[i].u.buf.last_element,
+ images->desc.list + slot * 8);
+ images->compressed_colortex_mask &= ~(1 << slot);
+ } else {
+ static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
+ struct r600_texture *tex = (struct r600_texture *)res;
+ unsigned level;
+ unsigned width, height, depth;
+
+ assert(!tex->is_depth);
+ assert(tex->fmask.size == 0);
+
+ if (tex->dcc_offset &&
+ views[i].access & PIPE_IMAGE_ACCESS_WRITE)
+ r600_texture_disable_dcc(&screen->b, tex);
+
+ if (is_compressed_colortex(tex)) {
+ images->compressed_colortex_mask |= 1 << slot;
+ } else {
+ images->compressed_colortex_mask &= ~(1 << slot);
+ }
+
+ /* Always force the base level to the selected level.
+ *
+ * This is required for 3D textures, where otherwise
+ * selecting a single slice for non-layered bindings
+ * fails. It doesn't hurt the other targets.
+ */
+ level = views[i].u.tex.level;
+ width = u_minify(res->b.b.width0, level);
+ height = u_minify(res->b.b.height0, level);
+ depth = u_minify(res->b.b.depth0, level);
+
+ si_make_texture_descriptor(screen, tex, false, res->b.b.target,
+ views[i].format, swizzle,
+ level, 0, 0,
+ views[i].u.tex.first_layer, views[i].u.tex.last_layer,
+ width, height, depth,
+ images->desc.list + slot * 8,
+ NULL);
+ }
+
+ images->desc.enabled_mask |= 1llu << slot;
+ images->desc.list_dirty = true;
+ }
+}
+
+static void
+si_images_update_compressed_colortex_mask(struct si_images_info *images)
+{
+ uint64_t mask = images->desc.enabled_mask;
+
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ struct pipe_resource *res = images->views[i].resource;
+
+ if (res && res->target != PIPE_BUFFER) {
+ struct r600_texture *rtex = (struct r600_texture *)res;
+
+ if (is_compressed_colortex(rtex)) {
+ images->compressed_colortex_mask |= 1 << i;
+ } else {
+ images->compressed_colortex_mask &= ~(1 << i);
+ }
+ }
+ }
+}
+
/* SAMPLER STATES */
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
@@ -351,7 +518,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
- num_buffers);
+ num_buffers, NULL);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers)
@@ -804,6 +971,7 @@ void si_update_compressed_colortex_masks(struct si_context *sctx)
{
for (int i = 0; i < SI_NUM_SHADERS; ++i) {
si_samplers_update_compressed_colortex_mask(&sctx->samplers[i]);
+ si_images_update_compressed_colortex_mask(&sctx->images[i]);
}
}
@@ -925,6 +1093,28 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
}
+
+ /* Shader images */
+ for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
+ struct si_images_info *images = &sctx->images[shader];
+ unsigned mask = images->desc.enabled_mask;
+
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (images->views[i].resource == buf) {
+ si_desc_reset_buffer_offset(
+ ctx, images->desc.list + i * 8 + 4,
+ old_va, buf);
+ images->desc.list_dirty = true;
+
+ radeon_add_to_buffer_list(
+ &sctx->b, &sctx->b.gfx, rbuffer,
+ RADEON_USAGE_READWRITE,
+ RADEON_PRIO_SAMPLER_BUFFER);
+ }
+ }
+ }
}
/* SHADER USER DATA */
@@ -1055,6 +1245,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
}
si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
@@ -1074,14 +1265,20 @@ void si_init_all_descriptors(struct si_context *sctx)
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
- SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS);
+ SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+ null_texture_descriptor);
+
+ si_init_descriptors(&sctx->images[i].desc,
+ SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+ null_image_descriptor);
}
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
- 4, SI_NUM_VERTEX_BUFFERS);
+ 4, SI_NUM_VERTEX_BUFFERS, NULL);
/* Set pipe_context functions. */
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
+ sctx->b.b.set_shader_images = si_set_shader_images;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
@@ -1105,7 +1302,8 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
- !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc))
+ !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
+ !si_upload_descriptors(sctx, &sctx->images[i].desc))
return false;
}
return si_upload_vertex_buffer_descriptors(sctx);
@@ -1119,6 +1317,7 @@ void si_release_all_descriptors(struct si_context *sctx)
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
+ si_release_image_views(&sctx->images[i]);
}
si_release_descriptors(&sctx->vertex_buffers);
}
@@ -1131,6 +1330,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
+ si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
si_vertex_buffers_begin_new_cs(sctx);
si_shader_userdata_begin_new_cs(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index b5a4034cc12..8c900a4ecb6 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -118,8 +118,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
}
/* Flush the CS. */
- ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
- ctx->screen->b.cs_count++);
+ ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8b50a49cba0..dd1103eed06 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -140,9 +140,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.b.create_video_buffer = vl_video_buffer_create;
}
- sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
- sctx, sscreen->b.trace_bo ?
- sscreen->b.trace_bo->buf : NULL);
+ sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
+ si_context_gfx_flush, sctx);
sctx->b.gfx.flush = si_context_gfx_flush;
/* Border colors. */
@@ -539,8 +538,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
- case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+ return HAVE_LLVM >= 0x0309 ? SI_NUM_IMAGES : 0;
}
return 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 0fef5f72098..6d0d687fe4c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -141,6 +141,12 @@ struct si_textures_info {
uint32_t compressed_colortex_mask;
};
+struct si_images_info {
+ struct si_descriptors desc;
+ struct pipe_image_view views[SI_NUM_IMAGES];
+ uint32_t compressed_colortex_mask;
+};
+
struct si_framebuffer {
struct r600_atom atom;
struct pipe_framebuffer_state state;
@@ -251,6 +257,7 @@ struct si_context {
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
+ struct si_images_info images[SI_NUM_SHADERS];
/* other shader resources */
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8c1151aa493..9eb531f8d80 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_pstipple.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_dump.h"
@@ -99,6 +100,7 @@ struct si_shader_context
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS];
+ LLVMValueRef images[SI_NUM_IMAGES];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
@@ -530,6 +532,37 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
}
/**
+ * Like get_indirect_index, but restricts the return value to a (possibly
+ * undefined) value inside [0..num).
+ */
+static LLVMValueRef get_bounded_indirect_index(struct si_shader_context *ctx,
+ const struct tgsi_ind_register *ind,
+ int rel_index, unsigned num)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef result = get_indirect_index(ctx, ind, rel_index);
+ LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+ LLVMValueRef cc;
+
+ if (util_is_power_of_two(num)) {
+ result = LLVMBuildAnd(builder, result, c_max, "");
+ } else {
+ /* In theory, this MAX pattern should result in code that is
+ * as good as the bit-wise AND above.
+ *
+ * In practice, LLVM generates worse code (at the time of
+ * writing), because its value tracking is not strong enough.
+ */
+ cc = LLVMBuildICmp(builder, LLVMIntULE, result, c_max, "");
+ result = LLVMBuildSelect(builder, cc, result, c_max, "");
+ }
+
+ return result;
+}
+
+
+/**
* Calculate a dword address given an input or output register and a stride.
*/
static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
@@ -2656,10 +2689,90 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
ctx->return_value = ret;
}
+/**
+ * Given a v8i32 resource descriptor for a buffer, extract the size of the
+ * buffer in number of elements and return it as an i32.
+ */
+static LLVMValueRef get_buffer_size(
+ struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef descriptor)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef size =
+ LLVMBuildExtractElement(builder, descriptor,
+ lp_build_const_int32(gallivm, 6), "");
+
+ if (ctx->screen->b.chip_class >= VI) {
+ /* On VI, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(builder, descriptor,
+ lp_build_const_int32(gallivm, 5), "");
+ stride = LLVMBuildLShr(builder, stride,
+ lp_build_const_int32(gallivm, 16), "");
+ stride = LLVMBuildAnd(builder, stride,
+ lp_build_const_int32(gallivm, 0x3FFF), "");
+
+ size = LLVMBuildUDiv(builder, size, stride, "");
+ }
+
+ return size;
+}
+
+/**
+ * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
+ * intrinsic names).
+ */
+static void build_int_type_name(
+ LLVMTypeRef type,
+ char *buf, unsigned bufsize)
+{
+ assert(bufsize >= 6);
+
+ if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
+ snprintf(buf, bufsize, "v%ui32",
+ LLVMGetVectorSize(type));
+ else
+ strcpy(buf, "i32");
+}
+
static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ */
+static void emit_optimization_barrier(struct si_shader_context *ctx)
+{
+ LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
+ LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+}
+
+static void membar_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ /* Since memoryBarrier only makes guarantees about atomics and
+ * coherent image accesses (which bypass TC L1), we do not need to emit
+ * any special cache handling here.
+ *
+ * We do have to prevent LLVM from re-ordering loads across
+ * the barrier though.
+ */
+ emit_optimization_barrier(ctx);
+}
+
static bool tgsi_is_array_sampler(unsigned target)
{
return target == TGSI_TEXTURE_1D_ARRAY ||
@@ -2671,6 +2784,459 @@ static bool tgsi_is_array_sampler(unsigned target)
target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}
+static bool tgsi_is_array_image(unsigned target)
+{
+ return target == TGSI_TEXTURE_3D ||
+ target == TGSI_TEXTURE_CUBE ||
+ target == TGSI_TEXTURE_1D_ARRAY ||
+ target == TGSI_TEXTURE_2D_ARRAY ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA;
+}
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with DCC enabled and
+ * non-trivial can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+ LLVMValueRef rsrc)
+{
+ if (ctx->screen->b.chip_class <= CIK) {
+ return rsrc;
+ } else {
+ LLVMBuilderRef builder = ctx->radeon_bld.gallivm.builder;
+ LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+ LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+ LLVMValueRef tmp;
+
+ tmp = LLVMBuildExtractElement(builder, rsrc, i32_6, "");
+ tmp = LLVMBuildAnd(builder, tmp, i32_C, "");
+ return LLVMBuildInsertElement(builder, rsrc, tmp, i32_6, "");
+ }
+}
+
+/**
+ * Load the resource descriptor for \p image.
+ */
+static void
+image_fetch_rsrc(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *image,
+ bool dcc_off,
+ LLVMValueRef *rsrc)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ assert(image->Register.File == TGSI_FILE_IMAGE);
+
+ if (!image->Register.Indirect) {
+ /* Fast path: use preloaded resources */
+ *rsrc = ctx->images[image->Register.Index];
+ } else {
+ /* Indexing and manual load */
+ LLVMValueRef ind_index;
+ LLVMValueRef rsrc_ptr;
+ LLVMValueRef tmp;
+
+ /* From the GL_ARB_shader_image_load_store extension spec:
+ *
+ * If a shader performs an image load, store, or atomic
+ * operation using an image variable declared as an array,
+ * and if the index used to select an individual element is
+ * negative or greater than or equal to the size of the
+ * array, the results of the operation are undefined but may
+ * not lead to termination.
+ */
+ ind_index = get_bounded_indirect_index(ctx, &image->Indirect,
+ image->Register.Index,
+ SI_NUM_IMAGES);
+
+ rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
+ tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index);
+ if (dcc_off)
+ tmp = force_dcc_off(ctx, tmp);
+ *rsrc = tmp;
+ }
+}
+
+static LLVMValueRef image_fetch_coords(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned src)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ unsigned target = inst->Memory.Texture;
+ int sample;
+ unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &sample);
+ LLVMValueRef coords[4];
+ LLVMValueRef tmp;
+ int chan;
+
+ for (chan = 0; chan < num_coords; ++chan) {
+ tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
+ tmp = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+ coords[chan] = tmp;
+ }
+
+ if (num_coords == 1)
+ return coords[0];
+
+ if (num_coords == 3) {
+ /* LLVM has difficulties lowering 3-element vectors. */
+ coords[3] = bld_base->uint_bld.undef;
+ num_coords = 4;
+ }
+
+ return lp_build_gather_values(gallivm, coords, num_coords);
+}
+
+/**
+ * Append the extra mode bits that are used by image load and store.
+ */
+static void image_append_args(
+ struct si_shader_context *ctx,
+ struct lp_build_emit_data * emit_data,
+ unsigned target,
+ bool atomic)
+{
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
+ LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
+
+ emit_data->args[emit_data->arg_count++] = i1false; /* r128 */
+ emit_data->args[emit_data->arg_count++] =
+ tgsi_is_array_image(target) ? i1true : i1false; /* da */
+ if (!atomic) {
+ emit_data->args[emit_data->arg_count++] =
+ inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
+ i1true : i1false; /* glc */
+ }
+ emit_data->args[emit_data->arg_count++] = i1false; /* slc */
+}
+
+/**
+ * Append the resource and indexing arguments for buffer intrinsics.
+ *
+ * \param rsrc the 256 bit resource
+ * \param index index into the buffer
+ */
+static void buffer_append_args(
+ struct si_shader_context *ctx,
+ struct lp_build_emit_data *emit_data,
+ LLVMValueRef rsrc,
+ LLVMValueRef index,
+ bool atomic)
+{
+ struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
+ struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ LLVMTypeRef v2i128 = LLVMVectorType(ctx->i128, 2);
+ LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
+ LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
+
+ rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, v2i128, "");
+ rsrc = LLVMBuildExtractElement(gallivm->builder, rsrc, bld_base->uint_bld.one, "");
+ rsrc = LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, "");
+
+ emit_data->args[emit_data->arg_count++] = rsrc;
+ emit_data->args[emit_data->arg_count++] = index; /* vindex */
+ emit_data->args[emit_data->arg_count++] = bld_base->uint_bld.zero; /* voffset */
+ if (!atomic) {
+ emit_data->args[emit_data->arg_count++] =
+ inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
+ i1true : i1false; /* glc */
+ }
+ emit_data->args[emit_data->arg_count++] = i1false; /* slc */
+}
+
+static void load_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef coords;
+ LLVMValueRef rsrc;
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+ image_fetch_rsrc(bld_base, &inst->Src[0], false, &rsrc);
+ coords = image_fetch_coords(bld_base, inst, 1);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ buffer_append_args(ctx, emit_data, rsrc, coords, false);
+ } else {
+ emit_data->args[0] = coords;
+ emit_data->args[1] = rsrc;
+ emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
+ emit_data->arg_count = 3;
+
+ image_append_args(ctx, emit_data, target, false);
+ }
+}
+
+static void load_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ char intrinsic_name[32];
+ char coords_type[8];
+
+ if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
+ emit_optimization_barrier(ctx);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
+ builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+ } else {
+ build_int_type_name(LLVMTypeOf(emit_data->args[0]),
+ coords_type, sizeof(coords_type));
+
+ snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.image.load.%s", coords_type);
+
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(
+ builder, intrinsic_name, emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+ }
+}
+
+static void store_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct tgsi_full_src_register image;
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef chans[4];
+ LLVMValueRef data;
+ LLVMValueRef coords;
+ LLVMValueRef rsrc;
+ unsigned chan;
+
+ emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+
+ image = tgsi_full_src_register_from_dst(&inst->Dst[0]);
+ coords = image_fetch_coords(bld_base, inst, 0);
+
+ for (chan = 0; chan < 4; ++chan) {
+ chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
+ }
+ data = lp_build_gather_values(gallivm, chans, 4);
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ image_fetch_rsrc(bld_base, &image, false, &rsrc);
+ emit_data->args[0] = data;
+ emit_data->arg_count = 1;
+
+ buffer_append_args(ctx, emit_data, rsrc, coords, false);
+ } else {
+ emit_data->args[0] = data;
+ emit_data->args[1] = coords;
+ image_fetch_rsrc(bld_base, &image, true, &emit_data->args[2]);
+ emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
+ emit_data->arg_count = 4;
+
+ image_append_args(ctx, emit_data, target, false);
+ }
+}
+
+static void store_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ char intrinsic_name[32];
+ char coords_type[8];
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
+ builder, "llvm.amdgcn.buffer.store.format.v4f32",
+ emit_data->dst_type, emit_data->args, emit_data->arg_count,
+ LLVMNoUnwindAttribute);
+ } else {
+ build_int_type_name(LLVMTypeOf(emit_data->args[1]),
+ coords_type, sizeof(coords_type));
+ snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.image.store.%s", coords_type);
+
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(
+ builder, intrinsic_name, emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMNoUnwindAttribute);
+ }
+}
+
+static void atomic_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef data1, data2;
+ LLVMValueRef coords;
+ LLVMValueRef rsrc;
+ LLVMValueRef tmp;
+
+ emit_data->dst_type = bld_base->base.elem_type;
+
+ image_fetch_rsrc(bld_base, &inst->Src[0], target != TGSI_TEXTURE_BUFFER,
+ &rsrc);
+ coords = image_fetch_coords(bld_base, inst, 1);
+
+ tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
+ data1 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
+ data2 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+ }
+
+ /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
+ * of arguments, which is reversed relative to TGSI (and GLSL)
+ */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
+ emit_data->args[emit_data->arg_count++] = data2;
+ emit_data->args[emit_data->arg_count++] = data1;
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ buffer_append_args(ctx, emit_data, rsrc, coords, true);
+ } else {
+ emit_data->args[emit_data->arg_count++] = coords;
+ emit_data->args[emit_data->arg_count++] = rsrc;
+
+ image_append_args(ctx, emit_data, target, true);
+ }
+}
+
+static void atomic_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ char intrinsic_name[40];
+ LLVMValueRef tmp;
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
+ } else {
+ char coords_type[8];
+
+ build_int_type_name(LLVMTypeOf(emit_data->args[1]),
+ coords_type, sizeof(coords_type));
+ snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.image.atomic.%s.%s",
+ action->intr_name, coords_type);
+ }
+
+ tmp = lp_build_intrinsic(
+ builder, intrinsic_name, bld_base->uint_bld.elem_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMNoUnwindAttribute);
+ emit_data->output[emit_data->chan] =
+ LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, "");
+}
+
+static void resq_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ const struct tgsi_full_src_register *reg = &inst->Src[0];
+ unsigned tex_target = inst->Memory.Texture;
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+ if (tex_target == TGSI_TEXTURE_BUFFER) {
+ image_fetch_rsrc(bld_base, reg, false, &emit_data->args[0]);
+ emit_data->arg_count = 1;
+ } else {
+ emit_data->args[0] = bld_base->uint_bld.zero; /* mip level */
+ image_fetch_rsrc(bld_base, reg, false, &emit_data->args[1]);
+ emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
+ emit_data->args[3] = bld_base->uint_bld.zero; /* unorm */
+ emit_data->args[4] = bld_base->uint_bld.zero; /* r128 */
+ emit_data->args[5] = tgsi_is_array_image(tex_target) ?
+ bld_base->uint_bld.one : bld_base->uint_bld.zero; /* da */
+ emit_data->args[6] = bld_base->uint_bld.zero; /* glc */
+ emit_data->args[7] = bld_base->uint_bld.zero; /* slc */
+ emit_data->args[8] = bld_base->uint_bld.zero; /* tfe */
+ emit_data->args[9] = bld_base->uint_bld.zero; /* lwe */
+ emit_data->arg_count = 10;
+ }
+}
+
+static void resq_emit(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ unsigned target = inst->Memory.Texture;
+ LLVMValueRef out;
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ out = get_buffer_size(bld_base, emit_data->args[0]);
+ } else {
+ out = lp_build_intrinsic(
+ builder, "llvm.SI.getresinfo.i32", emit_data->dst_type,
+ emit_data->args, emit_data->arg_count,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+ /* Divide the number of layers by 6 to get the number of cubes. */
+ if (target == TGSI_TEXTURE_CUBE_ARRAY) {
+ LLVMValueRef imm2 = lp_build_const_int32(gallivm, 2);
+ LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
+
+ LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
+ z = LLVMBuildBitCast(builder, z, bld_base->uint_bld.elem_type, "");
+ z = LLVMBuildSDiv(builder, z, imm6, "");
+ z = LLVMBuildBitCast(builder, z, bld_base->base.elem_type, "");
+ out = LLVMBuildInsertElement(builder, out, z, imm2, "");
+ }
+ }
+
+ emit_data->output[emit_data->chan] = out;
+}
+
static void set_tex_fetch_args(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
unsigned opcode, unsigned target,
@@ -2836,26 +3402,7 @@ static void tex_fetch_args(
if (target == TGSI_TEXTURE_BUFFER) {
/* Read the size from the buffer descriptor directly. */
LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, ctx->v8i32, "");
- LLVMValueRef size = LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 6), "");
-
- if (ctx->screen->b.chip_class >= VI) {
- /* On VI, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(builder, res,
- lp_build_const_int32(gallivm, 5), "");
- stride = LLVMBuildLShr(builder, stride,
- lp_build_const_int32(gallivm, 16), "");
- stride = LLVMBuildAnd(builder, stride,
- lp_build_const_int32(gallivm, 0x3FFF), "");
-
- size = LLVMBuildUDiv(builder, size, stride, "");
- }
-
- emit_data->args[0] = size;
+ emit_data->args[0] = get_buffer_size(bld_base, res);
return;
}
@@ -3236,14 +3783,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
return;
}
- if (LLVMGetTypeKind(LLVMTypeOf(emit_data->args[0])) == LLVMVectorTypeKind)
- sprintf(type, ".v%ui32",
- LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- else
- strcpy(type, ".i32");
-
/* Add the type and suffixes .c, .o if needed. */
- sprintf(intr_name, "%s%s%s%s%s",
+ build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
+ sprintf(intr_name, "%s%s%s%s.%s",
name, is_shadow ? ".c" : "", infix,
has_offset ? ".o" : "", type);
@@ -3865,8 +4407,8 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
- params[SI_PARAM_UNUSED] = LLVMPointerType(ctx->i32, CONST_ADDR_SPACE);
- last_array_pointer = SI_PARAM_UNUSED;
+ params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
+ last_array_pointer = SI_PARAM_IMAGES;
switch (ctx->type) {
case TGSI_PROCESSOR_VERTEX:
@@ -4153,6 +4695,34 @@ static void preload_samplers(struct si_shader_context *ctx)
}
}
+static void preload_images(struct si_shader_context *ctx)
+{
+ struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
+ LLVMValueRef res_ptr;
+ unsigned i;
+
+ if (num_images == 0)
+ return;
+
+ res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
+
+ for (i = 0; i < num_images; ++i) {
+ /* Rely on LLVM to shrink the load for buffer resources. */
+ LLVMValueRef rsrc =
+ build_indexed_load_const(ctx, res_ptr,
+ lp_build_const_int32(gallivm, i));
+
+ if (info->images_writemask & (1 << i) &&
+ !(info->images_buffers & (1 << i)))
+ rsrc = force_dcc_off(ctx, rsrc);
+
+ ctx->images[i] = rsrc;
+ }
+}
+
static void preload_streamout_buffers(struct si_shader_context *ctx)
{
struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
@@ -4792,6 +5362,7 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
LLVMTargetMachineRef tm)
{
struct lp_build_tgsi_context *bld_base;
+ struct lp_build_tgsi_action tmpl = {};
memset(ctx, 0, sizeof(*ctx));
radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--");
@@ -4839,6 +5410,38 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
+ bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+ bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+ bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+ tmpl.fetch_args = atomic_fetch_args;
+ tmpl.emit = atomic_emit;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
+ bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
+ bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
+ bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
+ bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
+ bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
+ bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
+
+ bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
+
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
@@ -4926,6 +5529,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
create_function(&ctx);
preload_constants(&ctx);
preload_samplers(&ctx);
+ preload_images(&ctx);
preload_streamout_buffers(&ctx);
preload_ring_buffers(&ctx);
@@ -5383,7 +5987,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
last_array_pointer = SI_PARAM_RW_BUFFERS;
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
- params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
@@ -5633,7 +6237,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
params[SI_PARAM_RW_BUFFERS] = ctx.i64;
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
- params[SI_PARAM_UNUSED] = ctx.i64;
+ params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_ALPHA_REF] = ctx.f32;
last_array_pointer = -1;
last_sgpr = SI_PARAM_ALPHA_REF;
@@ -5897,12 +6501,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *mainp = shader->selector->main_shader_part;
int r;
- /* LS and ES are always compiled on demand. */
+ /* LS, ES, VS are compiled on demand if the main part hasn't been
+ * compiled for that stage.
+ */
if (!mainp ||
(shader->selector->type == PIPE_SHADER_VERTEX &&
- (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+ (shader->key.vs.as_es != mainp->key.vs.as_es ||
+ shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
(shader->selector->type == PIPE_SHADER_TESS_EVAL &&
- shader->key.tes.as_es)) {
+ shader->key.tes.as_es != mainp->key.tes.as_es)) {
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index de23e642fe4..8059edf6395 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -80,7 +80,7 @@ struct radeon_shader_reloc;
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
-/* TODO: gap */
+#define SI_SGPR_IMAGES 6
#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
@@ -104,7 +104,7 @@ struct radeon_shader_reloc;
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
#define SI_PARAM_SAMPLERS 2
-#define SI_PARAM_UNUSED 3 /* TODO: use */
+#define SI_PARAM_IMAGES 3
/* VS only parameters */
#define SI_PARAM_VERTEX_BUFFERS 4
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f823af188c7..1245f56c08a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2797,7 +2797,7 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
* Build the sampler view descriptor for a buffer texture.
* @param state 256-bit descriptor; only the high 128 bits are filled in
*/
-static void
+void
si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
enum pipe_format format,
unsigned first_element, unsigned last_element,
@@ -2838,9 +2838,10 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
/**
* Build the sampler view descriptor for a texture.
*/
-static void
+void
si_make_texture_descriptor(struct si_screen *screen,
struct r600_texture *tex,
+ bool sampler,
enum pipe_texture_target target,
enum pipe_format pipe_format,
const unsigned char state_swizzle[4],
@@ -2855,7 +2856,7 @@ si_make_texture_descriptor(struct si_screen *screen,
const struct util_format_description *desc;
unsigned char swizzle[4];
int first_non_void;
- unsigned num_format, data_format;
+ unsigned num_format, data_format, type;
uint32_t pitch;
uint64_t va;
@@ -2973,12 +2974,30 @@ si_make_texture_descriptor(struct si_screen *screen,
data_format = 0;
}
- if (res->target == PIPE_TEXTURE_1D_ARRAY) {
+ if (!sampler &&
+ (res->target == PIPE_TEXTURE_CUBE ||
+ res->target == PIPE_TEXTURE_CUBE_ARRAY ||
+ res->target == PIPE_TEXTURE_3D)) {
+ /* For the purpose of shader images, treat cube maps and 3D
+ * textures as 2D arrays. For 3D textures, the address
+ * calculations for mipmaps are different, so we rely on the
+ * caller to effectively disable mipmaps.
+ */
+ type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
+
+ assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0));
+ } else {
+ type = si_tex_dim(res->target, target, res->nr_samples);
+ }
+
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
height = 1;
depth = res->array_size;
- } else if (res->target == PIPE_TEXTURE_2D_ARRAY) {
- depth = res->array_size;
- } else if (res->target == PIPE_TEXTURE_CUBE_ARRAY)
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
+ type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (sampler || res->target != PIPE_TEXTURE_3D)
+ depth = res->array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
depth = res->array_size / 6;
pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
@@ -3001,7 +3020,7 @@ si_make_texture_descriptor(struct si_screen *screen,
last_level) |
S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level, false)) |
S_008F1C_POW2_PAD(res->last_level > 0) |
- S_008F1C_TYPE(si_tex_dim(res->target, target, res->nr_samples)));
+ S_008F1C_TYPE(type));
state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
state[5] = (S_008F24_BASE_ARRAY(first_layer) |
S_008F24_LAST_ARRAY(last_layer));
@@ -3155,7 +3174,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
- si_make_texture_descriptor(sctx->screen, tmp, state->target,
+ si_make_texture_descriptor(sctx->screen, tmp, true, state->target,
state->format, state_swizzle,
base_level, first_level, last_level,
state->u.tex.first_layer, last_layer,
@@ -3503,6 +3522,52 @@ static void si_texture_barrier(struct pipe_context *ctx)
SI_CONTEXT_FLUSH_AND_INV_CB;
}
+static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ /* Subsequent commands must wait for all shader invocations to
+ * complete. */
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
+ if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
+ sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+ SI_CONTEXT_INV_VMEM_L1;
+
+ if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
+ PIPE_BARRIER_SHADER_BUFFER |
+ PIPE_BARRIER_TEXTURE |
+ PIPE_BARRIER_IMAGE |
+ PIPE_BARRIER_STREAMOUT_BUFFER)) {
+ /* As far as I can tell, L1 contents are written back to L2
+ * automatically at end of shader, but the contents of other
+ * L1 caches might still be stale. */
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
+ }
+
+ if (flags & PIPE_BARRIER_INDEX_BUFFER) {
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
+
+ /* Indices are read through TC L2 since VI. */
+ if (sctx->screen->b.chip_class <= CIK)
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+
+ if (flags & PIPE_BARRIER_FRAMEBUFFER)
+ sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+
+ if (flags & (PIPE_BARRIER_MAPPED_BUFFER |
+ PIPE_BARRIER_FRAMEBUFFER |
+ PIPE_BARRIER_INDIRECT_BUFFER)) {
+ /* Not sure if INV_GLOBAL_L2 is the best thing here.
+ *
+ * We need to make sure that TC L1 & L2 are written back to
+ * memory, because neither CPU accesses nor CB fetches consider
+ * TC, but there's no need to invalidate any TC cache lines. */
+ sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+ }
+}
+
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
struct pipe_blend_state blend;
@@ -3583,6 +3648,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.set_index_buffer = si_set_index_buffer;
sctx->b.b.texture_barrier = si_texture_barrier;
+ sctx->b.b.memory_barrier = si_memory_barrier;
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_min_samples = si_set_min_samples;
sctx->b.b.set_tess_state = si_set_tess_state;
@@ -3637,7 +3703,8 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
- si_make_texture_descriptor(sscreen, rtex, res->target, res->format,
+ si_make_texture_descriptor(sscreen, rtex, true,
+ res->target, res->format,
swizzle, 0, 0, res->last_level, 0,
is_array ? res->array_size - 1 : 0,
res->width0, res->height0, res->depth0,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 60c34f19e55..c4d6b9d9eee 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -158,6 +158,8 @@ struct si_shader_data {
#define SI_DRIVER_STATE_CONST_BUF SI_NUM_USER_CONST_BUFFERS
#define SI_NUM_CONST_BUFFERS (SI_DRIVER_STATE_CONST_BUF + 1)
+#define SI_NUM_IMAGES 16
+
/* Read-write buffer slots.
*
* Ring buffers: 0..1
@@ -272,6 +274,23 @@ unsigned cik_tile_split(unsigned tile_split);
unsigned si_array_mode(unsigned mode);
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
+void
+si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
+ enum pipe_format format,
+ unsigned first_element, unsigned last_element,
+ uint32_t *state);
+void
+si_make_texture_descriptor(struct si_screen *screen,
+ struct r600_texture *tex,
+ bool sampler,
+ enum pipe_texture_target target,
+ enum pipe_format pipe_format,
+ const unsigned char state_swizzle[4],
+ unsigned base_level, unsigned first_level, unsigned last_level,
+ unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth,
+ uint32_t *state,
+ uint32_t *fmask_state);
struct pipe_sampler_view *
si_create_sampler_view_custom(struct pipe_context *ctx,
struct pipe_resource *texture,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 5fe1f7960f3..02489583423 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -794,9 +794,15 @@ static void si_shader_ps(struct si_shader *shader)
* - the shader uses at least 2 VMEM instructions, or
* - the code size is at least 50 2-dword instructions or 100 1-dword
* instructions.
+ *
+ * Shaders with side effects that must execute independently of the
+ * depth test require LATE_Z.
*/
- if (info->num_memory_instructions >= 2 ||
- shader->binary.code_size > 100*4)
+ if (info->writes_memory &&
+ !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
+ shader->z_order = V_02880C_LATE_Z;
+ else if (info->num_memory_instructions >= 2 ||
+ shader->binary.code_size > 100*4)
shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z;
else
shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
@@ -1042,6 +1048,31 @@ static int si_shader_select(struct pipe_context *ctx,
return si_shader_select_with_key(ctx, state, &key);
}
+static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
+ union si_shader_key *key)
+{
+ unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
+
+ switch (info->processor) {
+ case TGSI_PROCESSOR_VERTEX:
+ switch (next_shader) {
+ case TGSI_PROCESSOR_GEOMETRY:
+ key->vs.as_es = 1;
+ break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ case TGSI_PROCESSOR_TESS_EVAL:
+ key->vs.as_ls = 1;
+ break;
+ }
+ break;
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ if (next_shader == TGSI_PROCESSOR_GEOMETRY)
+ key->tes.as_es = 1;
+ break;
+ }
+}
+
static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
@@ -1157,6 +1188,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1);
+ if (sel->info.writes_memory)
+ sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
+ S_02880C_EXEC_ON_NOOP(1);
+
/* Compile the main shader part for use with a prolog and/or epilog. */
if (sel->type != PIPE_SHADER_GEOMETRY &&
!sscreen->use_monolithic_shaders) {
@@ -1167,6 +1202,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
goto error;
shader->selector = sel;
+ si_parse_next_shader_property(&sel->info, &shader->key);
tgsi_binary = si_get_tgsi_binary(sel);
@@ -1202,6 +1238,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
union si_shader_key key;
memset(&key, 0, sizeof(key));
+ si_parse_next_shader_property(&sel->info, &key);
/* Set reasonable defaults, so that the shader key doesn't
* cause any code to be eliminated.
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index da4281490ae..896dcdf59d0 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -247,6 +247,7 @@ struct pipe_context *svga_context_create(struct pipe_screen *screen,
sizeof(svga->state.hw_draw.default_constbuf_size));
memset(svga->state.hw_draw.enabled_constbufs, 0,
sizeof(svga->state.hw_draw.enabled_constbufs));
+ svga->state.hw_draw.ib = NULL;
/* Create a no-operation blend state which we will bind whenever the
* requested blend state is impossible (e.g. due to having an integer
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 1976f98e5c1..ead47c07980 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -55,16 +55,21 @@
#define SVGA_QUERY_COMMAND_BUFFER_SIZE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
#define SVGA_QUERY_FLUSH_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 8)
#define SVGA_QUERY_SURFACE_WRITE_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define SVGA_QUERY_NUM_READBACKS (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define SVGA_QUERY_NUM_RESOURCE_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define SVGA_QUERY_NUM_BUFFER_UPLOADS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define SVGA_QUERY_NUM_CONST_BUF_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define SVGA_QUERY_NUM_CONST_UPDATES (PIPE_QUERY_DRIVER_SPECIFIC + 14)
/* running total counters */
-#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 14)
-#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define SVGA_QUERY_MEMORY_USED (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define SVGA_QUERY_NUM_SHADERS (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define SVGA_QUERY_NUM_RESOURCES (PIPE_QUERY_DRIVER_SPECIFIC + 17)
+#define SVGA_QUERY_NUM_STATE_OBJECTS (PIPE_QUERY_DRIVER_SPECIFIC + 18)
+#define SVGA_QUERY_NUM_SURFACE_VIEWS (PIPE_QUERY_DRIVER_SPECIFIC + 19)
+#define SVGA_QUERY_NUM_GENERATE_MIPMAP (PIPE_QUERY_DRIVER_SPECIFIC + 20)
/*SVGA_QUERY_MAX has to be last because it is size of an array*/
-#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define SVGA_QUERY_MAX (PIPE_QUERY_DRIVER_SPECIFIC + 21)
/**
* Maximum supported number of constant buffers per shader
@@ -499,20 +504,25 @@ struct svga_context
/** performance / info queries for HUD */
struct {
- uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
- uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */
- uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */
- uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */
- uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
- uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */
- uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */
- uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */
- uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */
- uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
- uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
- uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
- uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
- uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */
+ uint64_t num_draw_calls; /**< SVGA_QUERY_DRAW_CALLS */
+ uint64_t num_fallbacks; /**< SVGA_QUERY_NUM_FALLBACKS */
+ uint64_t num_flushes; /**< SVGA_QUERY_NUM_FLUSHES */
+ uint64_t num_validations; /**< SVGA_QUERY_NUM_VALIDATIONS */
+ uint64_t map_buffer_time; /**< SVGA_QUERY_MAP_BUFFER_TIME */
+ uint64_t num_resources_mapped; /**< SVGA_QUERY_NUM_RESOURCES_MAPPED */
+ uint64_t command_buffer_size; /**< SVGA_QUERY_COMMAND_BUFFER_SIZE */
+ uint64_t flush_time; /**< SVGA_QUERY_FLUSH_TIME */
+ uint64_t surface_write_flushes; /**< SVGA_QUERY_SURFACE_WRITE_FLUSHES */
+ uint64_t num_readbacks; /**< SVGA_QUERY_NUM_READBACKS */
+ uint64_t num_resource_updates; /**< SVGA_QUERY_NUM_RESOURCE_UPDATES */
+ uint64_t num_buffer_uploads; /**< SVGA_QUERY_NUM_BUFFER_UPLOADS */
+ uint64_t num_const_buf_updates; /**< SVGA_QUERY_NUM_CONST_BUF_UPDATES */
+ uint64_t num_const_updates; /**< SVGA_QUERY_NUM_CONST_UPDATES */
+ uint64_t num_shaders; /**< SVGA_QUERY_NUM_SHADERS */
+ uint64_t num_state_objects; /**< SVGA_QUERY_NUM_STATE_OBJECTS */
+ uint64_t num_surface_views; /**< SVGA_QUERY_NUM_SURFACE_VIEWS */
+ uint64_t num_bytes_uploaded; /**< SVGA_QUERY_NUM_BYTES_UPLOADED */
+ uint64_t num_generate_mipmap; /**< SVGA_QUERY_NUM_GENERATE_MIPMAP */
} hud;
/** The currently bound stream output targets */
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index fe6cf71a6e5..0b9ea889afa 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -458,6 +458,14 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
ret = svga_rebind_shaders(svga);
if (ret != PIPE_OK)
return ret;
+
+ /* Rebind stream output targets */
+ ret = svga_rebind_stream_output_targets(svga);
+ if (ret != PIPE_OK)
+ return ret;
+
+ /* Force rebinding the index buffer when needed */
+ svga->state.hw_draw.ib = NULL;
}
ret = validate_sampler_resources(svga);
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
index af9356d7c75..a26e577d8f7 100644
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -254,10 +254,13 @@ svga_set_debug_callback(struct pipe_context *pipe,
{
struct svga_context *svga = svga_context(pipe);
- if (cb)
+ if (cb) {
svga->debug.callback = *cb;
- else
+ svga->swc->debug_callback = &svga->debug.callback;
+ } else {
memset(&svga->debug.callback, 0, sizeof(svga->debug.callback));
+ svga->swc->debug_callback = NULL;
+ }
}
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index 845f4ef3a1c..88f41eadc1d 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -72,11 +72,14 @@ struct svga_query {
/** cast wrapper */
static inline struct svga_query *
-svga_query( struct pipe_query *q )
+svga_query(struct pipe_query *q)
{
return (struct svga_query *)q;
}
+/**
+ * VGPU9
+ */
static boolean
svga_get_query_result(struct pipe_context *pipe,
@@ -736,6 +739,11 @@ svga_create_query(struct pipe_context *pipe,
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
break;
default:
assert(!"unexpected query type in svga_create_query()");
@@ -808,6 +816,11 @@ svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_NUM_STATE_OBJECTS:
case SVGA_QUERY_NUM_SURFACE_VIEWS:
case SVGA_QUERY_NUM_GENERATE_MIPMAP:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
/* nothing */
break;
default:
@@ -899,6 +912,21 @@ svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->begin_count = svga->hud.surface_write_flushes;
break;
+ case SVGA_QUERY_NUM_READBACKS:
+ sq->begin_count = svga->hud.num_readbacks;
+ break;
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ sq->begin_count = svga->hud.num_resource_updates;
+ break;
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ sq->begin_count = svga->hud.num_buffer_uploads;
+ break;
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ sq->begin_count = svga->hud.num_const_buf_updates;
+ break;
+ case SVGA_QUERY_NUM_CONST_UPDATES:
+ sq->begin_count = svga->hud.num_const_updates;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1002,6 +1030,21 @@ svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
sq->end_count = svga->hud.surface_write_flushes;
break;
+ case SVGA_QUERY_NUM_READBACKS:
+ sq->end_count = svga->hud.num_readbacks;
+ break;
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ sq->end_count = svga->hud.num_resource_updates;
+ break;
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ sq->end_count = svga->hud.num_buffer_uploads;
+ break;
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ sq->end_count = svga->hud.num_const_buf_updates;
+ break;
+ case SVGA_QUERY_NUM_CONST_UPDATES:
+ sq->end_count = svga->hud.num_const_updates;
+ break;
case SVGA_QUERY_MEMORY_USED:
case SVGA_QUERY_NUM_SHADERS:
case SVGA_QUERY_NUM_RESOURCES:
@@ -1103,6 +1146,11 @@ svga_get_query_result(struct pipe_context *pipe,
case SVGA_QUERY_COMMAND_BUFFER_SIZE:
case SVGA_QUERY_FLUSH_TIME:
case SVGA_QUERY_SURFACE_WRITE_FLUSHES:
+ case SVGA_QUERY_NUM_READBACKS:
+ case SVGA_QUERY_NUM_RESOURCE_UPDATES:
+ case SVGA_QUERY_NUM_BUFFER_UPLOADS:
+ case SVGA_QUERY_NUM_CONST_BUF_UPDATES:
+ case SVGA_QUERY_NUM_CONST_UPDATES:
vresult->u64 = sq->end_count - sq->begin_count;
break;
/* These are running total counters */
diff --git a/src/gallium/drivers/svga/svga_pipe_streamout.c b/src/gallium/drivers/svga/svga_pipe_streamout.c
index 3f443c44eee..1318b5565ce 100644
--- a/src/gallium/drivers/svga/svga_pipe_streamout.c
+++ b/src/gallium/drivers/svga/svga_pipe_streamout.c
@@ -311,6 +311,25 @@ svga_set_stream_output_targets(struct pipe_context *pipe,
svga->num_so_targets = num_targets;
}
+/**
+ * Rebind stream output target surfaces
+ */
+enum pipe_error
+svga_rebind_stream_output_targets(struct svga_context *svga)
+{
+ struct svga_winsys_context *swc = svga->swc;
+ enum pipe_error ret;
+ unsigned i;
+
+ for (i = 0; i < svga->num_so_targets; i++) {
+ ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);
+ if (ret != PIPE_OK)
+ return ret;
+ }
+
+ return PIPE_OK;
+}
+
void
svga_init_stream_output_functions(struct svga_context *svga)
{
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index a8ffcc7f680..9ecb97509c2 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -109,6 +109,8 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
assert(ret == PIPE_OK);
}
+ svga->hud.num_readbacks++;
+
svga_context_finish(svga);
sbuf->dirty = FALSE;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer_upload.c b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
index 7f7ceab0aa5..1121b780af1 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -311,6 +311,8 @@ svga_buffer_upload_gb_command(struct svga_context *svga,
swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
+ svga->hud.num_resource_updates++;
+
return PIPE_OK;
}
@@ -385,6 +387,8 @@ svga_buffer_upload_command(struct svga_context *svga,
swc->hints |= SVGA_HINT_FLAG_CAN_PRE_FLUSH;
sbuf->dma.flags.discard = FALSE;
+ svga->hud.num_buffer_uploads++;
+
return PIPE_OK;
}
@@ -433,6 +437,7 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x + box->w <= sbuf->b.b.width0);
svga->hud.num_bytes_uploaded += box->w;
+ svga->hud.num_buffer_uploads++;
}
}
else {
@@ -460,6 +465,7 @@ svga_buffer_upload_flush(struct svga_context *svga,
assert(box->x + box->w <= sbuf->b.b.width0);
svga->hud.num_bytes_uploaded += box->w;
+ svga->hud.num_buffer_uploads++;
}
}
diff --git a/src/gallium/drivers/svga/svga_resource_texture.c b/src/gallium/drivers/svga/svga_resource_texture.c
index 1edb41dabee..db730802c7a 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -448,6 +448,8 @@ svga_texture_transfer_map(struct pipe_context *pipe,
ret = readback_image_vgpu9(svga, surf, st->slice, transfer->level);
}
+ svga->hud.num_readbacks++;
+
assert(ret == PIPE_OK);
(void) ret;
@@ -681,6 +683,8 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
ret = update_image_vgpu9(svga, surf, &box, st->slice, transfer->level);
}
+ svga->hud.num_resource_updates++;
+
assert(ret == PIPE_OK);
(void) ret;
}
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index bcc512041f7..c0873c0c65a 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -837,6 +837,16 @@ svga_get_driver_query_info(struct pipe_screen *screen,
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS),
QUERY("surface-write-flushes", SVGA_QUERY_SURFACE_WRITE_FLUSHES,
PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-readbacks", SVGA_QUERY_NUM_READBACKS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-resource-updates", SVGA_QUERY_NUM_RESOURCE_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-buffer-uploads", SVGA_QUERY_NUM_BUFFER_UPLOADS,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-const-buf-updates", SVGA_QUERY_NUM_CONST_BUF_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
+ QUERY("num-const-updates", SVGA_QUERY_NUM_CONST_UPDATES,
+ PIPE_DRIVER_QUERY_TYPE_UINT64),
/* running total counters */
QUERY("memory-used", SVGA_QUERY_MEMORY_USED,
diff --git a/src/gallium/drivers/svga/svga_shader.c b/src/gallium/drivers/svga/svga_shader.c
index 5c99e16d976..78eb3f65b61 100644
--- a/src/gallium/drivers/svga/svga_shader.c
+++ b/src/gallium/drivers/svga/svga_shader.c
@@ -180,18 +180,18 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
- key->tex[i].texture_target = view->texture->target;
-
/* 1D/2D array textures with one slice are treated as non-arrays
* by the SVGA3D device. Convert the texture type here so that
* we emit the right TEX/SAMPLE instruction in the shader.
*/
- if (view->texture->array_size == 1) {
- if (view->texture->target == PIPE_TEXTURE_1D_ARRAY) {
- key->tex[i].texture_target = PIPE_TEXTURE_1D;
+ if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
+ view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
+ if (view->texture->array_size == 1) {
+ key->tex[i].is_array = 0;
}
- else if (view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
- key->tex[i].texture_target = PIPE_TEXTURE_2D;
+ else {
+ assert(view->texture->array_size > 1);
+ key->tex[i].is_array = 1;
}
}
@@ -207,8 +207,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
key->tex[i].swizzle_g = view->swizzle_g;
key->tex[i].swizzle_b = view->swizzle_b;
key->tex[i].swizzle_a = view->swizzle_a;
-
- key->tex[i].return_type = svga_get_texture_datatype(view->format);
}
}
key->num_textures = svga->curr.num_sampler_views[shader];
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index f49fdb46d0e..3f915740b1f 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -98,14 +98,13 @@ struct svga_compile_key
unsigned compare_func:3;
unsigned unnormalized:1;
unsigned width_height_idx:5; /**< texture unit */
- unsigned texture_target:4; /**< PIPE_TEXTURE_x */
+ unsigned is_array:1;
unsigned texture_msaa:1; /**< A multisample texture? */
unsigned sprite_texgen:1;
unsigned swizzle_r:3;
unsigned swizzle_g:3;
unsigned swizzle_b:3;
unsigned swizzle_a:3;
- unsigned return_type:3; /**< TGSI_RETURN_TYPE_x */
} tex[PIPE_MAX_SAMPLERS];
/* Note: svga_compile_keys_equal() depends on the variable-size
* tex[] array being at the end of this structure.
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 8ab1693088a..5ae0382cd45 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -301,6 +301,8 @@ emit_const(struct svga_context *svga, unsigned shader, unsigned i,
return ret;
memcpy(svga->state.hw_draw.cb[shader][i], value, 4 * sizeof(float));
+
+ svga->hud.num_const_updates++;
}
return ret;
@@ -420,6 +422,9 @@ emit_const_range(struct svga_context *svga,
(j - i) * 4 * sizeof(float));
i = j + 1;
+
+ svga->hud.num_const_updates++;
+
} else {
++i;
}
@@ -549,6 +554,7 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
void *src_map = NULL, *dst_map;
unsigned offset;
const struct svga_shader_variant *variant;
+ unsigned alloc_buf_size;
assert(shader == PIPE_SHADER_VERTEX ||
shader == PIPE_SHADER_GEOMETRY ||
@@ -613,7 +619,16 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
*/
new_buf_size = align(new_buf_size, 16);
- u_upload_alloc(svga->const0_upload, 0, new_buf_size,
+ /* Constant buffer size in the upload buffer must be in multiples of 256.
+ * In order to maximize the chance of merging the upload buffer chunks
+ * when svga_buffer_add_range() is called,
+ * the allocate buffer size needs to be in multiples of 256 as well.
+ * Otherwise, since there is gap between each dirty range of the upload buffer,
+ * each dirty range will end up in its own UPDATE_GB_IMAGE command.
+ */
+ alloc_buf_size = align(new_buf_size, CONST0_UPLOAD_ALIGNMENT);
+
+ u_upload_alloc(svga->const0_upload, 0, alloc_buf_size,
CONST0_UPLOAD_ALIGNMENT, &offset,
&dst_buffer, &dst_map);
if (!dst_map) {
@@ -664,6 +679,8 @@ emit_constbuf_vgpu10(struct svga_context *svga, unsigned shader)
pipe_resource_reference(&dst_buffer, NULL);
+ svga->hud.num_const_buf_updates++;
+
return ret;
}
@@ -732,6 +749,8 @@ emit_consts_vgpu10(struct svga_context *svga, unsigned shader)
size);
if (ret != PIPE_OK)
return ret;
+
+ svga->hud.num_const_buf_updates++;
}
svga->state.hw_draw.enabled_constbufs[shader] = enabled_constbufs;
diff --git a/src/gallium/drivers/svga/svga_streamout.h b/src/gallium/drivers/svga/svga_streamout.h
index da0c4457d2e..1daa1ad5352 100644
--- a/src/gallium/drivers/svga/svga_streamout.h
+++ b/src/gallium/drivers/svga/svga_streamout.h
@@ -47,4 +47,7 @@ void
svga_delete_stream_output(struct svga_context *svga,
struct svga_stream_output *streamout);
+enum pipe_error
+svga_rebind_stream_output_targets(struct svga_context *svga);
+
#endif /* SVGA_STREAMOUT_H */
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index ca4009b9e38..204b814a964 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -517,15 +517,15 @@ vs30_output(struct svga_shader_emitter *emit,
static ubyte
svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
{
- switch (emit->key.tex[idx].texture_target) {
- case PIPE_TEXTURE_1D:
+ switch (emit->sampler_target[idx]) {
+ case TGSI_TEXTURE_1D:
return SVGA3DSAMP_2D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
return SVGA3DSAMP_2D;
- case PIPE_TEXTURE_3D:
+ case TGSI_TEXTURE_3D:
return SVGA3DSAMP_VOLUME;
- case PIPE_TEXTURE_CUBE:
+ case TGSI_TEXTURE_CUBE:
return SVGA3DSAMP_CUBE;
}
@@ -585,6 +585,14 @@ svga_translate_decl_sm30( struct svga_shader_emitter *emit,
ok = ps30_output( emit, decl->Semantic, idx );
break;
+ case TGSI_FILE_SAMPLER_VIEW:
+ {
+ unsigned unit = decl->Range.First;
+ assert(decl->Range.First == decl->Range.Last);
+ emit->sampler_target[unit] = decl->SamplerView.Resource;
+ }
+ break;
+
default:
/* don't need to declare other vars */
ok = TRUE;
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 83f0c8bd4d0..7a593ba6e9d 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -136,6 +136,8 @@ struct svga_shader_emitter
int current_arl;
unsigned pstipple_sampler_unit;
+
+ uint8_t sampler_target[PIPE_MAX_SAMPLERS];
};
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 489e68f88e8..3188c411863 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -3849,7 +3849,7 @@ svga_shader_emit_instructions(struct svga_shader_emitter *emit,
if (new_tokens) {
/* Setup texture state for stipple */
- emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+ emit->sampler_target[unit] = TGSI_TEXTURE_2D;
emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index 0c5afeb4cf9..0d5628251df 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -134,6 +134,8 @@ struct svga_shader_emitter_v10
/* Samplers */
unsigned num_samplers;
+ ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
+ ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
/* Address regs (really implemented with temps) */
unsigned num_address_regs;
@@ -2312,9 +2314,13 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
return TRUE;
case TGSI_FILE_SAMPLER_VIEW:
- /* Not used at this time, but maybe in the future.
- * See emit_resource_declarations().
- */
+ {
+ unsigned unit = decl->Range.First;
+ assert(decl->Range.First == decl->Range.Last);
+ emit->sampler_target[unit] = decl->SamplerView.Resource;
+ /* Note: we can ignore YZW return types for now */
+ emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
+ }
return TRUE;
default:
@@ -2854,7 +2860,7 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
/* Texture buffer sizes */
for (i = 0; i < emit->num_samplers; i++) {
- if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
+ if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
emit->texture_buffer_size_index[i] = total_consts++;
}
}
@@ -2918,30 +2924,44 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
/**
- * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
+ * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
*/
static unsigned
-pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
{
switch (target) {
- case PIPE_BUFFER:
+ case TGSI_TEXTURE_BUFFER:
return VGPU10_RESOURCE_DIMENSION_BUFFER;
- case PIPE_TEXTURE_1D:
+ case TGSI_TEXTURE_1D:
return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
- : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
- case PIPE_TEXTURE_3D:
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_3D:
return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
- case PIPE_TEXTURE_CUBE:
+ case TGSI_TEXTURE_CUBE:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
+ case TGSI_TEXTURE_SHADOW1D:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+ case TGSI_TEXTURE_SHADOWCUBE:
return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
- case PIPE_TEXTURE_1D_ARRAY:
- return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
- case PIPE_TEXTURE_2D_ARRAY:
- return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
- : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
- case PIPE_TEXTURE_CUBE_ARRAY:
+ case TGSI_TEXTURE_2D_MSAA:
+ return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
+ : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+ case TGSI_TEXTURE_CUBE_ARRAY:
return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
default:
assert(!"Unexpected resource type");
@@ -2993,8 +3013,8 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
opcode0.value = 0;
opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
opcode0.resourceDimension =
- pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
- emit->key.tex[i].texture_msaa);
+ tgsi_texture_to_resource_dimension(emit->sampler_target[i],
+ emit->key.tex[i].is_array);
operand0.value = 0;
operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
@@ -3008,10 +3028,10 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
- assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
- rt = emit->key.tex[i].return_type + 1;
+ assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
+ rt = emit->sampler_return_type[i] + 1;
#else
- switch (emit->key.tex[i].return_type) {
+ switch (emit->sampler_return_type[i]) {
case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
@@ -5024,7 +5044,7 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
unsigned writemask_0 = 0, writemask_1 = 0;
- boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+ boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
/* Swizzle w/out zero/one terms */
struct tgsi_full_src_register src_swizzled =
@@ -5131,7 +5151,7 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
boolean valid = TRUE;
if (tgsi_is_shadow_target(target) &&
- is_integer_type(emit->key.tex[unit].return_type)) {
+ is_integer_type(emit->sampler_return_type[unit])) {
debug_printf("Invalid SAMPLE_C with an integer texture!\n");
valid = FALSE;
}
@@ -5528,7 +5548,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit,
{
const uint unit = inst->Src[1].Register.Index;
- if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
+ if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
/* RESINFO does not support querying texture buffers, so we instead
* store texture buffer sizes in shader constants, then copy them to
* implement TXQ instead of emitting RESINFO.
@@ -6617,7 +6637,7 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
emit->fs.pstipple_sampler_unit = unit;
/* Setup texture state for stipple */
- emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+ emit->sampler_target[unit] = TGSI_TEXTURE_2D;
emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
index 0ad6b5e6c76..7da2c4e77ca 100644
--- a/src/gallium/drivers/svga/svga_winsys.h
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -48,6 +48,7 @@ struct svga_winsys_screen;
struct svga_winsys_buffer;
struct pipe_screen;
struct pipe_context;
+struct pipe_debug_callback;
struct pipe_fence_handle;
struct pipe_resource;
struct svga_region;
@@ -286,6 +287,9 @@ struct svga_winsys_context
struct svga_winsys_surface *surface,
struct svga_winsys_gb_shader *shader,
unsigned flags);
+
+ /** To report perf/conformance/etc issues to the state tracker */
+ struct pipe_debug_callback *debug_callback;
};
diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp
index c8cb145d334..78b8fdf619b 100644
--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -129,7 +129,7 @@ swr_transfer_map(struct pipe_context *pipe,
swr_fence_submit(swr_context(pipe), screen->flush_fence);
swr_fence_finish(pipe->screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(resource);
}
}
}
@@ -206,8 +206,8 @@ swr_resource_copy(struct pipe_context *pipe,
swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);
swr_fence_finish(pipe->screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, swr_resource(src));
- swr_resource_unused(pipe, swr_resource(dst));
+ swr_resource_unused(src);
+ swr_resource_unused(dst);
if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
|| (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
@@ -293,6 +293,7 @@ static void
swr_destroy(struct pipe_context *pipe)
{
struct swr_context *ctx = swr_context(pipe);
+ struct swr_screen *screen = swr_screen(pipe->screen);
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
@@ -306,6 +307,9 @@ swr_destroy(struct pipe_context *pipe)
swr_destroy_scratch_buffers(ctx);
+ assert(screen);
+ screen->pipe = NULL;
+
FREE(ctx);
}
@@ -324,9 +328,10 @@ swr_render_condition(struct pipe_context *pipe,
}
struct pipe_context *
-swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
+swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)
{
struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+ struct swr_screen *screen = swr_screen(p_screen);
ctx->blendJIT =
new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
@@ -347,7 +352,8 @@ swr_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
if (ctx->swrContext == NULL)
goto fail;
- ctx->pipe.screen = screen;
+ screen->pipe = &ctx->pipe;
+ ctx->pipe.screen = p_screen;
ctx->pipe.destroy = swr_destroy;
ctx->pipe.priv = priv;
ctx->pipe.create_surface = swr_create_surface;
diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h
index 2fdc7683cb8..59cf0284461 100644
--- a/src/gallium/drivers/swr/swr_resource.h
+++ b/src/gallium/drivers/swr/swr_resource.h
@@ -54,9 +54,6 @@ struct swr_resource {
unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
enum swr_resource_status status;
-
- /* pipe_context to which resource is currently bound. */
- struct pipe_context *bound_to_context;
};
@@ -120,24 +117,21 @@ swr_resource_status & operator|=(enum swr_resource_status & a,
}
static INLINE void
-swr_resource_read(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_read(struct pipe_resource *resource)
{
- resource->status |= SWR_RESOURCE_READ;
- resource->bound_to_context = pipe;
+ swr_resource(resource)->status |= SWR_RESOURCE_READ;
}
static INLINE void
-swr_resource_write(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_write(struct pipe_resource *resource)
{
- resource->status |= SWR_RESOURCE_WRITE;
- resource->bound_to_context = pipe;
+ swr_resource(resource)->status |= SWR_RESOURCE_WRITE;
}
static INLINE void
-swr_resource_unused(struct pipe_context *pipe, struct swr_resource *resource)
+swr_resource_unused(struct pipe_resource *resource)
{
- resource->status = SWR_RESOURCE_UNUSED;
- resource->bound_to_context = nullptr;
+ swr_resource(resource)->status = SWR_RESOURCE_UNUSED;
}
#endif
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index e46df47570f..f9e52be2367 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -620,7 +620,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
{
struct swr_screen *screen = swr_screen(p_screen);
struct swr_resource *spr = swr_resource(pt);
- struct pipe_context *pipe = spr->bound_to_context;
+ struct pipe_context *pipe = screen->pipe;
/* Only wait on fence if the resource is being used */
if (pipe && spr->status) {
@@ -630,7 +630,7 @@ swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt)
swr_fence_submit(swr_context(pipe), screen->flush_fence);
swr_fence_finish(p_screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(pt);
}
/*
@@ -661,11 +661,11 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
struct swr_screen *screen = swr_screen(p_screen);
struct sw_winsys *winsys = screen->winsys;
struct swr_resource *spr = swr_resource(resource);
- struct pipe_context *pipe = spr->bound_to_context;
+ struct pipe_context *pipe = screen->pipe;
if (pipe) {
swr_fence_finish(p_screen, screen->flush_fence, 0);
- swr_resource_unused(pipe, spr);
+ swr_resource_unused(resource);
SwrEndFrame(swr_context(pipe)->swrContext);
}
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
index a96dc44cf66..0c82a2eff7a 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -32,6 +32,7 @@ struct sw_winsys;
struct swr_screen {
struct pipe_screen base;
+ struct pipe_context *pipe;
struct pipe_fence_handle *flush_fence;
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 47ee3cb2664..e7bf3618a7d 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -646,24 +646,24 @@ swr_update_resource_status(struct pipe_context *pipe,
if (fb->nr_cbufs)
for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
if (fb->cbufs[i])
- swr_resource_write(pipe, swr_resource(fb->cbufs[i]->texture));
+ swr_resource_write(fb->cbufs[i]->texture);
/* depth/stencil target */
if (fb->zsbuf)
- swr_resource_write(pipe, swr_resource(fb->zsbuf->texture));
+ swr_resource_write(fb->zsbuf->texture);
/* VBO vertex buffers */
for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
if (!vb->user_buffer)
- swr_resource_read(pipe, swr_resource(vb->buffer));
+ swr_resource_read(vb->buffer);
}
/* VBO index buffer */
if (p_draw_info && p_draw_info->indexed) {
struct pipe_index_buffer *ib = &ctx->index_buffer;
if (!ib->user_buffer)
- swr_resource_read(pipe, swr_resource(ib->buffer));
+ swr_resource_read(ib->buffer);
}
/* texture sampler views */
@@ -671,7 +671,7 @@ swr_update_resource_status(struct pipe_context *pipe,
struct pipe_sampler_view *view =
ctx->sampler_views[PIPE_SHADER_FRAGMENT][i];
if (view)
- swr_resource_read(pipe, swr_resource(view->texture));
+ swr_resource_read(view->texture);
}
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
index a13e309985a..49a314cdb25 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -62,7 +62,7 @@ vc4_nir_get_dst_color(nir_builder *b, int sample)
load->num_components = 1;
load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
}
@@ -627,7 +627,7 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_sample_mask_in);
load->num_components = 1;
- nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
nir_ssa_def *bitmask = &load->dest.ssa;
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
index d47e3bf52b0..d08ad588e5b 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -183,7 +183,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[0]) &&
- nir_src_as_const_value(intr->src[0])->u[0] == 0);
+ nir_src_as_const_value(intr->src[0])->u32[0] == 0);
/* Generate dword loads for the VPM values (Since these intrinsics may
* be reordered, the actual reads will be generated at the top of the
@@ -197,7 +197,7 @@ vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
intr_comp->num_components = 1;
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
vpm_reads[i] = &intr_comp->dest.ssa;
@@ -256,7 +256,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[0]) &&
- nir_src_as_const_value(intr->src[0])->u[0] == 0);
+ nir_src_as_const_value(intr->src[0])->u32[0] == 0);
/* Generate scalar loads equivalent to the original VEC4. */
nir_ssa_def *dests[4];
@@ -267,7 +267,7 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr_comp->instr);
dests[i] = &intr_comp->dest.ssa;
@@ -339,7 +339,7 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
* with an offset value of 0.
*/
assert(nir_src_as_const_value(intr->src[1]) &&
- nir_src_as_const_value(intr->src[1])->u[0] == 0);
+ nir_src_as_const_value(intr->src[1])->u32[0] == 0);
b->cursor = nir_before_instr(&intr->instr);
@@ -378,7 +378,7 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
nir_intrinsic_instr *intr_comp =
nir_intrinsic_instr_create(c->s, intr->intrinsic);
intr_comp->num_components = 1;
- nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
/* Convert the uniform (not user_clip_plane) offset to bytes.
* If it happens to be a constant, constant-folding will clean
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
index f6ba5b802ad..a2d89ef3349 100644
--- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
@@ -123,7 +123,7 @@ vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b,
txf->src[0].src_type = nir_tex_src_coord;
txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0)));
- nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL);
+ nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &txf->instr);
nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa,
nir_src_for_ssa(&txf->dest.ssa));
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index f5826d85174..71a1ebbb313 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -118,7 +118,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
intr->num_components = 1;
intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr->instr);
return &intr->dest.ssa;
}
@@ -885,7 +885,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest,
struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
- if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+ unsigned unsized_type =
+ nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
+ if (unsized_type == nir_type_float)
qir_SF(c, qir_FSUB(c, src0, src1));
else
qir_SF(c, qir_SUB(c, src0, src1));
@@ -1519,7 +1521,7 @@ ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
{
struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
- qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
+ qregs[i] = qir_uniform_ui(c, instr->value.u32[i]);
_mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
}
@@ -1553,7 +1555,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
assert(instr->num_components == 1);
const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
assert(offset % 4 == 0);
/* We need dwords */
offset = offset / 4;
@@ -1584,7 +1586,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "vc4 doesn't support indirect inputs");
if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
- assert(const_offset->u[0] == 0);
+ assert(const_offset->u32[0] == 0);
/* Reads of the per-sample color need to be done in
* order.
*/
@@ -1598,7 +1600,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
}
*dest = c->color_reads[sample_index];
} else {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
*dest = c->inputs[offset];
}
break;
@@ -1606,7 +1608,7 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_store_output:
const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "vc4 doesn't support indirect outputs");
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
/* MSAA color outputs are the only case where we have an
* output that's not lowered to being a store of a single 32