summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2016-02-10 17:10:19 -0800
committer Jason Ekstrand <[email protected]> 2016-02-10 17:12:11 -0800
commit f710f3ca377a4583b1fc5081cc28ee1d4aba71cb (patch)
tree ab4d8226ab88b0e5ef2d9a4a6ebfdf79a244a6e2 /src/gallium
parent 7ef3e47c27fb9b11b113bb699019785960d6bf9a (diff)
parent 8750299a420af76cebd3067f6f603eacde06ae06 (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
This also reverts commit 1d65abfa582a371558113f699ffbf16d60b64c90 because now NIR handles texture offsets in a much more sane way.
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 15
-rw-r--r-- src/gallium/drivers/freedreno/ir3/ir3_ra.c 30
-rw-r--r-- src/gallium/drivers/r300/r300_transfer.c 2
-rw-r--r-- src/gallium/drivers/radeon/radeon_uvd.c 3
-rw-r--r-- src/gallium/drivers/radeonsi/si_blit.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_descriptors.c 115
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 203
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 8
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 19
-rw-r--r-- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 6
11 files changed, 228 insertions, 176 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 63dd1700f9c..ffa75775505 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1434,21 +1434,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
tex_info(tex, &flags, &coords);
- if (!has_off) {
- /* could still have a constant offset: */
- if (tex->const_offset[0] || tex->const_offset[1] ||
- tex->const_offset[2] || tex->const_offset[3]) {
- off = const_off;
-
- off[0] = create_immed(b, tex->const_offset[0]);
- off[1] = create_immed(b, tex->const_offset[1]);
- off[2] = create_immed(b, tex->const_offset[2]);
- off[3] = create_immed(b, tex->const_offset[3]);
-
- has_off = true;
- }
- }
-
/* scale up integer coords for TXF based on the LOD */
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index 2ed78818e61..bcad96e8a30 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -605,21 +605,21 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
struct ir3_ra_block_data *bd;
unsigned bitset_words = BITSET_WORDS(ctx->alloc_count);
- void def(unsigned name, struct ir3_instruction *instr)
- {
- /* defined on first write: */
- if (!ctx->def[name])
- ctx->def[name] = instr->ip;
- ctx->use[name] = instr->ip;
- BITSET_SET(bd->def, name);
- }
-
- void use(unsigned name, struct ir3_instruction *instr)
- {
- ctx->use[name] = MAX2(ctx->use[name], instr->ip);
- if (!BITSET_TEST(bd->def, name))
- BITSET_SET(bd->use, name);
- }
+/* Record a write of register `name` by `instr`: the first write stores
+ * instr->ip in ctx->def[name]; every write stores instr->ip in
+ * ctx->use[name] and sets `name` in this block's def bitset.
+ *
+ * Uses `ctx` and `bd` from the enclosing scope.  Arguments are evaluated
+ * more than once, so they must be free of side effects.  There is
+ * deliberately no semicolon after while (0): the caller's own `;` ends the
+ * statement, which keeps the macro usable in an unbraced if/else.
+ */
+#define def(name, instr) \
+ do { \
+ /* defined on first write: */ \
+ if (!ctx->def[(name)]) \
+ ctx->def[(name)] = (instr)->ip; \
+ ctx->use[(name)] = (instr)->ip; \
+ BITSET_SET(bd->def, (name)); \
+ } while (0)
+
+/* Record a read of register `name` by `instr`: ctx->use[name] is raised to
+ * instr->ip if that is larger, and `name` is added to this block's use
+ * bitset unless the block has already defined it.
+ *
+ * Same caveats as def(): relies on `ctx`/`bd`, evaluates its arguments
+ * several times, and expects the caller to supply the trailing semicolon.
+ */
+#define use(name, instr) \
+ do { \
+ ctx->use[(name)] = MAX2(ctx->use[(name)], (instr)->ip); \
+ if (!BITSET_TEST(bd->def, (name))) \
+ BITSET_SET(bd->use, (name)); \
+ } while (0)
bd = rzalloc(ctx->g, struct ir3_ra_block_data);
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index 842e70a6899..9d00f4d9373 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -246,9 +246,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
void r300_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
- struct radeon_winsys *rws = r300_context(ctx)->rws;
struct r300_transfer *trans = r300_transfer(transfer);
- struct r300_resource *tex = r300_resource(transfer->resource);
if (trans->linear_texture) {
if (transfer->usage & PIPE_TRANSFER_WRITE) {
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 1f5a16aaca6..367aabc7a18 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -402,6 +402,9 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
switch (dec->base.chroma_format) {
+ case PIPE_VIDEO_CHROMA_FORMAT_NONE:
+ /* TODO: assert? */
+ break;
case PIPE_VIDEO_CHROMA_FORMAT_400:
result.chroma_format = 0;
break;
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index a93887ec271..115877060ba 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -80,7 +80,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
if (op & SI_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(
sctx->blitter, 2,
- sctx->samplers[PIPE_SHADER_FRAGMENT].states.saved_states);
+ sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states);
util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 6c796731a18..34cc06fc078 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -41,6 +41,18 @@
*
* Also, uploading descriptors to newly allocated memory doesn't require
* a KCACHE flush.
+ *
+ *
+ * Possible scenarios for one 16 dword image+sampler slot:
+ *
+ * | Image | w/ FMASK | Buffer | NULL
+ * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
+ * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
+ * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
+ * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
+ *
+ * FMASK implies MSAA, therefore no sampler state.
+ * Sampler states are never unbound except when FMASK is bound.
*/
#include "radeon/r600_cs.h"
@@ -88,9 +100,9 @@ static void si_init_descriptors(struct si_descriptors *desc,
desc->shader_userdata_offset = shader_userdata_index * 4;
/* Initialize the array to NULL descriptors if the element size is 8. */
- if (element_dw_size == 8)
- for (i = 0; i < num_elements; i++)
- memcpy(desc->list + i*element_dw_size, null_descriptor,
+ if (element_dw_size % 8 == 0)
+ for (i = 0; i < num_elements * element_dw_size / 8; i++)
+ memcpy(desc->list + i*8, null_descriptor,
sizeof(null_descriptor));
}
@@ -174,27 +186,42 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
}
-static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
- unsigned slot, struct pipe_sampler_view *view,
- unsigned *view_desc)
+static void si_set_sampler_view(struct si_context *sctx,
+ struct si_sampler_views *views,
+ unsigned slot, struct pipe_sampler_view *view)
{
- struct si_sampler_views *views = &sctx->samplers[shader].views;
-
if (views->views[slot] == view)
return;
if (view) {
struct si_sampler_view *rview =
(struct si_sampler_view*)view;
+ struct r600_texture *rtex = (struct r600_texture*)view->texture;
si_sampler_view_add_buffers(sctx, rview);
pipe_sampler_view_reference(&views->views[slot], view);
- memcpy(views->desc.list + slot*8, view_desc, 8*4);
+ memcpy(views->desc.list + slot * 16, rview->state, 8*4);
+
+ if (rtex && rtex->fmask.size) {
+ memcpy(views->desc.list + slot*16 + 8,
+ rview->fmask_state, 8*4);
+ } else {
+ /* Disable FMASK and bind sampler state in [12:15]. */
+ memcpy(views->desc.list + slot*16 + 8,
+ null_descriptor, 4*4);
+
+ if (views->sampler_states[slot])
+ memcpy(views->desc.list + slot*16 + 12,
+ views->sampler_states[slot], 4*4);
+ }
+
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
+ memcpy(views->desc.list + slot*16, null_descriptor, 8*4);
+ /* Only clear the lower dwords of FMASK. */
+ memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
@@ -208,7 +235,6 @@ static void si_set_sampler_views(struct pipe_context *ctx,
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_textures_info *samplers = &sctx->samplers[shader];
- struct si_sampler_view **rviews = (struct si_sampler_view **)views;
int i;
if (!count || shader >= SI_NUM_SHADERS)
@@ -220,13 +246,11 @@ static void si_set_sampler_views(struct pipe_context *ctx,
if (!views || !views[i]) {
samplers->depth_texture_mask &= ~(1 << slot);
samplers->compressed_colortex_mask &= ~(1 << slot);
- si_set_sampler_view(sctx, shader, slot, NULL, NULL);
- si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
- NULL, NULL);
+ si_set_sampler_view(sctx, &samplers->views, slot, NULL);
continue;
}
- si_set_sampler_view(sctx, shader, slot, views[i], rviews[i]->state);
+ si_set_sampler_view(sctx, &samplers->views, slot, views[i]);
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
struct r600_texture *rtex =
@@ -243,60 +267,46 @@ static void si_set_sampler_views(struct pipe_context *ctx,
} else {
samplers->compressed_colortex_mask &= ~(1 << slot);
}
-
- if (rtex->fmask.size) {
- si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
- views[i], rviews[i]->fmask_state);
- } else {
- si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
- NULL, NULL);
- }
} else {
samplers->depth_texture_mask &= ~(1 << slot);
samplers->compressed_colortex_mask &= ~(1 << slot);
- si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
- NULL, NULL);
}
}
}
/* SAMPLER STATES */
-static void si_sampler_states_begin_new_cs(struct si_context *sctx,
- struct si_sampler_states *states)
-{
- if (!states->desc.buffer)
- return;
- radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
-}
-
+/**
+ * Bind sampler states to slots [start, start + count) of one shader stage.
+ *
+ * A NULL entry, or one equal to the state already recorded for the slot, is
+ * skipped.  Otherwise the state is remembered in views.sampler_states[] and
+ * its four dwords are copied to [12:15] of the slot's 16-dword descriptor,
+ * unless the view bound to that slot has an FMASK occupying those dwords.
+ */
static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
unsigned start, unsigned count, void **states)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_sampler_states *samplers = &sctx->samplers[shader].states;
+ struct si_textures_info *samplers = &sctx->samplers[shader];
+ struct si_descriptors *desc = &samplers->views.desc;
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
int i;
if (!count || shader >= SI_NUM_SHADERS)
return;
- if (start == 0)
- samplers->saved_states[0] = states[0];
- if (start == 1)
- samplers->saved_states[1] = states[0];
- else if (start == 0 && count >= 2)
- samplers->saved_states[1] = states[1];
-
for (i = 0; i < count; i++) {
unsigned slot = start + i;
- if (!sstates[i])
+ if (!sstates[i] ||
+ sstates[i] == samplers->views.sampler_states[slot])
+ continue;
+
+ samplers->views.sampler_states[slot] = sstates[i];
+
+ /* If FMASK is bound, don't overwrite it.
+ * The sampler state will be set after FMASK is unbound.
+ *
+ * The view is looked up by slot (start + i): `i` only indexes the
+ * caller's array, while views[] is indexed by descriptor slot.
+ *
+ * NOTE(review): si_set_sampler_views checks target != PIPE_BUFFER
+ * before casting a texture to r600_texture; confirm whether the
+ * same guard is needed here.
+ */
+ if (samplers->views.views[slot] &&
+ samplers->views.views[slot]->texture &&
+ ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
continue;
- memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
- samplers->desc.list_dirty = true;
+ memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
+ desc->list_dirty = true;
}
}
@@ -862,7 +872,9 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (views->views[i]->texture == buf) {
- si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
+ si_desc_reset_buffer_offset(ctx,
+ views->desc.list +
+ i * 16 + 4,
old_va, buf);
views->desc.list_dirty = true;
@@ -882,7 +894,6 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx,
sctx->const_buffers[shader].desc.pointer_dirty = true;
sctx->rw_buffers[shader].desc.pointer_dirty = true;
sctx->samplers[shader].views.desc.pointer_dirty = true;
- sctx->samplers[shader].states.desc.pointer_dirty = true;
if (shader == PIPE_SHADER_VERTEX)
sctx->vertex_buffers.pointer_dirty = true;
@@ -1003,7 +1014,6 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
- si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
}
si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
}
@@ -1023,9 +1033,7 @@ void si_init_all_descriptors(struct si_context *sctx)
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_descriptors(&sctx->samplers[i].views.desc,
- SI_SGPR_SAMPLER_VIEWS, 8, SI_NUM_SAMPLER_VIEWS);
- si_init_descriptors(&sctx->samplers[i].states.desc,
- SI_SGPR_SAMPLER_STATES, 4, SI_NUM_SAMPLER_STATES);
+ SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS);
}
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
@@ -1056,8 +1064,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
- !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
- !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
+ !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc))
return false;
}
return si_upload_vertex_buffer_descriptors(sctx);
@@ -1071,7 +1078,6 @@ void si_release_all_descriptors(struct si_context *sctx)
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
- si_release_descriptors(&sctx->samplers[i].states.desc);
}
si_release_descriptors(&sctx->vertex_buffers);
}
@@ -1084,7 +1090,6 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
- si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
}
si_vertex_buffers_begin_new_cs(sctx);
si_shader_userdata_begin_new_cs(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 3c963db5078..b5790d6b564 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -113,7 +113,6 @@ struct si_cs_shader_state {
struct si_textures_info {
+ /* sampler views together with their sampler states and descriptor list */
struct si_sampler_views views;
- struct si_sampler_states states;
uint32_t depth_texture_mask; /* which textures are depth */
uint32_t compressed_colortex_mask;
};
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c1d3edc7143..34b84eb81d9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -86,8 +86,9 @@ struct si_shader_context
LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
- LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS];
- LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES];
+ LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
+ LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
+ LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
@@ -2286,7 +2287,6 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
- LLVMValueRef args[9];
int i;
/* Clamp color */
@@ -2308,27 +2308,46 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (index == 0 &&
- si_shader_ctx->shader->key.ps.last_cbuf > 0) {
- for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+ if (si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+ LLVMValueRef args[8][9];
+ int c, last = -1;
+
+ /* Get the export arguments, also find out what the last one is. */
+ for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
si_llvm_init_export_args(bld_base, color,
- V_008DFC_SQ_EXP_MRT + c, args);
+ V_008DFC_SQ_EXP_MRT + c, args[c]);
+ if (args[c][0] != bld_base->uint_bld.zero)
+ last = c;
+ }
+
+ /* Emit all exports. */
+ for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+ if (is_last && last == c) {
+ args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
+ args[c][2] = bld_base->uint_bld.one; /* DONE bit */
+ } else if (args[c][0] == bld_base->uint_bld.zero)
+ continue; /* unnecessary NULL export */
+
lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
+ args[c], 9, 0);
}
+ } else {
+ LLVMValueRef args[9];
+
+ /* Export */
+ si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+ args);
+ if (is_last) {
+ args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
+ args[2] = bld_base->uint_bld.one; /* DONE bit */
+ } else if (args[0] == bld_base->uint_bld.zero)
+ return; /* unnecessary NULL export */
+
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
}
-
- /* Export */
- si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
- args);
- if (is_last) {
- args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
- args[2] = bld_base->uint_bld.one; /* DONE bit */
- }
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
}
static void si_export_null(struct lp_build_tgsi_context *bld_base)
@@ -2363,19 +2382,43 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
int last_color_export = -1;
int i;
- /* If there are no outputs, add a dummy export. */
- if (!info->num_outputs) {
- si_export_null(bld_base);
- return;
- }
-
/* Determine the last export. If MRTZ is present, it's always last.
* Otherwise, find the last color export.
*/
- if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask)
- for (i = 0; i < info->num_outputs; i++)
- if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR)
+ if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) {
+ unsigned spi_format = shader->key.ps.spi_shader_col_format;
+
+ /* Don't export NULL and return if alpha-test is enabled. */
+ if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS &&
+ shader->key.ps.alpha_func != PIPE_FUNC_NEVER &&
+ (spi_format & 0xf) == 0)
+ spi_format |= V_028714_SPI_SHADER_32_AR;
+
+ for (i = 0; i < info->num_outputs; i++) {
+ unsigned index = info->output_semantic_index[i];
+
+ if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR)
+ continue;
+
+ /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
+ if (shader->key.ps.last_cbuf > 0) {
+ /* Just set this if any of the colorbuffers are enabled. */
+ if (spi_format &
+ ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1))
+ last_color_export = i;
+ continue;
+ }
+
+ if ((spi_format >> (index * 4)) & 0xf)
last_color_export = i;
+ }
+
+ /* If there are no outputs, export NULL. */
+ if (last_color_export == -1) {
+ si_export_null(bld_base);
+ return;
+ }
+ }
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
@@ -2480,13 +2523,58 @@ static void set_tex_fetch_args(struct gallivm_state *gallivm,
static const struct lp_build_tgsi_action tex_action;
+/* Selects which descriptor get_sampler_desc() loads from a sampler slot. */
+enum desc_type {
+ DESC_IMAGE,
+ DESC_FMASK,
+ DESC_SAMPLER
+};
+
+/* Return the LLVM type "pointer, in CONST_ADDR_SPACE, to an array of
+ * num_elements values of elem_type". */
+static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
+{
+ return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
+ CONST_ADDR_SPACE);
+}
+
+/**
+ * Load an image view, fmask view, or sampler state descriptor.
+ *
+ * \param si_shader_ctx  shader context; supplies the builder and main_fn
+ * \param index          sampler slot number as an i32 value
+ * \param type           which descriptor of the slot to load
+ * \return the value loaded by build_indexed_load_const() from the
+ *         SI_PARAM_SAMPLERS array at the computed element index
+ */
+static LLVMValueRef get_sampler_desc(struct si_shader_context *si_shader_ctx,
+ LLVMValueRef index, enum desc_type type)
+{
+ struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_SAMPLERS);
+
+ /* SI_PARAM_SAMPLERS is declared in create_function() as an array of
+ * 8 x i32 vectors, so one 16-dword slot spans two array elements. */
+ switch (type) {
+ case DESC_IMAGE:
+ /* The image is at [0:7]. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), "");
+ break;
+ case DESC_FMASK:
+ /* The FMASK is at [8:15]. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), "");
+ index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 1, 0), "");
+ break;
+ case DESC_SAMPLER:
+ /* The sampler state is at [12:15]. */
+ /* Viewed as 4 x i32 elements a slot holds four of them; the sampler
+ * state is the last one, hence index * 4 + 3. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 4, 0), "");
+ index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 3, 0), "");
+ ptr = LLVMBuildPointerCast(builder, ptr,
+ const_array(LLVMVectorType(i32, 4), 0), "");
+ break;
+ }
+
+ return build_indexed_load_const(si_shader_ctx, ptr, index);
+}
+
static void tex_fetch_ptrs(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
{
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = bld_base->base.gallivm;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Texture.Texture;
unsigned sampler_src;
@@ -2501,24 +2589,20 @@ static void tex_fetch_ptrs(
ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
- *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
- *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
-
- *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
- *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
+ *res_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_IMAGE);
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
- lp_build_const_int32(gallivm,
- SI_FMASK_TEX_OFFSET), "");
- *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
- *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
+ *samp_ptr = NULL;
+ *fmask_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_FMASK);
+ } else {
+ *samp_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_SAMPLER);
+ *fmask_ptr = NULL;
}
} else {
*res_ptr = si_shader_ctx->sampler_views[sampler_index];
*samp_ptr = si_shader_ctx->sampler_states[sampler_index];
- *fmask_ptr = si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index];
+ *fmask_ptr = si_shader_ctx->fmasks[sampler_index];
}
}
@@ -3498,12 +3582,6 @@ static void create_meta_data(struct si_shader_context *si_shader_ctx)
si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3);
}
-static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
-{
- return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
- CONST_ADDR_SPACE);
-}
-
static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
struct pipe_stream_output_info *so,
LLVMTypeRef *params, LLVMTypeRef i32,
@@ -3530,7 +3608,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct si_shader *shader = si_shader_ctx->shader;
- LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32;
+ LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v8i32;
unsigned i, last_array_pointer, last_sgpr, num_params;
i8 = LLVMInt8TypeInContext(gallivm->context);
@@ -3538,15 +3616,14 @@ static void create_function(struct si_shader_context *si_shader_ctx)
f32 = LLVMFloatTypeInContext(gallivm->context);
v2i32 = LLVMVectorType(i32, 2);
v3i32 = LLVMVectorType(i32, 3);
- v4i32 = LLVMVectorType(i32, 4);
v8i32 = LLVMVectorType(i32, 8);
v16i8 = LLVMVectorType(i8, 16);
params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
- params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
- params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
- last_array_pointer = SI_PARAM_SAMPLER_VIEWS;
+ params[SI_PARAM_SAMPLERS] = const_array(v8i32, SI_NUM_SAMPLERS);
+ params[SI_PARAM_UNUSED] = LLVMPointerType(i32, CONST_ADDR_SPACE);
+ last_array_pointer = SI_PARAM_UNUSED;
switch (si_shader_ctx->type) {
case TGSI_PROCESSOR_VERTEX:
@@ -3747,34 +3824,26 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
struct gallivm_state * gallivm = bld_base->base.gallivm;
const struct tgsi_shader_info * info = bld_base->info;
-
unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
-
- LLVMValueRef res_ptr, samp_ptr;
LLVMValueRef offset;
if (num_samplers == 0)
return;
- res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
- samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
-
/* Load the resources and samplers, we rely on the code sinking to do the rest */
for (i = 0; i < num_samplers; ++i) {
/* Resource */
offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
-
- /* Sampler */
- offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
+ si_shader_ctx->sampler_views[i] =
+ get_sampler_desc(si_shader_ctx, offset, DESC_IMAGE);
/* FMASK resource */
- if (info->is_msaa_sampler[i]) {
- offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
- si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] =
- build_indexed_load_const(si_shader_ctx, res_ptr, offset);
- }
+ if (info->is_msaa_sampler[i])
+ si_shader_ctx->fmasks[i] =
+ get_sampler_desc(si_shader_ctx, offset, DESC_FMASK);
+ else
+ si_shader_ctx->sampler_states[i] =
+ get_sampler_desc(si_shader_ctx, offset, DESC_SAMPLER);
}
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c42c51e0455..dc75e0330e4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -77,8 +77,8 @@ struct radeon_shader_reloc;
#define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */
#define SI_SGPR_CONST_BUFFERS 2
-#define SI_SGPR_SAMPLER_STATES 4
-#define SI_SGPR_SAMPLER_VIEWS 6
+#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
+/* TODO: gap */
#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
@@ -101,8 +101,8 @@ struct radeon_shader_reloc;
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
-#define SI_PARAM_SAMPLER_STATES 2
-#define SI_PARAM_SAMPLER_VIEWS 3
+#define SI_PARAM_SAMPLERS 2
+#define SI_PARAM_UNUSED 3 /* TODO: use */
/* VS only parameters */
#define SI_PARAM_VERTEX_BUFFERS 4
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index e9a017534d1..f64c4d45f1b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -144,17 +144,12 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
-#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */
-#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS
-#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1)
-
/* User sampler views: 0..15
* Polygon stipple tex: 16
- * FMASK sampler views: 17..33 (no sampler states)
*/
-#define SI_FMASK_TEX_OFFSET SI_NUM_SAMPLERS
-#define SI_NUM_SAMPLER_VIEWS (SI_FMASK_TEX_OFFSET + SI_NUM_SAMPLERS)
-#define SI_NUM_SAMPLER_STATES SI_NUM_SAMPLERS
+#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */
+#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS
+#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1)
/* User constant buffers: 0..15
* Driver state constants: 16
@@ -210,12 +205,8 @@ struct si_descriptors {
+/* Sampler views and sampler states of one shader stage; both are written
+ * into the single descriptor list in `desc`. */
struct si_sampler_views {
struct si_descriptors desc;
- struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS];
-};
-
-struct si_sampler_states {
- struct si_descriptors desc;
- void *saved_states[2]; /* saved for u_blitter */
+ struct pipe_sampler_view *views[SI_NUM_SAMPLERS];
+ /* Sampler state last recorded for each slot by si_bind_sampler_states(). */
+ void *sampler_states[SI_NUM_SAMPLERS];
};
struct si_buffer_resources {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 49c310cfdf7..8151c447065 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -405,8 +405,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
&ws->info.num_tile_pipes);
- /* The kernel returns 12 for some cards for an unknown reason.
- * I thought this was supposed to be a power of two.
+ /* "num_tile_pipes" must be equal to the number of pipes (Px) in the
+ * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti)
+ * reports a different value (12). Fix it by setting what's in the
+ * GB_TILE_MODE array (8).
*/
if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12)
ws->info.num_tile_pipes = 8;