diff options
author | Jason Ekstrand <[email protected]> | 2016-02-10 17:10:19 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-02-10 17:12:11 -0800 |
commit | f710f3ca377a4583b1fc5081cc28ee1d4aba71cb (patch) | |
tree | ab4d8226ab88b0e5ef2d9a4a6ebfdf79a244a6e2 /src/gallium | |
parent | 7ef3e47c27fb9b11b113bb699019785960d6bf9a (diff) | |
parent | 8750299a420af76cebd3067f6f603eacde06ae06 (diff) |
Merge remote-tracking branch 'mesa-public/master' into vulkan
This also reverts commit 1d65abfa582a371558113f699ffbf16d60b64c90 because
now NIR handles texture offsets in a much more sane way.
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/ir3/ir3_ra.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_transfer.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_uvd.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 115 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 203 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 19 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 |
11 files changed, 228 insertions, 176 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 63dd1700f9c..ffa75775505 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1434,21 +1434,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) tex_info(tex, &flags, &coords); - if (!has_off) { - /* could still have a constant offset: */ - if (tex->const_offset[0] || tex->const_offset[1] || - tex->const_offset[2] || tex->const_offset[3]) { - off = const_off; - - off[0] = create_immed(b, tex->const_offset[0]); - off[1] = create_immed(b, tex->const_offset[1]); - off[2] = create_immed(b, tex->const_offset[2]); - off[3] = create_immed(b, tex->const_offset[3]); - - has_off = true; - } - } - /* scale up integer coords for TXF based on the LOD */ if (ctx->unminify_coords && (opc == OPC_ISAML)) { assert(has_lod); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index 2ed78818e61..bcad96e8a30 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -605,21 +605,21 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) struct ir3_ra_block_data *bd; unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); - void def(unsigned name, struct ir3_instruction *instr) - { - /* defined on first write: */ - if (!ctx->def[name]) - ctx->def[name] = instr->ip; - ctx->use[name] = instr->ip; - BITSET_SET(bd->def, name); - } - - void use(unsigned name, struct ir3_instruction *instr) - { - ctx->use[name] = MAX2(ctx->use[name], instr->ip); - if (!BITSET_TEST(bd->def, name)) - BITSET_SET(bd->use, name); - } +#define def(name, instr) \ + do { \ + /* defined on first write: */ \ + if (!ctx->def[name]) \ + ctx->def[name] = instr->ip; \ + ctx->use[name] = instr->ip; \ + BITSET_SET(bd->def, name); \ + } while(0); + +#define use(name, instr) \ + do { \ + 
ctx->use[name] = MAX2(ctx->use[name], instr->ip); \ + if (!BITSET_TEST(bd->def, name)) \ + BITSET_SET(bd->use, name); \ + } while(0); bd = rzalloc(ctx->g, struct ir3_ra_block_data); diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c index 842e70a6899..9d00f4d9373 100644 --- a/src/gallium/drivers/r300/r300_transfer.c +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -246,9 +246,7 @@ r300_texture_transfer_map(struct pipe_context *ctx, void r300_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { - struct radeon_winsys *rws = r300_context(ctx)->rws; struct r300_transfer *trans = r300_transfer(transfer); - struct r300_resource *tex = r300_resource(transfer->resource); if (trans->linear_texture) { if (transfer->usage & PIPE_TRANSFER_WRITE) { diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 1f5a16aaca6..367aabc7a18 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -402,6 +402,9 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4; switch (dec->base.chroma_format) { + case PIPE_VIDEO_CHROMA_FORMAT_NONE: + /* TODO: assert? 
*/ + break; case PIPE_VIDEO_CHROMA_FORMAT_400: result.chroma_format = 0; break; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index a93887ec271..115877060ba 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -80,7 +80,7 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op) if (op & SI_SAVE_TEXTURES) { util_blitter_save_fragment_sampler_states( sctx->blitter, 2, - sctx->samplers[PIPE_SHADER_FRAGMENT].states.saved_states); + sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states); util_blitter_save_fragment_sampler_views(sctx->blitter, 2, sctx->samplers[PIPE_SHADER_FRAGMENT].views.views); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 6c796731a18..34cc06fc078 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -41,6 +41,18 @@ * * Also, uploading descriptors to newly allocated memory doesn't require * a KCACHE flush. + * + * + * Possible scenarios for one 16 dword image+sampler slot: + * + * | Image | w/ FMASK | Buffer | NULL + * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3] + * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0 + * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3] + * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3] + * + * FMASK implies MSAA, therefore no sampler state. + * Sampler states are never unbound except when FMASK is bound. */ #include "radeon/r600_cs.h" @@ -88,9 +100,9 @@ static void si_init_descriptors(struct si_descriptors *desc, desc->shader_userdata_offset = shader_userdata_index * 4; /* Initialize the array to NULL descriptors if the element size is 8. 
*/ - if (element_dw_size == 8) - for (i = 0; i < num_elements; i++) - memcpy(desc->list + i*element_dw_size, null_descriptor, + if (element_dw_size % 8 == 0) + for (i = 0; i < num_elements * element_dw_size / 8; i++) + memcpy(desc->list + i*8, null_descriptor, sizeof(null_descriptor)); } @@ -174,27 +186,42 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx, RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); } -static void si_set_sampler_view(struct si_context *sctx, unsigned shader, - unsigned slot, struct pipe_sampler_view *view, - unsigned *view_desc) +static void si_set_sampler_view(struct si_context *sctx, + struct si_sampler_views *views, + unsigned slot, struct pipe_sampler_view *view) { - struct si_sampler_views *views = &sctx->samplers[shader].views; - if (views->views[slot] == view) return; if (view) { struct si_sampler_view *rview = (struct si_sampler_view*)view; + struct r600_texture *rtex = (struct r600_texture*)view->texture; si_sampler_view_add_buffers(sctx, rview); pipe_sampler_view_reference(&views->views[slot], view); - memcpy(views->desc.list + slot*8, view_desc, 8*4); + memcpy(views->desc.list + slot * 16, rview->state, 8*4); + + if (rtex && rtex->fmask.size) { + memcpy(views->desc.list + slot*16 + 8, + rview->fmask_state, 8*4); + } else { + /* Disable FMASK and bind sampler state in [12:15]. */ + memcpy(views->desc.list + slot*16 + 8, + null_descriptor, 4*4); + + if (views->sampler_states[slot]) + memcpy(views->desc.list + slot*16 + 12, + views->sampler_states[slot], 4*4); + } + views->desc.enabled_mask |= 1llu << slot; } else { pipe_sampler_view_reference(&views->views[slot], NULL); - memcpy(views->desc.list + slot*8, null_descriptor, 8*4); + memcpy(views->desc.list + slot*16, null_descriptor, 8*4); + /* Only clear the lower dwords of FMASK. 
*/ + memcpy(views->desc.list + slot*16 + 8, null_descriptor, 4*4); views->desc.enabled_mask &= ~(1llu << slot); } @@ -208,7 +235,6 @@ static void si_set_sampler_views(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; struct si_textures_info *samplers = &sctx->samplers[shader]; - struct si_sampler_view **rviews = (struct si_sampler_view **)views; int i; if (!count || shader >= SI_NUM_SHADERS) @@ -220,13 +246,11 @@ static void si_set_sampler_views(struct pipe_context *ctx, if (!views || !views[i]) { samplers->depth_texture_mask &= ~(1 << slot); samplers->compressed_colortex_mask &= ~(1 << slot); - si_set_sampler_view(sctx, shader, slot, NULL, NULL); - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); + si_set_sampler_view(sctx, &samplers->views, slot, NULL); continue; } - si_set_sampler_view(sctx, shader, slot, views[i], rviews[i]->state); + si_set_sampler_view(sctx, &samplers->views, slot, views[i]); if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) { struct r600_texture *rtex = @@ -243,60 +267,46 @@ static void si_set_sampler_views(struct pipe_context *ctx, } else { samplers->compressed_colortex_mask &= ~(1 << slot); } - - if (rtex->fmask.size) { - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - views[i], rviews[i]->fmask_state); - } else { - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); - } } else { samplers->depth_texture_mask &= ~(1 << slot); samplers->compressed_colortex_mask &= ~(1 << slot); - si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot, - NULL, NULL); } } } /* SAMPLER STATES */ -static void si_sampler_states_begin_new_cs(struct si_context *sctx, - struct si_sampler_states *states) -{ - if (!states->desc.buffer) - return; - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer, - RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS); -} - static void si_bind_sampler_states(struct pipe_context *ctx, unsigned 
shader, unsigned start, unsigned count, void **states) { struct si_context *sctx = (struct si_context *)ctx; - struct si_sampler_states *samplers = &sctx->samplers[shader].states; + struct si_textures_info *samplers = &sctx->samplers[shader]; + struct si_descriptors *desc = &samplers->views.desc; struct si_sampler_state **sstates = (struct si_sampler_state**)states; int i; if (!count || shader >= SI_NUM_SHADERS) return; - if (start == 0) - samplers->saved_states[0] = states[0]; - if (start == 1) - samplers->saved_states[1] = states[0]; - else if (start == 0 && count >= 2) - samplers->saved_states[1] = states[1]; - for (i = 0; i < count; i++) { unsigned slot = start + i; - if (!sstates[i]) + if (!sstates[i] || + sstates[i] == samplers->views.sampler_states[slot]) + continue; + + samplers->views.sampler_states[slot] = sstates[i]; + + /* If FMASK is bound, don't overwrite it. + * The sampler state will be set after FMASK is unbound. + */ + if (samplers->views.views[i] && + samplers->views.views[i]->texture && + ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size) continue; - memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4); - samplers->desc.list_dirty = true; + memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); + desc->list_dirty = true; } } @@ -862,7 +872,9 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource while (mask) { unsigned i = u_bit_scan64(&mask); if (views->views[i]->texture == buf) { - si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4, + si_desc_reset_buffer_offset(ctx, + views->desc.list + + i * 16 + 4, old_va, buf); views->desc.list_dirty = true; @@ -882,7 +894,6 @@ static void si_mark_shader_pointers_dirty(struct si_context *sctx, sctx->const_buffers[shader].desc.pointer_dirty = true; sctx->rw_buffers[shader].desc.pointer_dirty = true; sctx->samplers[shader].views.desc.pointer_dirty = true; - sctx->samplers[shader].states.desc.pointer_dirty = true; if (shader == 
PIPE_SHADER_VERTEX) sctx->vertex_buffers.pointer_dirty = true; @@ -1003,7 +1014,6 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom) si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false); si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false); - si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false); } si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false); } @@ -1023,9 +1033,7 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT); si_init_descriptors(&sctx->samplers[i].views.desc, - SI_SGPR_SAMPLER_VIEWS, 8, SI_NUM_SAMPLER_VIEWS); - si_init_descriptors(&sctx->samplers[i].states.desc, - SI_SGPR_SAMPLER_STATES, 4, SI_NUM_SAMPLER_STATES); + SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS); } si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, @@ -1056,8 +1064,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx) for (i = 0; i < SI_NUM_SHADERS; i++) { if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) || !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) || - !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) || - !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc)) + !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc)) return false; } return si_upload_vertex_buffer_descriptors(sctx); @@ -1071,7 +1078,6 @@ void si_release_all_descriptors(struct si_context *sctx) si_release_buffer_resources(&sctx->const_buffers[i]); si_release_buffer_resources(&sctx->rw_buffers[i]); si_release_sampler_views(&sctx->samplers[i].views); - si_release_descriptors(&sctx->samplers[i].states.desc); } si_release_descriptors(&sctx->vertex_buffers); } @@ -1084,7 +1090,6 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]); 
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); - si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states); } si_vertex_buffers_begin_new_cs(sctx); si_shader_userdata_begin_new_cs(sctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 3c963db5078..b5790d6b564 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -113,7 +113,6 @@ struct si_cs_shader_state { struct si_textures_info { struct si_sampler_views views; - struct si_sampler_states states; uint32_t depth_texture_mask; /* which textures are depth */ uint32_t compressed_colortex_mask; }; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c1d3edc7143..34b84eb81d9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -86,8 +86,9 @@ struct si_shader_context LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS]; LLVMValueRef lds; LLVMValueRef *constants[SI_NUM_CONST_BUFFERS]; - LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS]; - LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES]; + LLVMValueRef sampler_views[SI_NUM_SAMPLERS]; + LLVMValueRef sampler_states[SI_NUM_SAMPLERS]; + LLVMValueRef fmasks[SI_NUM_USER_SAMPLERS]; LLVMValueRef so_buffers[4]; LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; @@ -2286,7 +2287,6 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct lp_build_context *base = &bld_base->base; - LLVMValueRef args[9]; int i; /* Clamp color */ @@ -2308,27 +2308,46 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. 
*/ - if (index == 0 && - si_shader_ctx->shader->key.ps.last_cbuf > 0) { - for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + if (si_shader_ctx->shader->key.ps.last_cbuf > 0) { + LLVMValueRef args[8][9]; + int c, last = -1; + + /* Get the export arguments, also find out what the last one is. */ + for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, - V_008DFC_SQ_EXP_MRT + c, args); + V_008DFC_SQ_EXP_MRT + c, args[c]); + if (args[c][0] != bld_base->uint_bld.zero) + last = c; + } + + /* Emit all exports. */ + for (c = 0; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) { + if (is_last && last == c) { + args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[c][2] = bld_base->uint_bld.one; /* DONE bit */ + } else if (args[c][0] == bld_base->uint_bld.zero) + continue; /* unnecessary NULL export */ + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + args[c], 9, 0); } + } else { + LLVMValueRef args[9]; + + /* Export */ + si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, + args); + if (is_last) { + args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ + args[2] = bld_base->uint_bld.one; /* DONE bit */ + } else if (args[0] == bld_base->uint_bld.zero) + return; /* unnecessary NULL export */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); } - - /* Export */ - si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index, - args); - if (is_last) { - args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ - args[2] = bld_base->uint_bld.one; /* DONE bit */ - } - lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); } static void si_export_null(struct lp_build_tgsi_context *bld_base) @@ -2363,19 
+2382,43 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) int last_color_export = -1; int i; - /* If there are no outputs, add a dummy export. */ - if (!info->num_outputs) { - si_export_null(bld_base); - return; - } - /* Determine the last export. If MRTZ is present, it's always last. * Otherwise, find the last color export. */ - if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) - for (i = 0; i < info->num_outputs; i++) - if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) + if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) { + unsigned spi_format = shader->key.ps.spi_shader_col_format; + + /* Don't export NULL and return if alpha-test is enabled. */ + if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS && + shader->key.ps.alpha_func != PIPE_FUNC_NEVER && + (spi_format & 0xf) == 0) + spi_format |= V_028714_SPI_SHADER_32_AR; + + for (i = 0; i < info->num_outputs; i++) { + unsigned index = info->output_semantic_index[i]; + + if (info->output_semantic_name[i] != TGSI_SEMANTIC_COLOR) + continue; + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (shader->key.ps.last_cbuf > 0) { + /* Just set this if any of the colorbuffers are enabled. */ + if (spi_format & + ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1)) + last_color_export = i; + continue; + } + + if ((spi_format >> (index * 4)) & 0xf) last_color_export = i; + } + + /* If there are no outputs, export NULL. 
*/ + if (last_color_export == -1) { + si_export_null(bld_base); + return; + } + } for (i = 0; i < info->num_outputs; i++) { unsigned semantic_name = info->output_semantic_name[i]; @@ -2480,13 +2523,58 @@ static void set_tex_fetch_args(struct gallivm_state *gallivm, static const struct lp_build_tgsi_action tex_action; +enum desc_type { + DESC_IMAGE, + DESC_FMASK, + DESC_SAMPLER +}; + +static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) +{ + return LLVMPointerType(LLVMArrayType(elem_type, num_elements), + CONST_ADDR_SPACE); +} + +/** + * Load an image view, fmask view. or sampler state descriptor. + */ +static LLVMValueRef get_sampler_desc(struct si_shader_context *si_shader_ctx, + LLVMValueRef index, enum desc_type type) +{ + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + + switch (type) { + case DESC_IMAGE: + /* The image is at [0:7]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), ""); + break; + case DESC_FMASK: + /* The FMASK is at [8:15]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 2, 0), ""); + index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 1, 0), ""); + break; + case DESC_SAMPLER: + /* The sampler state is at [12:15]. 
*/ + index = LLVMBuildMul(builder, index, LLVMConstInt(i32, 4, 0), ""); + index = LLVMBuildAdd(builder, index, LLVMConstInt(i32, 3, 0), ""); + ptr = LLVMBuildPointerCast(builder, ptr, + const_array(LLVMVectorType(i32, 4), 0), ""); + break; + } + + return build_indexed_load_const(si_shader_ctx, ptr, index); +} + static void tex_fetch_ptrs( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data, LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) { struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); - struct gallivm_state *gallivm = bld_base->base.gallivm; const struct tgsi_full_instruction * inst = emit_data->inst; unsigned target = inst->Texture.Texture; unsigned sampler_src; @@ -2501,24 +2589,20 @@ static void tex_fetch_ptrs( ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index); - *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index); - - *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES); - *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index); + *res_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_IMAGE); if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - ind_index = LLVMBuildAdd(gallivm->builder, ind_index, - lp_build_const_int32(gallivm, - SI_FMASK_TEX_OFFSET), ""); - *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index); + *samp_ptr = NULL; + *fmask_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_FMASK); + } else { + *samp_ptr = get_sampler_desc(si_shader_ctx, ind_index, DESC_SAMPLER); + *fmask_ptr = NULL; } } else { *res_ptr = si_shader_ctx->sampler_views[sampler_index]; *samp_ptr = si_shader_ctx->sampler_states[sampler_index]; - *fmask_ptr =
si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index]; + *fmask_ptr = si_shader_ctx->fmasks[sampler_index]; } } @@ -3498,12 +3582,6 @@ static void create_meta_data(struct si_shader_context *si_shader_ctx) si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3); } -static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) -{ - return LLVMPointerType(LLVMArrayType(elem_type, num_elements), - CONST_ADDR_SPACE); -} - static void declare_streamout_params(struct si_shader_context *si_shader_ctx, struct pipe_stream_output_info *so, LLVMTypeRef *params, LLVMTypeRef i32, @@ -3530,7 +3608,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = si_shader_ctx->shader; - LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32; + LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v8i32; unsigned i, last_array_pointer, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm->context); @@ -3538,15 +3616,14 @@ static void create_function(struct si_shader_context *si_shader_ctx) f32 = LLVMFloatTypeInContext(gallivm->context); v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); - v4i32 = LLVMVectorType(i32, 4); v8i32 = LLVMVectorType(i32, 8); v16i8 = LLVMVectorType(i8, 16); params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS); params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS); - params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES); - params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS); - last_array_pointer = SI_PARAM_SAMPLER_VIEWS; + params[SI_PARAM_SAMPLERS] = const_array(v8i32, SI_NUM_SAMPLERS); + params[SI_PARAM_UNUSED] = LLVMPointerType(i32, CONST_ADDR_SPACE); + last_array_pointer = SI_PARAM_UNUSED; switch 
(si_shader_ctx->type) { case TGSI_PROCESSOR_VERTEX: @@ -3747,34 +3824,26 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state * gallivm = bld_base->base.gallivm; const struct tgsi_shader_info * info = bld_base->info; - unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1; - - LLVMValueRef res_ptr, samp_ptr; LLVMValueRef offset; if (num_samplers == 0) return; - res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS); - samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES); - /* Load the resources and samplers, we rely on the code sinking to do the rest */ for (i = 0; i < num_samplers; ++i) { /* Resource */ offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset); - - /* Sampler */ - offset = lp_build_const_int32(gallivm, i); - si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset); + si_shader_ctx->sampler_views[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_IMAGE); /* FMASK resource */ - if (info->is_msaa_sampler[i]) { - offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i); - si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] = - build_indexed_load_const(si_shader_ctx, res_ptr, offset); - } + if (info->is_msaa_sampler[i]) + si_shader_ctx->fmasks[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_FMASK); + else + si_shader_ctx->sampler_states[i] = + get_sampler_desc(si_shader_ctx, offset, DESC_SAMPLER); } } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index c42c51e0455..dc75e0330e4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -77,8 +77,8 @@ struct radeon_shader_reloc; #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ 
#define SI_SGPR_CONST_BUFFERS 2 -#define SI_SGPR_SAMPLER_STATES 4 -#define SI_SGPR_SAMPLER_VIEWS 6 +#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ +/* TODO: gap */ #define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */ #define SI_SGPR_BASE_VERTEX 10 /* VS only */ #define SI_SGPR_START_INSTANCE 11 /* VS only */ @@ -101,8 +101,8 @@ struct radeon_shader_reloc; /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 #define SI_PARAM_CONST_BUFFERS 1 -#define SI_PARAM_SAMPLER_STATES 2 -#define SI_PARAM_SAMPLER_VIEWS 3 +#define SI_PARAM_SAMPLERS 2 +#define SI_PARAM_UNUSED 3 /* TODO: use */ /* VS only parameters */ #define SI_PARAM_VERTEX_BUFFERS 4 diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index e9a017534d1..f64c4d45f1b 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -144,17 +144,12 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; -#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ -#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS -#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) - /* User sampler views: 0..15 * Polygon stipple tex: 16 - * FMASK sampler views: 17..33 (no sampler states) */ -#define SI_FMASK_TEX_OFFSET SI_NUM_SAMPLERS -#define SI_NUM_SAMPLER_VIEWS (SI_FMASK_TEX_OFFSET + SI_NUM_SAMPLERS) -#define SI_NUM_SAMPLER_STATES SI_NUM_SAMPLERS +#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ +#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS +#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) /* User constant buffers: 0..15 * Driver state constants: 16 @@ -210,12 +205,8 @@ struct si_descriptors { struct si_sampler_views { struct si_descriptors desc; - struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS]; -}; - -struct si_sampler_states { - struct si_descriptors desc; - void *saved_states[2]; /* saved for u_blitter */ + struct pipe_sampler_view 
*views[SI_NUM_SAMPLERS]; + void *sampler_states[SI_NUM_SAMPLERS]; }; struct si_buffer_resources { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 49c310cfdf7..8151c447065 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -405,8 +405,10 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, &ws->info.num_tile_pipes); - /* The kernel returns 12 for some cards for an unknown reason. - * I thought this was supposed to be a power of two. + /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the + * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) + * reports a different value (12). Fix it by setting what's in the + * GB_TILE_MODE array (8). */ if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) ws->info.num_tile_pipes = 8; |