diff options
author | Marek Olšák <[email protected]> | 2018-01-07 21:05:52 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2018-04-02 13:55:25 -0400 |
commit | 2be6143032939c5c5fb6de4a44ffe3b076e1f098 (patch) | |
tree | 2675ebfeb419e00aa3939e4d3499d5288134df4a /src/gallium | |
parent | e04631b0f24f83ef7ee6566f04604d6222765662 (diff) |
radeonsi: implement GL_KHR_blend_equation_advanced
MSAA is supported using sample shading. Layered rendering and all texture
targets are also supported.
Tested-by: Dieter Nützel <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_blit.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_descriptors.c | 87 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_get.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_internal.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 59 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.c | 19 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_binning.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 18 |
12 files changed, 203 insertions, 17 deletions
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index f1c4f6d1e72..d9d489825f8 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -806,6 +806,14 @@ void si_decompress_textures(struct si_context *sctx, unsigned shader_mask) si_decompress_resident_images(sctx); } + if (sctx->ps_uses_fbfetch) { + struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; + si_decompress_color_texture(sctx, + (struct r600_texture*)cb0->texture, + cb0->u.tex.first_layer, + cb0->u.tex.last_layer); + } + si_check_render_feedback(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 07d1420d8ee..38befa445d2 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -687,7 +687,7 @@ si_mark_image_range_valid(const struct pipe_image_view *view) static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view, bool skip_decompress, - uint32_t *desc) + uint32_t *desc, uint32_t *fmask_desc) { struct si_screen *screen = ctx->screen; struct r600_resource *res; @@ -715,13 +715,14 @@ static void si_set_shader_image_desc(struct si_context *ctx, * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases, * so we don't wanna trigger it. 
*/ - if (tex->is_depth || tex->resource.b.b.nr_samples >= 2) { + if (tex->is_depth || + (!fmask_desc && tex->fmask.size != 0)) { assert(!"Z/S and MSAA image stores are not supported"); access &= ~PIPE_IMAGE_ACCESS_WRITE; } assert(!tex->is_depth); - assert(tex->fmask.size == 0); + assert(fmask_desc || tex->fmask.size == 0); if (uses_dcc && !skip_decompress && (view->access & PIPE_IMAGE_ACCESS_WRITE || @@ -762,7 +763,7 @@ static void si_set_shader_image_desc(struct si_context *ctx, view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, - desc, NULL); + desc, fmask_desc); si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level, @@ -792,7 +793,7 @@ static void si_set_shader_image(struct si_context *ctx, if (&images->views[slot] != view) util_copy_image_view(&images->views[slot], view); - si_set_shader_image_desc(ctx, view, skip_decompress, desc); + si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL); if (res->b.b.target == PIPE_BUFFER) { images->needs_color_decompress_mask &= ~(1 << slot); @@ -870,6 +871,77 @@ si_images_update_needs_color_decompress_mask(struct si_images *images) } } +void si_update_ps_colorbuf0_slot(struct si_context *sctx) +{ + struct si_buffer_resources *buffers = &sctx->rw_buffers; + struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + unsigned slot = SI_PS_IMAGE_COLORBUF0; + struct pipe_surface *surf = NULL; + + /* si_texture_disable_dcc can get us here again. */ + if (sctx->blitter->running) + return; + + /* See whether FBFETCH is used and color buffer 0 is set. */ + if (sctx->ps_shader.cso && + sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] && + sctx->framebuffer.state.nr_cbufs && + sctx->framebuffer.state.cbufs[0]) + surf = sctx->framebuffer.state.cbufs[0]; + + /* Return if FBFETCH transitions from disabled to disabled. 
*/ + if (!buffers->buffers[slot] && !surf) + return; + + sctx->ps_uses_fbfetch = surf != NULL; + si_update_ps_iter_samples(sctx); + + if (surf) { + struct r600_texture *tex = (struct r600_texture*)surf->texture; + struct pipe_image_view view; + + assert(tex); + assert(!tex->is_depth); + + /* Disable DCC, because the texture is used as both a sampler + * and color buffer. + */ + si_texture_disable_dcc(&sctx->b, tex); + + if (tex->resource.b.b.nr_samples <= 1 && tex->cmask_buffer) { + /* Disable CMASK. */ + assert(tex->cmask_buffer != &tex->resource); + si_eliminate_fast_color_clear(&sctx->b, tex); + si_texture_discard_cmask(sctx->screen, tex); + } + + view.resource = surf->texture; + view.format = surf->format; + view.access = PIPE_IMAGE_ACCESS_READ; + view.u.tex.first_layer = surf->u.tex.first_layer; + view.u.tex.last_layer = surf->u.tex.last_layer; + view.u.tex.level = surf->u.tex.level; + + /* Set the descriptor. */ + uint32_t *desc = descs->list + slot*4; + memset(desc, 0, 16 * 4); + si_set_shader_image_desc(sctx, &view, true, desc, desc + 8); + + pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b); + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + &tex->resource, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_RW_IMAGE); + buffers->enabled_mask |= 1u << slot; + } else { + /* Clear the descriptor. 
*/ + memset(descs->list + slot*4, 0, 8*4); + pipe_resource_reference(&buffers->buffers[slot], NULL); + buffers->enabled_mask &= ~(1u << slot); + } + + sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; +} + /* SAMPLER STATES */ static void si_bind_sampler_states(struct pipe_context *ctx, @@ -1855,7 +1927,7 @@ static void si_update_bindless_image_descriptor(struct si_context *sctx, memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list)); si_set_shader_image_desc(sctx, view, true, - desc->list + desc_slot_offset); + desc->list + desc_slot_offset, NULL); if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) { @@ -1921,6 +1993,7 @@ void si_update_all_texture_descriptors(struct si_context *sctx) } si_update_all_resident_texture_descriptors(sctx); + si_update_ps_colorbuf0_slot(sctx); } /* SHADER USER DATA */ @@ -2460,7 +2533,7 @@ static uint64_t si_create_image_handle(struct pipe_context *ctx, memset(desc_list, 0, sizeof(desc_list)); si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor); - si_set_shader_image_desc(sctx, view, false, &desc_list[0]); + si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL); img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list)); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index fc2be33b3e4..f75c97685ce 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -191,6 +191,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_TGSI_VOTE: + case PIPE_CAP_TGSI_FS_FBFETCH: return 1; case PIPE_CAP_TGSI_BALLOT: @@ -267,7 +268,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES: case PIPE_CAP_MAX_WINDOW_RECTANGLES: - case PIPE_CAP_TGSI_FS_FBFETCH: case 
PIPE_CAP_TGSI_MUL_ZERO_WINS: case PIPE_CAP_UMA: case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index dbb04ed7e45..e3d45ef6c3b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -532,6 +532,7 @@ struct si_context { /* MSAA config state. */ int ps_iter_samples; + bool ps_uses_fbfetch; bool smoothing_enabled; /* DB render state. */ @@ -931,4 +932,12 @@ vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level) return tex->tc_compatible_htile && level == 0; } +static inline unsigned si_get_ps_iter_samples(struct si_context *sctx) +{ + if (sctx->ps_uses_fbfetch) + return sctx->framebuffer.nr_samples; + + return sctx->ps_iter_samples; +} + #endif diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ac6f2572c65..dd2969aa755 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1929,7 +1929,7 @@ static void declare_input_fs( si_llvm_load_input_fs(ctx, input_index, out); } -static LLVMValueRef get_sample_id(struct si_shader_context *ctx) +LLVMValueRef si_get_sample_id(struct si_shader_context *ctx) { return si_unpack_param(ctx, SI_PARAM_ANCILLARY, 8, 4); } @@ -2152,7 +2152,7 @@ void si_load_system_value(struct si_shader_context *ctx, break; case TGSI_SEMANTIC_SAMPLEID: - value = get_sample_id(ctx); + value = si_get_sample_id(ctx); break; case TGSI_SEMANTIC_SAMPLEPOS: { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index f58978989d4..f598b762e1d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -541,6 +541,9 @@ struct si_shader_key { unsigned vs_export_prim_id:1; struct { unsigned interpolate_at_sample_force_center:1; + unsigned fbfetch_msaa; + unsigned fbfetch_is_1D; + unsigned fbfetch_layered; } ps; } u; } mono; diff --git 
a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 1bd52722413..941c6fc736d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -300,6 +300,7 @@ LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index, unsigned num); +LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); void si_shader_context_init_mem(struct si_shader_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index f5fa18fd38a..b90edddf2ef 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1873,6 +1873,63 @@ static void si_llvm_emit_txqs( emit_data->output[emit_data->chan] = samples; } +static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct ac_image_args args = {}; + LLVMValueRef ptr, image, fmask, addr_vec; + + /* Ignore src0, because KHR_blend_equation_advanced disallows multiple render + * targets. + */ + + /* Load the image descriptor. 
*/ + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); + ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); + ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, + ac_array_in_const32_addr_space(ctx->v8i32), ""); + image = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); + + LLVMValueRef addr[4]; + unsigned chan = 0; + + addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + + if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) + addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + + /* Get the current render target layer index. */ + if (ctx->shader->key.mono.u.ps.fbfetch_layered) + addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + addr[chan++] = si_get_sample_id(ctx); + + while (chan < 4) + addr[chan++] = LLVMGetUndef(ctx->i32); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) { + fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); + + ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false); + } + + addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr)); + + args.opcode = ac_image_load; + args.resource = image; + args.addr = addr_vec; + args.dmask = 0xf; + args.da = ctx->shader->key.mono.u.ps.fbfetch_layered; + + emit_data->output[emit_data->chan] = + ac_build_image_opcode(&ctx->ac, &args); +} + static const struct lp_build_tgsi_action tex_action = { .fetch_args = tex_fetch_args, .emit = build_tex_intrinsic, @@ -1905,6 +1962,8 @@ void si_shader_context_init_mem(struct si_shader_context *ctx) bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; + bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch; + bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args; bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = 
store_fetch_args; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c9ed7256f04..cd64d3bc0fb 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2899,6 +2899,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_context_add_resource_size(ctx, surf->base.texture); } + si_update_ps_colorbuf0_slot(sctx); si_update_poly_offset_state(sctx); si_mark_atom_dirty(sctx, &sctx->cb_render_state); si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); @@ -3361,8 +3362,9 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) 8, /* 16x MSAA */ }; unsigned log_samples = util_logbase2(setup_samples); + unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); unsigned log_ps_iter_samples = - util_logbase2(util_next_power_of_two(sctx->ps_iter_samples)); + util_logbase2(util_next_power_of_two(ps_iter_samples)); radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2); radeon_emit(cs, sc_line_cntl | @@ -3380,7 +3382,7 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1)); radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, - S_028A4C_PS_ITER_SAMPLE(sctx->ps_iter_samples > 1) | + S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) | sc_mode_cntl_1); } else if (sctx->smoothing_enabled) { radeon_set_context_reg(cs, R_028804_DB_EQAA, @@ -3409,6 +3411,14 @@ static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom) } } +void si_update_ps_iter_samples(struct si_context *sctx) +{ + if (sctx->framebuffer.nr_samples > 1) + si_mark_atom_dirty(sctx, &sctx->msaa_config); + if (sctx->screen->dpbb_allowed) + si_mark_atom_dirty(sctx, &sctx->dpbb_state); +} + static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) { struct si_context *sctx = (struct si_context *)ctx; @@ -3419,10 +3429,7 @@ static void 
si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) sctx->ps_iter_samples = min_samples; sctx->do_update_shaders = true; - if (sctx->framebuffer.nr_samples > 1) - si_mark_atom_dirty(sctx, &sctx->msaa_config); - if (sctx->screen->dpbb_allowed) - si_mark_atom_dirty(sctx, &sctx->dpbb_state); + si_update_ps_iter_samples(sctx); } /* diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 02659a7a4f3..37887853388 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -217,6 +217,12 @@ enum { SI_PS_CONST_POLY_STIPPLE, SI_PS_CONST_SAMPLE_POSITIONS, + /* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. */ + SI_PS_IMAGE_COLORBUF0, + SI_PS_IMAGE_COLORBUF0_HI, + SI_PS_IMAGE_COLORBUF0_FMASK, + SI_PS_IMAGE_COLORBUF0_FMASK_HI, + SI_NUM_RW_BUFFERS, }; @@ -324,6 +330,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, uint32_t *state); +void si_update_ps_colorbuf0_slot(struct si_context *sctx); void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot, struct pipe_constant_buffer *cbuf); void si_get_shader_buffers(struct si_context *sctx, @@ -395,6 +402,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, unsigned width0, unsigned height0, unsigned force_level); void si_update_fb_dirtiness_after_rendering(struct si_context *sctx); +void si_update_ps_iter_samples(struct si_context *sctx); /* si_state_binning.c */ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state); diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c index 0f50ea755cb..c0f129717a4 100644 --- a/src/gallium/drivers/radeonsi/si_state_binning.c +++ b/src/gallium/drivers/radeonsi/si_state_binning.c @@ -82,7 +82,7 @@ static struct uvec2 si_get_color_bin_size(struct si_context *sctx, /* 
Multiply the sum by some function of the number of samples. */ if (nr_samples >= 2) { - if (sctx->ps_iter_samples >= 2) + if (si_get_ps_iter_samples(sctx) >= 2) sum *= nr_samples; else sum *= 2; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8fe4c04ae79..d7742eafb04 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1460,6 +1460,23 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, } key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx); + + /* ps_uses_fbfetch is true only if the color buffer is bound. */ + if (sctx->ps_uses_fbfetch) { + struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0]; + struct pipe_resource *tex = cb0->texture; + + /* 1D textures are allocated and used as 2D on GFX9. */ + key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1; + key->mono.u.ps.fbfetch_is_1D = sctx->b.chip_class != GFX9 && + (tex->target == PIPE_TEXTURE_1D || + tex->target == PIPE_TEXTURE_1D_ARRAY); + key->mono.u.ps.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY || + tex->target == PIPE_TEXTURE_2D_ARRAY || + tex->target == PIPE_TEXTURE_CUBE || + tex->target == PIPE_TEXTURE_CUBE_ARRAY || + tex->target == PIPE_TEXTURE_3D; + } break; } default: @@ -2426,6 +2443,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->msaa_config); } si_set_active_descriptors_for_shader(sctx, sel); + si_update_ps_colorbuf0_slot(sctx); } static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) |