diff options
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 114 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_internal.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_shaders.c | 75 |
7 files changed, 217 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5e20a0b8e38..3001a3a8cd9 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -84,6 +84,16 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_eliminate_fastclear); if (sctx->custom_blend_dcc_decompress) sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_dcc_decompress); + if (sctx->vs_blit_pos) /* each blit vertex shader slot is deleted only if it is non-NULL */ + sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos); + if (sctx->vs_blit_pos_layered) + sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos_layered); + if (sctx->vs_blit_color) + sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color); + if (sctx->vs_blit_color_layered) + sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color_layered); + if (sctx->vs_blit_texcoord) + sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_texcoord); if (sctx->blitter) util_blitter_destroy(sctx->blitter); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 97dd875a64e..c45cc2d8145 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -320,6 +320,11 @@ struct si_context { void *custom_blend_fmask_decompress; void *custom_blend_eliminate_fastclear; void *custom_blend_dcc_decompress; + void *vs_blit_pos; /* blit vertex shaders: si_get_blit_vs creates each variant on first request and stores it in its slot */ + void *vs_blit_pos_layered; + void *vs_blit_color; + void *vs_blit_color_layered; + void *vs_blit_texcoord; struct si_screen *screen; LLVMTargetMachineRef tm; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c6cb45b78d5..da53ac3bd26 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -454,11 +454,97 @@ static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx, 
return LLVMBuildFPTrunc(builder, value, ctx->f32, ""); } +/* Return one 16-bit half of a packed 32-bit value, sign-extended to i32: index 0 selects the low half, index 1 the high half. */ static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, + LLVMValueRef i32, unsigned index) +{ + assert(index <= 1); + + if (index == 1) /* high half: an arithmetic shift right by 16 keeps the sign */ + return LLVMBuildAShr(ctx->ac.builder, i32, + LLVMConstInt(ctx->i32, 16, 0), ""); + /* low half: truncate to i16, then sign-extend back to i32 */ + return LLVMBuildSExt(ctx->ac.builder, + LLVMBuildTrunc(ctx->ac.builder, i32, + ctx->ac.i16, ""), + ctx->i32, ""); +} + void si_llvm_load_input_vs( struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4]) { + unsigned vs_blit_property = + ctx->shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; + /* Non-zero property: build the inputs from the blit function parameters and return before the rest of this function. */ + if (vs_blit_property) { + LLVMValueRef vertex_id = ctx->abi.vertex_id; + LLVMValueRef sel_x1 = LLVMBuildICmp(ctx->ac.builder, + LLVMIntULE, vertex_id, + ctx->i32_1, ""); /* sel_x1 is true for vertex ids 0 and 1 */ + /* Use LLVMIntNE, because we have 3 vertices and only + * the middle one should use y2. + */ + LLVMValueRef sel_y1 = LLVMBuildICmp(ctx->ac.builder, + LLVMIntNE, vertex_id, + ctx->i32_1, ""); + + if (input_index == 0) { + /* Position: x and y are signed 16-bit values packed in pairs into the first two blit parameters. */ + LLVMValueRef x1y1 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs); + LLVMValueRef x2y2 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 1); + + LLVMValueRef x1 = unpack_sint16(ctx, x1y1, 0); + LLVMValueRef y1 = unpack_sint16(ctx, x1y1, 1); + LLVMValueRef x2 = unpack_sint16(ctx, x2y2, 0); + LLVMValueRef y2 = unpack_sint16(ctx, x2y2, 1); + + LLVMValueRef x = LLVMBuildSelect(ctx->ac.builder, sel_x1, + x1, x2, ""); + LLVMValueRef y = LLVMBuildSelect(ctx->ac.builder, sel_y1, + y1, y2, ""); + /* The selected integers are converted to float; z is the third blit parameter, which create_function declares as depth. */ + out[0] = LLVMBuildSIToFP(ctx->ac.builder, x, ctx->f32, ""); + out[1] = LLVMBuildSIToFP(ctx->ac.builder, y, ctx->f32, ""); + out[2] = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 2); + out[3] = ctx->ac.f32_1; + return; + } + + /* Color or texture coordinates: */ + assert(input_index == 1); + + if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { /* the four parameters after the position data are copied as-is for every vertex */ + for (int i = 0; i < 4; i++) { + out[i] = LLVMGetParam(ctx->main_fn, + 
ctx->param_vs_blit_inputs + 3 + i); + } + } else { + assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD); + LLVMValueRef x1 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 3); + LLVMValueRef y1 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 4); + LLVMValueRef x2 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 5); + LLVMValueRef y2 = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 6); + /* x and y use the same per-vertex selection as the position; the last two parameters are passed through for every vertex. */ + out[0] = LLVMBuildSelect(ctx->ac.builder, sel_x1, + x1, x2, ""); + out[1] = LLVMBuildSelect(ctx->ac.builder, sel_y1, + y1, y2, ""); + out[2] = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 7); + out[3] = LLVMGetParam(ctx->main_fn, + ctx->param_vs_blit_inputs + 8); + } + return; + } + unsigned chan; unsigned fix_fetch; unsigned num_fetches; @@ -4256,6 +4342,8 @@ static void create_function(struct si_shader_context *ctx) unsigned num_returns = 0; unsigned num_prolog_vgprs = 0; unsigned type = ctx->type; + unsigned vs_blit_property = + shader->selector->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; si_init_function_info(&fninfo); @@ -4272,6 +4360,32 @@ static void create_function(struct si_shader_context *ctx) switch (type) { case PIPE_SHADER_VERTEX: declare_global_desc_pointers(ctx, &fninfo); + /* Blit shaders: declare the blit inputs as SGPR arguments, in the order that si_llvm_load_input_vs reads them relative to param_vs_blit_inputs. */ + if (vs_blit_property) { + ctx->param_vs_blit_inputs = fninfo.num_params; /* index of the first blit argument */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */ + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */ + + if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */ + } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* 
texcoord.x2 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */ + add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */ + } + + /* VGPRs */ + declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); + break; /* leaves the switch before the per-stage descriptor pointers and VS-specific SGPRs are declared */ + } + declare_per_stage_desc_pointers(ctx, &fninfo, true); declare_vs_specific_input_sgprs(ctx, &fninfo); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index f457f8e20b1..ba80f550e49 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -176,6 +176,8 @@ enum { SI_SGPR_VS_STATE_BITS, SI_VS_NUM_USER_SGPR, + SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS, /* base user SGPR index of the blit data; si_shader_vs adds the blit SGPR count to it */ + /* TES */ SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, SI_SGPR_TES_OFFCHIP_ADDR_BASE64K, @@ -263,6 +265,16 @@ enum { TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, }; +enum { + /* Use a property enum that VS wouldn't use. */ + TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN, + + /* These represent the number of SGPRs the shader uses: 3 for position and depth, plus 4 for color or 6 for texcoord, matching the arguments declared in create_function. */ + SI_VS_BLIT_SGPRS_POS = 3, + SI_VS_BLIT_SGPRS_POS_COLOR = 7, + SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9, +}; + /* For VS shader key fix_fetch. 
*/ enum { SI_FIX_FETCH_NONE = 0, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 932e457f7fd..5c736f61251 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -134,6 +134,7 @@ struct si_shader_context { * max = 32*4 */ int param_vs_state_bits; + int param_vs_blit_inputs; /* index of the first blit input among the main function parameters; assigned in create_function */ /* HW VS */ int param_streamout_config; int param_streamout_write_index; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9cc323dc9be..fa2c147f5d8 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -402,6 +402,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen); void si_get_active_slot_masks(const struct tgsi_shader_info *info, uint32_t *const_and_shader_buffers, uint64_t *samplers_and_images); +/* Returns the blit vertex shader for the given attribute type and layer count; defined in si_state_shaders.c. */ void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type, + unsigned num_layers); /* si_state_draw.c */ void si_init_ia_multi_vgt_param_table(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 1fadc7ec5d9..d3b5dd51c14 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -891,7 +891,13 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ vgpr_comp_cnt = enable_prim_id ? 2 : (shader->info.uses_instanceid ? 1 : 0); - num_user_sgprs = SI_VS_NUM_USER_SGPR; + + if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) { + num_user_sgprs = SI_SGPR_VS_BLIT_DATA + + info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; + } else { + num_user_sgprs = SI_VS_NUM_USER_SGPR; + } } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = enable_prim_id ? 
3 : 2; num_user_sgprs = SI_TES_NUM_USER_SGPR; @@ -2043,7 +2049,8 @@ static void *si_create_shader_selector(struct pipe_context *ctx, /* The prolog is a no-op if there are no inputs. */ sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX && - sel->info.num_inputs; + sel->info.num_inputs && + !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS]; /* a shader with the blit property set never gets the prolog */ /* Set which opcode uses which (i,j) pair. */ if (sel->info.uses_persp_opcode_interp_centroid) @@ -3397,6 +3404,70 @@ static void si_emit_scratch_state(struct si_context *sctx, } } +/* Return the vertex shader used for blits with the given attribute type and layer count. Each variant is built on first request and kept in sctx for later calls. Returns NULL if the type is not handled or ureg_create fails. */ void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type, + unsigned num_layers) +{ + struct pipe_context *pipe = &sctx->b.b; + unsigned vs_blit_property; + void **vs; + /* Pick the context slot that holds this variant and the SGPR count that its shader declares. */ + switch (type) { + case UTIL_BLITTER_ATTRIB_NONE: + vs = num_layers > 1 ? &sctx->vs_blit_pos_layered : + &sctx->vs_blit_pos; + vs_blit_property = SI_VS_BLIT_SGPRS_POS; + break; + case UTIL_BLITTER_ATTRIB_COLOR: + vs = num_layers > 1 ? &sctx->vs_blit_color_layered : + &sctx->vs_blit_color; + vs_blit_property = SI_VS_BLIT_SGPRS_POS_COLOR; + break; + case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: + case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW: + assert(num_layers == 1); /* both texcoord types share one shader slot, which has no layered counterpart */ + vs = &sctx->vs_blit_texcoord; + vs_blit_property = SI_VS_BLIT_SGPRS_POS_TEXCOORD; + break; + default: + assert(0); + return NULL; + } + if (*vs) /* already created by an earlier call */ + return *vs; + + struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); + if (!ureg) + return NULL; + + /* Tell the shader to load VS inputs from SGPRs: */ + ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS, vs_blit_property); + + /* This is just a pass-through shader with 1-3 MOV instructions. 
*/ + ureg_MOV(ureg, + ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), + ureg_DECL_vs_input(ureg, 0)); + /* Color and texcoord variants also copy input 1 to GENERIC output 0. */ + if (type != UTIL_BLITTER_ATTRIB_NONE) { + ureg_MOV(ureg, + ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0), + ureg_DECL_vs_input(ureg, 1)); + } + /* Layered variants write the x component of the instance id to the x component of the LAYER output. */ + if (num_layers > 1) { + struct ureg_src instance_id = + ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0); + struct ureg_dst layer = + ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); + + ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X), + ureg_scalar(instance_id, TGSI_SWIZZLE_X)); + } + ureg_END(ureg); + /* Store the result in the selected slot so that later calls reuse it. */ + *vs = ureg_create_shader_and_destroy(ureg, pipe); + return *vs; +} + void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); |