From 6dda2455c88a752d513d842cc9be1833fe98a89c Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 2 Jan 2016 02:54:29 +0100 Subject: radeonsi: move BCOLOR PS input locations after all other inputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BCOLOR inputs were immediately after COLOR inputs. Thus, all following inputs were offset by 1 if color_two_side was enabled, and not offset if it was not enabled, which is a variation that's problematic if we want to have 1 variant per shader and the variant doesn't care about color_two_side (that should be handled by other bytecode attached at the beginning). Instead, move BCOLOR inputs after all other inputs, so BCOLOR0 is at location "num_inputs" if it's present. BCOLOR1 is next. This also allows removing si_shader::nparam and si_shader::ps_input_param_offset, which are useless now. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 98bdb890a45..86d8f725cb6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -290,9 +290,7 @@ struct si_shader { struct radeon_shader_binary binary; struct si_shader_config config; - unsigned nparam; unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; - unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS]; unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS]; bool uses_instanceid; unsigned nr_pos_exports; -- cgit v1.2.3 From 4596f3c1b8bbcc83b841b6c7ea4a287a6f3210f7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 2 Jan 2016 03:18:03 +0100 Subject: radeonsi: remove si_shader::ps_input_interpolate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tgsi_shader_info has this too. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 8 +++----- src/gallium/drivers/radeonsi/si_shader.h | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 79255004f28..c595f208274 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -914,7 +914,6 @@ static void declare_input_fs( attr_number = lp_build_const_int32(gallivm, input_index); - shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate; interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, decl->Interp.Location); if (interp_param_idx == -1) @@ -3257,17 +3256,17 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, LLVMValueRef interp_param; const struct tgsi_full_instruction *inst = emit_data->inst; const char *intr_name; - int input_index; + int input_index = inst->Src[0].Register.Index; int chan; int i; LLVMValueRef attr_number; LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK); int interp_param_idx; + unsigned interp = shader->selector->info.input_interpolate[input_index]; unsigned location; assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); - input_index = inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) @@ -3275,8 +3274,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, else location = TGSI_INTERPOLATE_LOC_CENTROID; - interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index], - location); + interp_param_idx = lookup_interp_param_index(interp, location); if (interp_param_idx == -1) return; else if (interp_param_idx) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 86d8f725cb6..d3609d46334 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -291,7 +291,6 @@ struct si_shader { struct si_shader_config config; unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; - unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS]; bool uses_instanceid; unsigned nr_pos_exports; unsigned nr_param_exports; -- cgit v1.2.3 From b9126dcda834ba9cf58af32e97f4b5d93c9817a3 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sun, 3 Jan 2016 19:00:29 +0100 Subject: radeonsi: implement forcing per-sample_interpolation using the shader key only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was partly a state and partly emulated by shader code, but since we want to do this in a fragment shader prolog, we need to put it into the shader key, which will be used to generate the prolog. This also removes the spi_ps_input states and moves the registers to the PS state. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_hw_context.c | 1 - src/gallium/drivers/radeonsi/si_pipe.h | 2 - src/gallium/drivers/radeonsi/si_shader.c | 46 +++------- src/gallium/drivers/radeonsi/si_shader.h | 50 +++++------ src/gallium/drivers/radeonsi/si_state.h | 1 - src/gallium/drivers/radeonsi/si_state_shaders.c | 107 ++++++------------------ 6 files changed, 55 insertions(+), 152 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index d60c4515625..b5a4034cc12 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -182,7 +182,6 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->db_render_state); si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); si_mark_atom_dirty(ctx, &ctx->spi_map); - si_mark_atom_dirty(ctx, &ctx->spi_ps_input); si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); si_all_descriptors_begin_new_cs(ctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 48947442757..3c963db5078 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -202,7 +202,6 @@ struct si_context { struct si_viewports viewports; struct si_stencil_ref stencil_ref; struct r600_atom spi_map; - struct r600_atom spi_ps_input; /* Precomputed states. */ struct si_pm4_state *init_config; @@ -222,7 +221,6 @@ struct si_context { struct si_vertex_element *vertex_elements; unsigned sprite_coord_enable; bool flatshade; - bool force_persample_interp; /* shader descriptors */ struct si_descriptors vertex_buffers; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c595f208274..0a92a7b54e6 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -833,14 +833,11 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) } /* This shouldn't be used by explicit INTERP opcodes. */ -static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, - unsigned param) +static unsigned select_interp_param(struct si_shader_context *si_shader_ctx, + unsigned param) { - struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; - unsigned sample_param = 0; - LLVMValueRef default_ij, sample_ij, force_sample; - - default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param); + if (!si_shader_ctx->shader->key.ps.force_persample_interp) + return param; /* If the shader doesn't use center/centroid, just return the parameter. * @@ -850,36 +847,15 @@ static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, switch (param) { case SI_PARAM_PERSP_CENTROID: case SI_PARAM_PERSP_CENTER: - if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp) - return default_ij; - - sample_param = SI_PARAM_PERSP_SAMPLE; - break; + return SI_PARAM_PERSP_SAMPLE; case SI_PARAM_LINEAR_CENTROID: case SI_PARAM_LINEAR_CENTER: - if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear) - return default_ij; - - sample_param = SI_PARAM_LINEAR_SAMPLE; - break; + return SI_PARAM_LINEAR_SAMPLE; default: - return default_ij; + return param; } - - /* Otherwise, we have to select (i,j) based on a user data SGPR. */ - sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param); - - /* TODO: this can be done more efficiently by switching between - * 2 prologs. - */ - force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_PS_STATE_BITS); - force_sample = LLVMBuildTrunc(gallivm->builder, force_sample, - LLVMInt1TypeInContext(gallivm->context), ""); - return LLVMBuildSelect(gallivm->builder, force_sample, - sample_ij, default_ij, ""); } static void declare_input_fs( @@ -918,8 +894,11 @@ static void declare_input_fs( decl->Interp.Location); if (interp_param_idx == -1) return; - else if (interp_param_idx) - interp_param = get_interp_param(si_shader_ctx, interp_param_idx); + else if (interp_param_idx) { + interp_param_idx = select_interp_param(si_shader_ctx, + interp_param_idx); + interp_param = LLVMGetParam(main_fn, interp_param_idx); + } /* fs.constant returns the param from the middle vertex, so it's not * really useful for flat shading. It's meant to be used for custom @@ -3633,7 +3612,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) case TGSI_PROCESSOR_FRAGMENT: params[SI_PARAM_ALPHA_REF] = f32; - params[SI_PARAM_PS_STATE_BITS] = i32; params[SI_PARAM_PRIM_MASK] = i32; last_sgpr = SI_PARAM_PRIM_MASK; params[SI_PARAM_PERSP_SAMPLE] = v2i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d3609d46334..0d6a45a319a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -88,7 +88,6 @@ struct radeon_shader_reloc; #define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ #define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ #define SI_SGPR_ALPHA_REF 8 /* PS only */ -#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ #define SI_VS_NUM_USER_SGPR 13 /* API VS */ #define SI_ES_NUM_USER_SGPR 12 /* API VS */ @@ -97,7 +96,7 @@ struct radeon_shader_reloc; #define SI_TES_NUM_USER_SGPR 10 #define SI_GS_NUM_USER_SGPR 8 #define SI_GSCOPY_NUM_USER_SGPR 4 -#define SI_PS_NUM_USER_SGPR 10 +#define SI_PS_NUM_USER_SGPR 9 /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 @@ -152,27 +151,23 @@ struct radeon_shader_reloc; /* PS only parameters */ #define SI_PARAM_ALPHA_REF 4 -/* Bits: - * 0: force_persample_interp - */ -#define SI_PARAM_PS_STATE_BITS 5 -#define SI_PARAM_PRIM_MASK 6 -#define SI_PARAM_PERSP_SAMPLE 7 -#define SI_PARAM_PERSP_CENTER 8 -#define SI_PARAM_PERSP_CENTROID 9 -#define SI_PARAM_PERSP_PULL_MODEL 10 -#define SI_PARAM_LINEAR_SAMPLE 11 -#define SI_PARAM_LINEAR_CENTER 12 -#define SI_PARAM_LINEAR_CENTROID 13 -#define SI_PARAM_LINE_STIPPLE_TEX 14 -#define SI_PARAM_POS_X_FLOAT 15 -#define SI_PARAM_POS_Y_FLOAT 16 -#define SI_PARAM_POS_Z_FLOAT 17 -#define SI_PARAM_POS_W_FLOAT 18 -#define SI_PARAM_FRONT_FACE 19 -#define SI_PARAM_ANCILLARY 20 -#define SI_PARAM_SAMPLE_COVERAGE 21 -#define SI_PARAM_POS_FIXED_PT 22 +#define SI_PARAM_PRIM_MASK 5 +#define SI_PARAM_PERSP_SAMPLE 6 +#define SI_PARAM_PERSP_CENTER 7 +#define SI_PARAM_PERSP_CENTROID 8 +#define SI_PARAM_PERSP_PULL_MODEL 9 +#define SI_PARAM_LINEAR_SAMPLE 10 +#define SI_PARAM_LINEAR_CENTER 11 +#define SI_PARAM_LINEAR_CENTROID 12 +#define SI_PARAM_LINE_STIPPLE_TEX 13 +#define SI_PARAM_POS_X_FLOAT 14 +#define SI_PARAM_POS_Y_FLOAT 15 +#define SI_PARAM_POS_Z_FLOAT 16 +#define SI_PARAM_POS_W_FLOAT 17 +#define SI_PARAM_FRONT_FACE 18 +#define SI_PARAM_ANCILLARY 19 +#define SI_PARAM_SAMPLE_COVERAGE 20 +#define SI_PARAM_POS_FIXED_PT 21 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) @@ -193,14 +188,6 @@ struct si_shader_selector { /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; - /* Whether the shader has to use a conditional assignment to - * choose between weights when emulating - * pipe_rasterizer_state::force_persample_interp. - * If false, "si_emit_spi_ps_input" will take care of it instead. - */ - bool forces_persample_interp_for_persp; - bool forces_persample_interp_for_linear; - /* GS parameters. */ unsigned esgs_itemsize; unsigned gs_input_verts_per_prim; @@ -245,6 +232,7 @@ union si_shader_key { unsigned poly_stipple:1; unsigned poly_line_smoothing:1; unsigned clamp_color:1; + unsigned force_persample_interp:1; } ps; struct { unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 507f45938ce..e9a017534d1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -133,7 +133,6 @@ union si_state_atoms { struct r600_atom *viewports; struct r600_atom *stencil_ref; struct r600_atom *spi_map; - struct r600_atom *spi_ps_input; } s; struct r600_atom *array[0]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6e7311807dd..59511c67ed0 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -472,6 +472,17 @@ static void si_shader_ps(struct si_shader *shader) unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; bool has_centroid; + unsigned input_ena = shader->config.spi_ps_input_ena; + + /* we need to enable at least one of them, otherwise we hang the GPU */ + assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || + G_0286CC_PERSP_CENTER_ENA(input_ena) || + G_0286CC_PERSP_CENTROID_ENA(input_ena) || + G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || + G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || + G_0286CC_LINEAR_CENTER_ENA(input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(input_ena) || + G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state); @@ -515,6 +526,9 @@ static void si_shader_ps(struct si_shader *shader) shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) spi_shader_col_format = V_028714_SPI_SHADER_32_R; + si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); + si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena); + /* Set interpolation controls. */ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena); @@ -706,6 +720,15 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, (is_line && rs->line_smooth)) && sctx->framebuffer.nr_samples <= 1; key->ps.clamp_color = rs->clamp_fragment_color; + + key->ps.force_persample_interp = rs->force_persample_interp && + rs->multisample_enable && + sctx->framebuffer.nr_samples > 1 && + sctx->ps_iter_samples > 1 && + (sel->info.uses_persp_center || + sel->info.uses_persp_centroid || + sel->info.uses_linear_center || + sel->info.uses_linear_centroid); } key->ps.alpha_func = si_get_alpha_test_func(sctx); @@ -808,7 +831,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->type = util_pipe_shader_from_tgsi_processor(sel->info.processor); p_atomic_inc(&sscreen->b.num_shaders_created); - /* First set which opcode uses which (i,j) pair. */ + /* Set which opcode uses which (i,j) pair. */ if (sel->info.uses_persp_opcode_interp_centroid) sel->info.uses_persp_centroid = true; @@ -823,19 +846,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->info.uses_linear_opcode_interp_sample) sel->info.uses_linear_center = true; - /* Determine if the shader has to use a conditional assignment when - * emulating force_persample_interp. - */ - sel->forces_persample_interp_for_persp = - sel->info.uses_persp_center + - sel->info.uses_persp_centroid + - sel->info.uses_persp_sample >= 2; - - sel->forces_persample_interp_for_linear = - sel->info.uses_linear_center + - sel->info.uses_linear_centroid + - sel->info.uses_linear_sample >= 2; - switch (sel->type) { case PIPE_SHADER_GEOMETRY: sel->gs_output_prim = @@ -1181,68 +1191,6 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) assert(num_interp == num_written); } -static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom) -{ - struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - struct si_shader *ps = sctx->ps_shader.current; - unsigned input_ena; - - if (!ps) - return; - - input_ena = ps->config.spi_ps_input_ena; - - /* we need to enable at least one of them, otherwise we hang the GPU */ - assert(G_0286CC_PERSP_SAMPLE_ENA(input_ena) || - G_0286CC_PERSP_CENTER_ENA(input_ena) || - G_0286CC_PERSP_CENTROID_ENA(input_ena) || - G_0286CC_PERSP_PULL_MODEL_ENA(input_ena) || - G_0286CC_LINEAR_SAMPLE_ENA(input_ena) || - G_0286CC_LINEAR_CENTER_ENA(input_ena) || - G_0286CC_LINEAR_CENTROID_ENA(input_ena) || - G_0286CC_LINE_STIPPLE_TEX_ENA(input_ena)); - - if (sctx->force_persample_interp) { - unsigned num_persp = G_0286CC_PERSP_SAMPLE_ENA(input_ena) + - G_0286CC_PERSP_CENTER_ENA(input_ena) + - G_0286CC_PERSP_CENTROID_ENA(input_ena); - unsigned num_linear = G_0286CC_LINEAR_SAMPLE_ENA(input_ena) + - G_0286CC_LINEAR_CENTER_ENA(input_ena) + - G_0286CC_LINEAR_CENTROID_ENA(input_ena); - - /* If only one set of (i,j) coordinates is used, we can disable - * CENTER/CENTROID, enable SAMPLE and it will load SAMPLE coordinates - * where CENTER/CENTROID are expected, effectively forcing per-sample - * interpolation. - */ - if (num_persp == 1) { - input_ena &= C_0286CC_PERSP_CENTER_ENA; - input_ena &= C_0286CC_PERSP_CENTROID_ENA; - input_ena |= G_0286CC_PERSP_SAMPLE_ENA(1); - } - if (num_linear == 1) { - input_ena &= C_0286CC_LINEAR_CENTER_ENA; - input_ena &= C_0286CC_LINEAR_CENTROID_ENA; - input_ena |= G_0286CC_LINEAR_SAMPLE_ENA(1); - } - - /* If at least 2 sets of coordinates are used, we can't use this - * trick and have to select SAMPLE using a conditional assignment - * in the shader with "force_persample_interp" being a shader constant. - */ - } - - radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); - radeon_emit(cs, input_ena); - radeon_emit(cs, input_ena); - - if (ps->selector->forces_persample_interp_for_persp || - ps->selector->forces_persample_interp_for_linear) - radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + - SI_SGPR_PS_STATE_BITS * 4, - sctx->force_persample_interp); -} - /** * Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that. */ @@ -1774,12 +1722,6 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->spi_map); } - if (si_pm4_state_changed(sctx, ps) || - sctx->force_persample_interp != rs->force_persample_interp) { - sctx->force_persample_interp = rs->force_persample_interp; - si_mark_atom_dirty(sctx, &sctx->spi_ps_input); - } - if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps)) si_mark_atom_dirty(sctx, &sctx->cb_render_state); @@ -1812,7 +1754,6 @@ bool si_update_shaders(struct si_context *sctx) void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); - si_init_atom(sctx, &sctx->spi_ps_input, &sctx->atoms.s.spi_ps_input, si_emit_spi_ps_input); sctx->b.b.create_vs_state = si_create_shader_selector; sctx->b.b.create_tcs_state = si_create_shader_selector; -- cgit v1.2.3 From 5a53628f45787370636b3b0a0c7d29cb80e1ada7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 6 Jan 2016 16:03:38 +0100 Subject: radeonsi: read SPI_PS_INPUT_ADDR from LLVM if it returns it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 5 ++++- src/gallium/drivers/radeonsi/si_shader.h | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 68ce3871b7e..85203e046fc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3875,7 +3875,7 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary, conf->spi_ps_input_ena = value; break; case R_0286D0_SPI_PS_INPUT_ADDR: - /* Not used yet, but will be in the future */ + conf->spi_ps_input_addr = value; break; case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: @@ -3895,6 +3895,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary, } break; } + + if (!conf->spi_ps_input_addr) + conf->spi_ps_input_addr = conf->spi_ps_input_ena; } } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 0d6a45a319a..04b977af943 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -260,6 +260,7 @@ struct si_shader_config { unsigned num_vgprs; unsigned lds_size; unsigned spi_ps_input_ena; + unsigned spi_ps_input_addr; unsigned float_mode; unsigned scratch_bytes_per_wave; unsigned rsrc1; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 59511c67ed0..6e656b07ca1 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -527,7 +527,8 @@ static void si_shader_ps(struct si_shader *shader) spi_shader_col_format = V_028714_SPI_SHADER_32_R; si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); - si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, input_ena); + si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, + shader->config.spi_ps_input_addr); /* Set interpolation controls. */ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || -- cgit v1.2.3 From b6d5666fbf2a4196462db7ea82918feae883daae Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 26 Jan 2016 17:27:54 +0100 Subject: radeonsi: remove useless code that handles dx10_clamp_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "enable-no-nans-fp-math" is a wrong string and there was a disagreement about fixing it. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_shader.c | 7 ------- src/gallium/drivers/radeonsi/si_shader.h | 1 - src/gallium/drivers/radeonsi/si_state_shaders.c | 12 ++++++------ 3 files changed, 6 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index bd45d4ae8db..c92f07cff63 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3654,10 +3654,6 @@ static void create_function(struct si_shader_context *si_shader_ctx) radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); - if (shader->dx10_clamp_mode) - LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn, - "enable-no-nans-fp-math", "true"); - for (i = 0; i <= last_sgpr; ++i) { LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); @@ -4341,9 +4337,6 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, si_init_shader_ctx(&si_shader_ctx, sscreen, shader, tm, poly_stipple ? &stipple_shader_info : &sel->info); - if (sel->type != PIPE_SHADER_COMPUTE) - shader->dx10_clamp_mode = true; - shader->uses_instanceid = sel->info.uses_instanceid; bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 04b977af943..e3d1f4f39a1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -283,7 +283,6 @@ struct si_shader { bool uses_instanceid; unsigned nr_pos_exports; unsigned nr_param_exports; - bool dx10_clamp_mode; /* convert NaNs to 0 */ }; static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 6e656b07ca1..f48d7ca360f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -124,7 +124,7 @@ static void si_shader_ls(struct si_shader *shader) shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B528_SGPRS((num_sgprs - 1) / 8) | S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B528_DX10_CLAMP(shader->dx10_clamp_mode); + S_00B528_DX10_CLAMP(1); shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); } @@ -157,7 +157,7 @@ static void si_shader_hs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B428_SGPRS((num_sgprs - 1) / 8) | - S_00B428_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B428_DX10_CLAMP(1)); si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, S_00B42C_USER_SGPR(num_user_sgprs) | S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -203,7 +203,7 @@ static void si_shader_es(struct si_shader *shader) S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B328_SGPRS((num_sgprs - 1) / 8) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B328_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B328_DX10_CLAMP(1)); si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, S_00B32C_USER_SGPR(num_user_sgprs) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -292,7 +292,7 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B228_SGPRS((num_sgprs - 1) / 8) | - S_00B228_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B228_DX10_CLAMP(1)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, S_00B22C_USER_SGPR(num_user_sgprs) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -381,7 +381,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B128_SGPRS((num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B128_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B128_DX10_CLAMP(1)); si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, S_00B12C_USER_SGPR(num_user_sgprs) | S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | @@ -567,7 +567,7 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B028_SGPRS((num_sgprs - 1) / 8) | - S_00B028_DX10_CLAMP(shader->dx10_clamp_mode)); + S_00B028_DX10_CLAMP(1)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(num_user_sgprs) | -- cgit v1.2.3 From dc5fc3c2f60b4c208369e0eddbf416af059d88c7 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Tue, 26 Jan 2016 23:32:23 +0100 Subject: radeonsi: make LLVM IR dumping less messy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_compute.c | 3 ++- src/gallium/drivers/radeonsi/si_shader.c | 18 +++++++++++------- src/gallium/drivers/radeonsi/si_shader.h | 3 ++- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/radeonsi/si_shader.h') diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 825fbb181ba..4d27e86b414 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -124,7 +124,8 @@ static void *si_create_compute_state( code, header->num_bytes); si_compile_llvm(sctx->screen, &program->kernels[i].binary, &program->kernels[i].config, sctx->tm, - mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE); + mod, &sctx->b.debug, TGSI_PROCESSOR_COMPUTE, + "Compute Shader"); si_shader_dump(sctx->screen, &program->kernels[i], &sctx->b.debug, TGSI_PROCESSOR_COMPUTE); si_shader_binary_upload(sctx->screen, &program->kernels[i]); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 7f151516b8b..4551831dd2a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4081,7 +4081,8 @@ int si_compile_llvm(struct si_screen *sscreen, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, - unsigned processor) + unsigned processor, + const char *name) { int r = 0; unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations); @@ -4089,8 +4090,11 @@ int si_compile_llvm(struct si_screen *sscreen, if (r600_can_dump_shader(&sscreen->b, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); - if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) + if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) { + fprintf(stderr, "%s LLVM IR:\n\n", name); LLVMDumpModule(mod); + fprintf(stderr, "\n"); + } } if (!si_replace_shader(count, binary)) { @@ -4183,14 +4187,14 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld); - if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY)) - fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n"); - r = si_compile_llvm(sscreen, &si_shader_ctx->shader->binary, &si_shader_ctx->shader->config, si_shader_ctx->tm, bld_base->base.gallivm->module, - debug, TGSI_PROCESSOR_GEOMETRY); + debug, TGSI_PROCESSOR_GEOMETRY, + "GS Copy Shader"); if (!r) { + if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY)) + fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, si_shader_ctx->shader, debug, TGSI_PROCESSOR_GEOMETRY); r = si_shader_binary_upload(sscreen, si_shader_ctx->shader); @@ -4410,7 +4414,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm, - mod, debug, si_shader_ctx.type); + mod, debug, si_shader_ctx.type, "TGSI shader"); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); goto out; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index e3d1f4f39a1..c42c51e0455 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -328,7 +328,8 @@ int si_compile_llvm(struct si_screen *sscreen, LLVMTargetMachineRef tm, LLVMModuleRef mod, struct pipe_debug_callback *debug, - unsigned processor); + unsigned processor, + const char *name); void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); -- cgit v1.2.3