diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 9 |
3 files changed, 20 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index eb573450db5..ac427210288 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5186,6 +5186,7 @@ static void create_function(struct si_shader_context *ctx) break; case PIPE_SHADER_TESS_CTRL: + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32; @@ -5211,6 +5212,7 @@ static void create_function(struct si_shader_context *ctx) break; case PIPE_SHADER_TESS_EVAL: + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; num_params = SI_PARAM_TCS_OUT_LAYOUT+1; @@ -6768,6 +6770,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen, params[SI_PARAM_SAMPLERS] = ctx.i64; params[SI_PARAM_IMAGES] = ctx.i64; params[SI_PARAM_SHADER_BUFFERS] = ctx.i64; + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 7b1cbf9f6b0..26be25ea456 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -107,7 +107,8 @@ enum { SI_LS_NUM_USER_SGPR, /* both TCS and TES */ - SI_SGPR_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_SGPRS, + SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, + SI_SGPR_TCS_OUT_OFFSETS, SI_SGPR_TCS_OUT_LAYOUT, SI_TES_NUM_USER_SGPR, @@ -147,11 +148,18 @@ enum { SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_START_INSTANCE + 1, /* the other VS parameters are assigned dynamically */ + /* Layout of TCS outputs in the offchip buffer + * [0:8] = the number of patches per threadgroup. + * [9:15] = the number of output vertices per patch. + * [16:31] = the offset of per patch attributes in the buffer in bytes. + */ + SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ + /* Offsets where TCS outputs and TCS patch outputs live in LDS: * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 */ - SI_PARAM_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ + SI_PARAM_TCS_OUT_OFFSETS, /* for TCS & TES */ /* Layout of TCS outputs / TES inputs: * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index dab0dcce03b..e14a1c983f7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -108,6 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned input_patch_size, output_patch_size, output_patch0_offset; unsigned perpatch_output_offset, lds_size, ls_rsrc2; unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; + unsigned offchip_layout; *num_patches = 1; /* TODO: calculate this */ @@ -183,6 +184,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, ((output_vertex_size / 4) << 13); tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16); + offchip_layout = (pervertex_output_patch_size * *num_patches << 16) | + (num_tcs_output_cp << 9) | *num_patches; /* Set them for LS. */ radeon_set_sh_reg(cs, @@ -191,13 +194,15 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Set them for TCS. */ radeon_set_sh_reg_seq(cs, - R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3); + R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); + radeon_emit(cs, offchip_layout); radeon_emit(cs, tcs_out_offsets); radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); radeon_emit(cs, tcs_in_layout); /* Set them for TES. */ - radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2); + radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3); + radeon_emit(cs, offchip_layout); radeon_emit(cs, tcs_out_offsets); radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26)); } |