summary refs log tree commit diff stats
path: root/src/gallium/drivers
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2019-07-29 17:43:55 -0400
committer Marek Olšák <[email protected]> 2019-08-06 17:09:02 -0400
commit 91227a1e177a579adf0fd2d53b356618de374e9a (patch)
tree 39cdca4a25ae293bbeab463cfb98081e033da802 /src/gallium/drivers
parent f064b530f6f7245e1ae57a63b3b09bef48548623 (diff)
radeonsi/gfx10: add global use_ngg and use_ngg_streamout flags
Reviewed-by: Samuel Pitoiset <[email protected]>
Acked-by: Pierre-Eric Pelloux-Prayer <[email protected]>
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- src/gallium/drivers/radeonsi/si_get.c 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_gfx_cs.c 12
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_query.c 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 10
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 9
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 12
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_streamout.c 18
9 files changed, 41 insertions, 32 deletions
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 15777392555..f0eed6df30b 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -161,7 +161,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_QUERY_SO_OVERFLOW:
- return sscreen->info.chip_class <= GFX9;
+ return !sscreen->use_ngg_streamout;
case PIPE_CAP_POST_DEPTH_COVERAGE:
return sscreen->info.chip_class >= GFX10;
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 1560e3a2df3..b30839d25b5 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -162,11 +162,11 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
si_emit_streamout_end(ctx);
ctx->streamout.suspended = true;
- /* Since streamout uses GDS on gfx10, we need to make
- * GDS idle when we leave the IB, otherwise another
- * process might overwrite it while our shaders are busy.
+ /* Since NGG streamout uses GDS, we need to make GDS
+ * idle when we leave the IB, otherwise another process
+ * might overwrite it while our shaders are busy.
*/
- if (ctx->chip_class >= GFX10)
+ if (ctx->screen->use_ngg_streamout)
wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
}
}
@@ -303,7 +303,7 @@ void si_allocate_gds(struct si_context *sctx)
if (sctx->gds)
return;
- assert(sctx->chip_class >= GFX10); /* for gfx10 streamout */
+ assert(sctx->screen->use_ngg_streamout);
/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
@@ -405,7 +405,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->atoms.s.dpbb_state);
si_mark_atom_dirty(ctx, &ctx->atoms.s.stencil_ref);
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_map);
- if (ctx->chip_class < GFX10)
+ if (!ctx->screen->use_ngg_streamout)
si_mark_atom_dirty(ctx, &ctx->atoms.s.streamout_enable);
si_mark_atom_dirty(ctx, &ctx->atoms.s.render_cond);
/* CLEAR_STATE disables all window rectangles. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 28f23b26be6..1de2b3dd624 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -501,7 +501,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
if (!sctx->border_color_map)
goto fail;
- sctx->ngg = sctx->chip_class >= GFX10;
+ sctx->ngg = sscreen->use_ngg;
/* Initialize context functions used by graphics and compute. */
if (sctx->chip_class >= GFX10)
@@ -1154,6 +1154,8 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->info.family == CHIP_RAVEN;
sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2 ||
sscreen->info.chip_class >= GFX10;
+ sscreen->use_ngg = sscreen->info.chip_class >= GFX10;
+ sscreen->use_ngg_streamout = sscreen->info.chip_class >= GFX10;
/* Only enable primitive binning on APUs by default. */
if (sscreen->info.chip_class >= GFX10) {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5b09bb2b90f..ddc1ce0c6de 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -505,6 +505,8 @@ struct si_screen {
bool dpbb_allowed;
bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;
+ bool use_ngg;
+ bool use_ngg_streamout;
struct {
#define OPT_BOOL(name, dflt, description) bool name:1;
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 920febadba2..53cedb5b83c 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -1013,7 +1013,7 @@ static void si_emit_query_predication(struct si_context *ctx)
if (!query)
return;
- if (ctx->chip_class == GFX10 &&
+ if (ctx->screen->use_ngg_streamout &&
(query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)) {
assert(!"not implemented");
@@ -1100,7 +1100,7 @@ static struct pipe_query *si_create_query(struct pipe_context *ctx, unsigned que
query_type != SI_QUERY_TIME_ELAPSED_SDMA))
return si_query_sw_create(query_type);
- if (sscreen->info.chip_class >= GFX10 &&
+ if (sscreen->use_ngg_streamout &&
(query_type == PIPE_QUERY_PRIMITIVES_EMITTED ||
query_type == PIPE_QUERY_PRIMITIVES_GENERATED ||
query_type == PIPE_QUERY_SO_STATISTICS ||
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 8d3763c15bf..8dd608b5378 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3460,7 +3460,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
ret = si_insert_input_ptr(ctx, ret,
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
- if (ctx->screen->info.chip_class >= GFX10) {
+ if (ctx->screen->use_ngg) {
ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
}
@@ -3666,7 +3666,7 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
}
}
- if (ctx->ac.chip_class <= GFX9 &&
+ if (!ctx->screen->use_ngg_streamout &&
ctx->shader->selector->so.num_outputs)
si_llvm_emit_streamout(ctx, outputs, i, 0);
@@ -4462,7 +4462,7 @@ static void declare_streamout_params(struct si_shader_context *ctx,
struct pipe_stream_output_info *so,
struct si_function_info *fninfo)
{
- if (ctx->ac.chip_class >= GFX10)
+ if (ctx->screen->use_ngg_streamout)
return;
/* Streamout SGPRs. */
@@ -5738,7 +5738,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
/* Fetch the vertex stream ID.*/
LLVMValueRef stream_id;
- if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs)
+ if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
stream_id = si_unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
else
stream_id = ctx.i32_0;
@@ -5798,7 +5798,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
/* Streamout and exports. */
- if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs) {
+ if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {
si_llvm_emit_streamout(&ctx, outputs,
gsinfo->num_outputs,
stream);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 8254d7ba2a3..118d87e4734 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -586,10 +586,11 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
struct radeon_cmdbuf *cs = sctx->gfx_cs;
enum pipe_prim_type rast_prim = sctx->current_rast_prim;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ bool use_ngg = sctx->screen->use_ngg;
if (likely(rast_prim == sctx->last_rast_prim &&
rs->pa_sc_line_stipple == sctx->last_sc_line_stipple &&
- (sctx->chip_class <= GFX9 ||
+ (!use_ngg ||
rs->flatshade_first == sctx->last_flatshade_first)))
return;
@@ -610,13 +611,13 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
sctx->context_roll = true;
- if (sctx->chip_class >= GFX10) {
+ if (use_ngg) {
sctx->current_vs_state &= C_VS_STATE_OUTPRIM;
sctx->current_vs_state |= S_VS_STATE_OUTPRIM(gs_out);
}
}
- if (sctx->chip_class >= GFX10) {
+ if (use_ngg) {
unsigned vtx_index = rs->flatshade_first ? 0 : gs_out;
sctx->current_vs_state &= C_VS_STATE_PROVOKING_VTX_INDEX;
sctx->current_vs_state |= S_VS_STATE_PROVOKING_VTX_INDEX(vtx_index);
@@ -662,7 +663,7 @@ static void si_emit_vs_state(struct si_context *sctx,
}
/* For NGG: */
- if (sctx->chip_class >= GFX10 &&
+ if (sctx->screen->use_ngg &&
sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX] !=
R_00B230_SPI_SHADER_USER_DATA_GS_0) {
radeon_set_sh_reg(cs,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index d3c3677d82b..18cdc989cf8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1413,8 +1413,10 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
S_00B12C_OC_LDS_EN(oc_lds_en) |
S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
- if (sscreen->info.chip_class <= GFX9) {
+ if (sscreen->info.chip_class <= GFX9)
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
+
+ if (!sscreen->use_ngg_streamout) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
@@ -2453,7 +2455,9 @@ static void si_init_shader_selector_async(void *job, int thread_index)
si_parse_next_shader_property(&sel->info,
sel->so.num_outputs != 0,
&shader->key);
- if (sscreen->info.chip_class >= GFX10 &&
+
+ if (sscreen->use_ngg &&
+ (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
((sel->type == PIPE_SHADER_VERTEX &&
!shader->key.as_ls && !shader->key.as_es) ||
sel->type == PIPE_SHADER_TESS_EVAL ||
@@ -2537,7 +2541,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
/* The GS copy shader is always pre-compiled. */
if (sel->type == PIPE_SHADER_GEOMETRY &&
- (sscreen->info.chip_class <= GFX9 || sel->tess_turns_off_ngg)) {
+ (!sscreen->use_ngg || sel->tess_turns_off_ngg)) {
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
@@ -2993,7 +2997,7 @@ static void si_update_tess_uses_prim_id(struct si_context *sctx)
static bool si_update_ngg(struct si_context *sctx)
{
- if (sctx->chip_class <= GFX9)
+ if (!sctx->screen->use_ngg)
return false;
bool new_ngg = true;
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index da8c5465488..ae91c55e0c2 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -50,7 +50,7 @@ si_create_so_target(struct pipe_context *ctx,
return NULL;
}
- unsigned buf_filled_size_size = sctx->chip_class >= GFX10 ? 8 : 4;
+ unsigned buf_filled_size_size = sctx->screen->use_ngg_streamout ? 8 : 4;
u_suballocator_alloc(sctx->allocator_zeroed_memory, buf_filled_size_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
@@ -127,7 +127,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
SI_CONTEXT_INV_VCACHE;
/* The BUFFER_FILLED_SIZE is written using a PS_DONE event. */
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
/* Wait now. This is needed to make sure that GDS is not
@@ -146,7 +146,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
* start writing to the targets.
*/
if (num_targets) {
- if (sctx->chip_class >= GFX10)
+ if (sctx->screen->use_ngg_streamout)
si_allocate_gds(sctx);
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
@@ -197,7 +197,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
struct pipe_shader_buffer sbuf;
sbuf.buffer = targets[i]->buffer;
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sbuf.buffer_offset = targets[i]->buffer_offset;
sbuf.buffer_size = targets[i]->buffer_size;
} else {
@@ -370,7 +370,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
void si_emit_streamout_end(struct si_context *sctx)
{
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
gfx10_emit_streamout_end(sctx);
return;
}
@@ -423,7 +423,7 @@ void si_emit_streamout_end(struct si_context *sctx)
static void si_emit_streamout_enable(struct si_context *sctx)
{
- assert(sctx->chip_class < GFX10);
+ assert(!sctx->screen->use_ngg_streamout);
radeon_set_context_reg_seq(sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
radeon_emit(sctx->gfx_cs,
@@ -449,7 +449,7 @@ static void si_set_streamout_enable(struct si_context *sctx, bool enable)
(sctx->streamout.enabled_mask << 8) |
(sctx->streamout.enabled_mask << 12);
- if (sctx->chip_class < GFX10 &&
+ if (!sctx->screen->use_ngg_streamout &&
((old_strmout_en != si_get_strmout_en(sctx)) ||
(old_hw_enabled_mask != sctx->streamout.hw_enabled_mask)))
si_mark_atom_dirty(sctx, &sctx->atoms.s.streamout_enable);
@@ -458,7 +458,7 @@ static void si_set_streamout_enable(struct si_context *sctx, bool enable)
void si_update_prims_generated_query_state(struct si_context *sctx,
unsigned type, int diff)
{
- if (sctx->chip_class < GFX10 &&
+ if (!sctx->screen->use_ngg_streamout &&
type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = si_get_strmout_en(sctx);
@@ -479,7 +479,7 @@ void si_init_streamout_functions(struct si_context *sctx)
sctx->b.stream_output_target_destroy = si_so_target_destroy;
sctx->b.set_stream_output_targets = si_set_streamout_targets;
- if (sctx->chip_class >= GFX10) {
+ if (sctx->screen->use_ngg_streamout) {
sctx->atoms.s.streamout_begin.emit = gfx10_emit_streamout_begin;
} else {
sctx->atoms.s.streamout_begin.emit = si_emit_streamout_begin;