diff options
author | Iago Toral Quiroga <[email protected]> | 2019-08-14 09:27:13 +0200 |
---|---|---|
committer | Jose Maria Casanova Crespo <[email protected]> | 2019-10-18 14:08:52 +0200 |
commit | 46182fc1da0b5cabc09e818bddc6b7968d4d2b7b (patch) | |
tree | 684bbd74617b7590b62e5a0a24ba2a5c3d892233 | |
parent | d2203d74c654b5519a9ebfce061f763b495ed568 (diff) |
v3d: add a new flag for a dirty TMU cache at v3d_compiler
The flag is set for any TMU write, both on spills and on general TMU
accesses. It is then used as part of v3d_emit_gl_shader_state later.
v2: add a new flag at v3d_compiler instead of dirtying the flag
at v3dx if there is any spill (change suggested by Eric, added by
Alejandro)
v3: set this for anything that is not a load and do it also in
v3d40_vir_emit_image_load_store (Eric)
Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 3 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d40_tex.c | 3 | ||||
-rw-r--r-- | src/broadcom/compiler/v3d_compiler.h | 4 | ||||
-rw-r--r-- | src/broadcom/compiler/vir.c | 1 | ||||
-rw-r--r-- | src/broadcom/compiler/vir_register_allocate.c | 1 | ||||
-rw-r--r-- | src/gallium/drivers/v3d/v3dx_draw.c | 5 |
6 files changed, 17 insertions, 0 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 1ca7c2fc646..2de7f7e32b0 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -208,6 +208,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr, instr->intrinsic == nir_intrinsic_load_scratch || instr->intrinsic == nir_intrinsic_load_shared); + if (!is_load) + c->tmu_dirty_rcl = true; + bool has_index = !is_shared_or_scratch; int offset_src; diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c index 9ee7df21421..287116381fb 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d40_tex.c @@ -410,4 +410,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, if (nir_intrinsic_dest_components(instr) == 0) vir_TMUWT(c); + + if (instr->intrinsic != nir_intrinsic_image_deref_load) + c->tmu_dirty_rcl = true; } diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index b61119f5615..fbb4b64a365 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -639,6 +639,8 @@ struct v3d_compile { bool lock_scoreboard_on_first_thrsw; bool failed; + + bool tmu_dirty_rcl; }; struct v3d_uniform_list { @@ -658,6 +660,8 @@ struct v3d_prog_data { * after-final-THRSW state. 
*/ bool single_seg; + + bool tmu_dirty_rcl; }; struct v3d_vs_prog_data { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index b86ffc82ea3..dc5d3fe3bed 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -710,6 +710,7 @@ v3d_set_prog_data(struct v3d_compile *c, prog_data->threads = c->threads; prog_data->single_seg = !c->last_thrsw; prog_data->spill_size = c->spill_size; + prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl; v3d_set_prog_data_uniforms(c, prog_data); diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 7583acf155c..623cc22cefa 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -270,6 +270,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp) vir_emit_thrsw(c); vir_TMUWT(c); c->spills++; + c->tmu_dirty_rcl = true; } } diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index b0b52fa00b6..5795279b886 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -349,6 +349,11 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, v3d_write_uniforms(v3d, v3d->prog.cs, PIPE_SHADER_VERTEX); + /* Update the cache dirty flag based on the shader progs data */ + job->tmu_dirty_rcl |= v3d->prog.cs->prog_data.vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= v3d->prog.vs->prog_data.vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= v3d->prog.fs->prog_data.fs->base.tmu_dirty_rcl; + /* See GFXH-930 workaround below */ uint32_t num_elements_to_emit = MAX2(vtx->num_elements, 1); uint32_t shader_rec_offset = |