diff options
Diffstat (limited to 'src/gallium/drivers/etnaviv/etnaviv_emit.c')
-rw-r--r-- | src/gallium/drivers/etnaviv/etnaviv_emit.c | 249 |
1 files changed, 187 insertions, 62 deletions
diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index 508c7b1b6ab..f63ec774864 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -133,6 +133,91 @@ required_stream_size(struct etna_context *ctx) return size; } +/* Emit state that only exists on HALTI5+ */ +static void +emit_halti5_only_state(struct etna_context *ctx, int vs_output_count) +{ + struct etna_cmd_stream *stream = ctx->stream; + uint32_t dirty = ctx->dirty; + struct etna_coalesce coalesce; + + etna_coalesce_start(stream, &coalesce); + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + /* Magic states (load balancing, inter-unit sync, buffers) */ + /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8)); + /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20)); + for (int x = 0; x < 4; ++x) { + /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { + for (int x = 0; x < 4; ++x) { + /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS); + /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count); + /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS); + /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS); + } + etna_coalesce_end(stream, &coalesce); +} + +/* Emit state that no longer exists on HALTI5 */ +static void +emit_pre_halti5_state(struct etna_context *ctx) +{ + struct etna_cmd_stream *stream = ctx->stream; + uint32_t dirty = ctx->dirty; + struct etna_coalesce coalesce; + + etna_coalesce_start(stream, &coalesce); + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC); + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + for (int x = 0; x < 4; ++x) { + /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { + for (int x = 0; x < 4; ++x) { + /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC); + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + for (int x = 0; x < 10; ++x) { + /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { + /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04); + for (int x = 0; x < 4; ++x) { + /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]); + } + for (int x = 0; x < 16; ++x) { + /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]); + } + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { + /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC); + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { + /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC); + } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { + /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS); + for (int x = 0; x < 2; ++x) { + /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]); + } + } + etna_coalesce_end(stream, &coalesce); +} + /* Weave state before draw operation. This function merges all the compiled * state blocks under the context into one device register state. Parts of * this state that are changed since last call (dirty) will be uploaded as @@ -197,16 +282,31 @@ etna_emit_state(struct etna_context *ctx) * a) the number of vertex elements written matters: so write only active ones * b) the vertex element states must all be written: do not skip entries that stay the same */ if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) { - /* Special case: vertex elements must always be sent in full if changed */ - /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), - ctx->vertex_elements->num_elements, - ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG); - if (ctx->specs.halti >= 2) { - /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0), + if (ctx->specs.halti >= 5) { + /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0), + ctx->vertex_elements->num_elements, + ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0); + /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0), ctx->vertex_elements->num_elements, ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE); + /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0), + ctx->vertex_elements->num_elements, + ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1); + } else { + /* Special case: vertex elements must always be sent in full if changed */ + /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), + ctx->vertex_elements->num_elements, + ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG); + if (ctx->specs.halti >= 2) { + /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0), + ctx->vertex_elements->num_elements, + ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE); + } } } + unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex + ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE + : ctx->shader_state.VS_OUTPUT_COUNT; /* The following code is originally generated by gen_merge_state.py, to * emit state in increasing order of address (this makes it possible to merge @@ -247,52 +347,48 @@ etna_emit_state(struct etna_context *ctx) /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR); /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL); } - if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS) && ctx->specs.stream_count == 1))) { - /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR); - /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL); - } if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) { /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX); } - if (likely((dirty & (ETNA_DIRTY_VERTEX_BUFFERS)) && ctx->specs.stream_count > 1)) { - for (int x = 0; x < ctx->vertex_buffer.count; ++x) { - /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); - } - for (int x = 0; x < ctx->vertex_buffer.count; ++x) { - if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { - /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); + if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) { + if (ctx->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */ + for (int x = 0; x < ctx->vertex_buffer.count; ++x) { + /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); + } + for (int x = 0; x < ctx->vertex_buffer.count; ++x) { + if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { + /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); + } + } + for (int x = 0; x < ctx->vertex_buffer.count; ++x) { + if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { + /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_UNK14680(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_UNK14680); + } + } + } else if(ctx->specs.stream_count >= 1) { /* hw w/ multiple vertex streams */ + for (int x = 0; x < ctx->vertex_buffer.count; ++x) { + /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); } + for (int x = 0; x < ctx->vertex_buffer.count; ++x) { + if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { + /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); + } + } + } else { /* hw w/ single vertex stream */ + /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR); + /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL); } } - if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { - /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC); - } if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) { - bool point_size_per_vertex = - etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex; - /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, - point_size_per_vertex - ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE - : ctx->shader_state.VS_OUTPUT_COUNT); + /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count); } if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT); /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL); } if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { - for (int x = 0; x < 4; ++x) { - /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]); - } - } - if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) { - for (int x = 0; x < 4; ++x) { - /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]); - } - } - if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING); - /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC); } if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) { /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X); @@ -321,11 +417,6 @@ etna_emit_state(struct etna_context *ctx) /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH); /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH); } - if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { - for (int x = 0; x < 10; ++x) { - /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]); - } - } if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) { /* this is a bit of a mess: rasterizer.scissor determines whether to use @@ -381,17 +472,7 @@ etna_emit_state(struct etna_context *ctx) if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL); } - if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) { - /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04); - for (int x = 0; x < 4; ++x) { - /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]); - } - for (int x = 0; x < 16; ++x) { - /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]); - } - } if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) { - /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC); /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG); /*01008*/ EMIT_STATE(PS_INPUT_COUNT, ctx->framebuffer.msaa_mode @@ -402,7 +483,6 @@ etna_emit_state(struct etna_context *ctx) ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA : ctx->shader_state.PS_TEMP_REGISTER_CONTROL); /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL); - /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC); } if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) { uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG; @@ -583,12 +663,9 @@ etna_emit_state(struct etna_context *ctx) } } } + if (unlikely(dirty & (ETNA_DIRTY_SHADER))) { /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS); - /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS); - for (int x = 0; x < 2; ++x) { - /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]); - } } if (unlikely(ctx->specs.tex_astc && (dirty & (ETNA_DIRTY_SAMPLER_VIEWS)))) { for (int x = 0; x < VIVS_TE_SAMPLER__LEN; ++x) { @@ -601,6 +678,12 @@ etna_emit_state(struct etna_context *ctx) etna_coalesce_end(stream, &coalesce); /* end only EMIT_STATE */ + /* Emit strongly architecture-specific state */ + if (ctx->specs.halti >= 5) + emit_halti5_only_state(ctx, vs_output_count); + else + emit_pre_halti5_state(ctx); + /* Insert a FE/PE stall as changing the shader instructions (and maybe * the uniforms) can corrupt the previous in-progress draw operation. * Observed with amoeba on GC2000 during the right-to-left rendering @@ -635,10 +718,40 @@ etna_emit_state(struct etna_context *ctx) ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size); /**** Large dynamically-sized state ****/ + bool do_uniform_flush = ctx->specs.halti < 5; if (dirty & (ETNA_DIRTY_SHADER)) { /* Special case: a new shader was loaded; simply re-load all uniforms and * shader code at once */ - if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) { + /* This sequence is special, do not change ordering unless necessary. According to comment + snippets in the Vivante kernel driver a process called "steering" goes on while programming + shader state. This (as I understand it) means certain unified states are "steered" + toward a specific shader unit (VS/PS/...) based on either explicit flags in register + 00860, or what other state is written before "auto-steering". So this means some + state can legitimately be programmed multiple times. + */ + + if (ctx->specs.halti >= 5) { /* ICACHE (HALTI5) */ + assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo); + /* Set icache (VS) */ + etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0); + etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4); + assert(ctx->shader_state.VS_INST_ADDR.bo); + etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR); + etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002); + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE); + etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1); + + /* Set icache (PS) */ + etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0); + etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4); + assert(ctx->shader_state.PS_INST_ADDR.bo); + etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR); + etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002); + etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE); + etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1); + + } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) { + /* ICACHE (pre-HALTI5) */ assert(ctx->specs.has_icache && ctx->specs.has_shader_range_registers); /* Set icache (VS) */ etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16); @@ -680,11 +793,14 @@ etna_emit_state(struct etna_context *ctx) etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0); etna_set_state(stream, VIVS_PS_UNIFORM_BASE, ctx->specs.max_vs_uniforms); } - etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); + + if (do_uniform_flush) + etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); etna_set_state_multi(stream, ctx->specs.vs_uniforms_offset, ctx->shader_state.vs_uniforms_size, ctx->shader_state.VS_UNIFORMS); - etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); + if (do_uniform_flush) + etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); etna_set_state_multi(stream, ctx->specs.ps_uniforms_offset, ctx->shader_state.ps_uniforms_size, ctx->shader_state.PS_UNIFORMS); @@ -698,9 +814,17 @@ etna_emit_state(struct etna_context *ctx) ctx->shader_state.vs_uniforms_size * 4); memcpy(ctx->gpu3d.PS_UNIFORMS, ctx->shader_state.PS_UNIFORMS, ctx->shader_state.ps_uniforms_size * 4); + + if (ctx->specs.halti >= 5) { + /* HALTI5 needs to be prompted to pre-fetch shaders */ + etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000); + etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000); + etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); + } } else { /* ideally this cache would only be flushed if there are VS uniform changes */ - etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); + if (do_uniform_flush) + etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH); etna_coalesce_start(stream, &coalesce); for (int x = 0; x < ctx->shader.vs->uniforms.const_count; ++x) { if (ctx->gpu3d.VS_UNIFORMS[x] != ctx->shader_state.VS_UNIFORMS[x]) { @@ -711,7 +835,8 @@ etna_emit_state(struct etna_context *ctx) etna_coalesce_end(stream, &coalesce); /* ideally this cache would only be flushed if there are PS uniform changes */ - etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); + if (do_uniform_flush) + etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS); etna_coalesce_start(stream, &coalesce); for (int x = 0; x < ctx->shader.fs->uniforms.const_count; ++x) { if (ctx->gpu3d.PS_UNIFORMS[x] != ctx->shader_state.PS_UNIFORMS[x]) { |