diff options
-rw-r--r-- | src/freedreno/computerator/a6xx.c | 11 | ||||
-rw-r--r-- | src/freedreno/registers/a6xx.xml | 27 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_clear_blit.c | 13 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_cmd_buffer.c | 22 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_pipeline.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_compute.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 15 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/freedreno/a6xx/fd6_program.c | 13 |
9 files changed, 130 insertions, 22 deletions
diff --git a/src/freedreno/computerator/a6xx.c b/src/freedreno/computerator/a6xx.c index 72b0f067efe..21d7d1c4894 100644 --- a/src/freedreno/computerator/a6xx.c +++ b/src/freedreno/computerator/a6xx.c @@ -117,8 +117,15 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) const struct ir3_info *i = &v->info; enum a3xx_threadsize thrsz = FOUR_QUADS; - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); + OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1); + OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_HS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_DS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_GS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_FS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_CS_STATE | + A6XX_HLSQ_INVALIDATE_CMD_CS_IBO | + A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO); unsigned constlen = align(v->constlen, 4); OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index 56e1746303b..f821832ba06 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -3408,8 +3408,31 @@ to upconvert to 32b float internally? <bitfield name="EVENT" low="0" high="6" type="vgt_event_type"/> </reg32> - <!-- probably: --> - <reg32 offset="0xbb08" name="HLSQ_UPDATE_CNTL"/> + <reg32 offset="0xbb08" name="HLSQ_INVALIDATE_CMD"> + <doc> + This register clears pending loads queued up by + CP_LOAD_STATE6. Each bit resets a particular kind(s) of + CP_LOAD_STATE6. + </doc> + + <!-- per-stage state: shader, non-bindless UBO, textures, and samplers --> + <bitfield name="VS_STATE" pos="0" type="boolean"/> + <bitfield name="HS_STATE" pos="1" type="boolean"/> + <bitfield name="DS_STATE" pos="2" type="boolean"/> + <bitfield name="GS_STATE" pos="3" type="boolean"/> + <bitfield name="FS_STATE" pos="4" type="boolean"/> + <bitfield name="CS_STATE" pos="5" type="boolean"/> + + <bitfield name="CS_IBO" pos="6" type="boolean"/> + <bitfield name="GFX_IBO" pos="7" type="boolean"/> + + <bitfield name="CS_SHARED_CONST" pos="19" type="boolean"/> + <bitfield name="GFX_SHARED_CONST" pos="8" type="boolean"/> + + <!-- SS6_BINDLESS: one bit per bindless base --> + <bitfield name="CS_BINDLESS" low="9" high="13" type="hex"/> + <bitfield name="GFX_BINDLESS" low="14" high="18" type="hex"/> + </reg32> <reg32 offset="0xbb10" name="HLSQ_FS_CNTL" type="a6xx_hlsq_xs_cntl"/> diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 29b952548bf..2be3e38dccc 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -428,7 +428,18 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ .const_state = &dummy_const_state, }; - tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff)); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f)); tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, global_iova(cmd, shaders[GLOBAL_SH_VS])); tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 765732ad9a9..d1145bfb13e 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -719,7 +719,19 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .cs_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f)); tu_cs_emit_regs(cs, A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass)); @@ -1684,7 +1696,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, } assert(dyn_idx == dynamicOffsetCount); - uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_update_value; + uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value; uint64_t addr[MAX_SETS + 1] = {}; struct tu_cs cs; @@ -1709,7 +1721,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0); hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0); - hlsq_update_value = 0x7c000; + hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f); cmd->state.dirty |= TU_CMD_DIRTY_DESCRIPTOR_SETS | TU_CMD_DIRTY_SHADER_CONSTS; } else { @@ -1717,7 +1729,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0); hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0); - hlsq_update_value = 0x3e00; + hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f); cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS; } @@ -1728,7 +1740,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, tu_cs_emit_array(&cs, (const uint32_t*) addr, 10); tu_cs_emit_pkt4(&cs, hlsq_bindless_base_reg, 10); tu_cs_emit_array(&cs, (const uint32_t*) addr, 10); - tu_cs_emit_regs(&cs, A6XX_HLSQ_UPDATE_CNTL(.dword = hlsq_update_value)); + tu_cs_emit_regs(&cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value)); struct tu_cs_entry ib = tu_cs_end_sub_stream(&cmd->sub_cs, &cs); if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 88cdca7e19c..84cb9c465d9 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -468,8 +468,15 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, const struct ir3_shader_variant *v, uint32_t binary_iova) { - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - tu_cs_emit(cs, 0xff); + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .cs_ibo = true, + .gfx_ibo = true)); tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova); @@ -1355,8 +1362,15 @@ tu6_emit_program(struct tu_cs *cs, STATIC_ASSERT(MESA_SHADER_VERTEX == 0); - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - tu_cs_emit(cs, 0xff); /* XXX */ + tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .cs_ibo = true, + .gfx_ibo = true)); /* Don't use the binning pass variant when GS is present because we don't * support compiling correct binning pass variants with GS. diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index 438557600f2..75d4b965f6f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -34,6 +34,7 @@ #include "fd6_const.h" #include "fd6_context.h" #include "fd6_emit.h" +#include "fd6_pack.h" struct fd6_compute_stateobj { struct ir3_shader *shader; @@ -78,8 +79,16 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) const struct ir3_info *i = &v->info; enum a3xx_threadsize thrsz = FOUR_QUADS; - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + )); OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) | diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 4fa32b5d2ac..ab8fdea19de 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -356,8 +356,19 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); OUT_RING(ring, fd6_ctx->magic.RB_CCU_CNTL_bypass); - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0x7ffff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f + )); emit_marker6(ring, 7); OUT_PKT7(ring, CP_SET_MARKER, 1); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index f20666c145b..4740f60ab45 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1130,8 +1130,20 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) fd6_cache_inv(batch, ring); - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xfffff); + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + .gfx_shared_const = true, + .cs_shared_const = true, + .gfx_bindless = 0x1f, + .cs_bindless = 0x1f + )); OUT_WFI5(ring); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 72a47c1f571..4ee227b027e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -39,6 +39,7 @@ #include "fd6_emit.h" #include "fd6_texture.h" #include "fd6_format.h" +#include "fd6_pack.h" void fd6_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) @@ -225,8 +226,16 @@ setup_stream_out(struct fd6_program_state *state, const struct ir3_shader_varian static void setup_config_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state) { - OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - OUT_RING(ring, 0xff); /* XXX */ + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD( + .vs_state = true, + .hs_state = true, + .ds_state = true, + .gs_state = true, + .fs_state = true, + .cs_state = true, + .gfx_ibo = true, + .cs_ibo = true, + )); debug_assert(state->vs->constlen >= state->bs->constlen); |