diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/common/gen_mi_builder.h | 45 | ||||
-rw-r--r-- | src/intel/common/tests/gen_mi_builder_test.cpp | 30 | ||||
-rw-r--r-- | src/intel/vulkan/anv_genX.h | 8 | ||||
-rw-r--r-- | src/intel/vulkan/genX_cmd_buffer.c | 31 | ||||
-rw-r--r-- | src/intel/vulkan/genX_gpu_memcpy.c | 74 | ||||
-rw-r--r-- | src/intel/vulkan/genX_query.c | 9 |
6 files changed, 106 insertions, 91 deletions
diff --git a/src/intel/common/gen_mi_builder.h b/src/intel/common/gen_mi_builder.h index 3dd8fcb739c..22c0aee3343 100644 --- a/src/intel/common/gen_mi_builder.h +++ b/src/intel/common/gen_mi_builder.h @@ -450,6 +450,51 @@ gen_mi_store(struct gen_mi_builder *b, gen_mi_value_unref(b, dst); } +static inline void +gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst, + uint32_t value, uint32_t size) +{ +#if GEN_GEN >= 8 || GEN_IS_HASWELL + assert(b->num_math_dwords == 0); +#endif + + /* This memset operates in units of dwords. */ + assert(size % 4 == 0); + + for (uint32_t i = 0; i < size; i += 4) { + gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)), + gen_mi_imm(value)); + } +} + +/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */ +static inline void +gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst, + __gen_address_type src, uint32_t size) +{ +#if GEN_GEN >= 8 || GEN_IS_HASWELL + assert(b->num_math_dwords == 0); +#endif + + /* This memcpy operates in units of dwords. */ + assert(size % 4 == 0); + + for (uint32_t i = 0; i < size; i += 4) { + struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i)); + struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i)); +#if GEN_GEN >= 8 || GEN_IS_HASWELL + gen_mi_store(b, dst_val, src_val); +#else + /* IVB does not have a general purpose register for command streamer + * commands. Therefore, we use an alternate temporary register. + */ + struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */ + gen_mi_store(b, tmp_reg, src_val); + gen_mi_store(b, dst_val, tmp_reg); +#endif + } +} + /* * MI_MATH Section. Only available on Haswell+ */ diff --git a/src/intel/common/tests/gen_mi_builder_test.cpp b/src/intel/common/tests/gen_mi_builder_test.cpp index e68d35c9fb9..d192e063d00 100644 --- a/src/intel/common/tests/gen_mi_builder_test.cpp +++ b/src/intel/common/tests/gen_mi_builder_test.cpp @@ -422,6 +422,36 @@ TEST_F(gen_mi_builder_test, mem_reg) EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value); } +TEST_F(gen_mi_builder_test, memset) +{ + const unsigned memset_size = 256; + + gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size); + + submit_batch(); + + uint32_t *out_u32 = (uint32_t *)output; + for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++) + EXPECT_EQ(out_u32[i], 0xdeadbeef); +} + +TEST_F(gen_mi_builder_test, memcpy) +{ + const unsigned memcpy_size = 256; + + uint8_t *in_u8 = (uint8_t *)input; + for (unsigned i = 0; i < memcpy_size; i++) + in_u8[i] = i; + + gen_mi_memcpy(&b, out_addr(0), in_addr(0), 256); + + submit_batch(); + + uint8_t *out_u8 = (uint8_t *)output; + for (unsigned i = 0; i < memcpy_size; i++) + EXPECT_EQ(out_u8[i], i); +} + /* Start of MI_MATH section */ #if GEN_GEN >= 8 || GEN_IS_HASWELL diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 8fd32cabf1e..a5435e566a3 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -78,13 +78,5 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_address dst, struct anv_address src, uint32_t size); -void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address dst, struct anv_address src, - uint32_t size); - -void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address dst, uint32_t value, - uint32_t size); - void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index ec72010cb86..7771afe663e 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -822,12 +822,35 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer, anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect); unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size; +#if GEN_GEN == 7 + /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM + * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is + * in-flight when they are issued even if the memory touched is not + * currently active for rendering. The weird bit is that it is not the + * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight + * rendering hangs such that the next stalling command after the + * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang. + * + * It is unclear exactly why this hang occurs. Both MI commands come with + * warnings about the 3D pipeline but that doesn't seem to fully explain + * it. My (Jason's) best theory is that it has something to do with the + * fact that we're using a GPU state register as our temporary and that + * something with reading/writing it is causing problems. + * + * In order to work around this issue, we emit a PIPE_CONTROL with the + * command streamer stall bit set. + */ + cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); +#endif + + struct gen_mi_builder b; + gen_mi_builder_init(&b, &cmd_buffer->batch); + if (copy_from_surface_state) { - genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr, - ss_clear_addr, copy_size); + gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size); } else { - genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr, - entry_addr, copy_size); + gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size); /* Updating a surface state object may require that the state cache be * invalidated. From the SKL PRM, Shared Functions -> State -> State diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 7ef14dd14ef..49230c3da0c 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -52,80 +52,6 @@ gcd_pow2_u64(uint64_t a, uint64_t b) } void -genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address dst, struct anv_address src, - uint32_t size) -{ - /* This memcpy operates in units of dwords. */ - assert(size % 4 == 0); - assert(dst.offset % 4 == 0); - assert(src.offset % 4 == 0); - -#if GEN_GEN == 7 - /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM - * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is - * in-flight when they are issued even if the memory touched is not - * currently active for rendering. The weird bit is that it is not the - * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight - * rendering hangs such that the next stalling command after the - * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang. - * - * It is unclear exactly why this hang occurs. Both MI commands come with - * warnings about the 3D pipeline but that doesn't seem to fully explain - * it. My (Jason's) best theory is that it has something to do with the - * fact that we're using a GPU state register as our temporary and that - * something with reading/writing it is causing problems. - * - * In order to work around this issue, we emit a PIPE_CONTROL with the - * command streamer stall bit set. - */ - cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT; - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); -#endif - - for (uint32_t i = 0; i < size; i += 4) { -#if GEN_GEN >= 8 - anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) { - cp.DestinationMemoryAddress = anv_address_add(dst, i); - cp.SourceMemoryAddress = anv_address_add(src, i); - } -#else - /* IVB does not have a general purpose register for command streamer - * commands. Therefore, we use an alternate temporary register. - */ -#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */ - anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) { - load.RegisterAddress = TEMP_REG; - load.MemoryAddress = anv_address_add(src, i); - } - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) { - store.RegisterAddress = TEMP_REG; - store.MemoryAddress = anv_address_add(dst, i); - } -#undef TEMP_REG -#endif - } - return; -} - -void -genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer, - struct anv_address dst, uint32_t value, - uint32_t size) -{ - /* This memset operates in units of dwords. */ - assert(size % 4 == 0); - assert(dst.offset % 4 == 0); - - for (uint32_t i = 0; i < size; i += 4) { - anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = anv_address_add(dst, i); - sdi.ImmediateData = value; - } - } -} - -void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_address dst, struct anv_address src, uint32_t size) diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index 3374ba6417e..5fbf92c4ab1 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -363,14 +363,13 @@ emit_query_availability(struct anv_cmd_buffer *cmd_buffer, */ static void emit_zero_queries(struct anv_cmd_buffer *cmd_buffer, - struct anv_query_pool *pool, + struct gen_mi_builder *b, struct anv_query_pool *pool, uint32_t first_index, uint32_t num_queries) { for (uint32_t i = 0; i < num_queries; i++) { struct anv_address slot_addr = anv_query_address(pool, first_index + i); - genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8), - 0, pool->stride - 8); + gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8); emit_query_availability(cmd_buffer, slot_addr); } } @@ -574,7 +573,7 @@ void genX(CmdEndQueryIndexedEXT)( const uint32_t num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask); if (num_queries > 1) - emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1); + emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1); } } @@ -628,7 +627,7 @@ void genX(CmdWriteTimestamp)( const uint32_t num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask); if (num_queries > 1) - emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1); + emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1); } } |