diff options
author | Nanley Chery <[email protected]> | 2017-01-25 14:54:39 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-07-22 20:12:09 -0700 |
commit | 0b16600056521b97fb8ab943d67a9739cbc91cd2 (patch) | |
tree | 7fff1daf5add8814ae82a70a3e0dc71bdc91a7f2 | |
parent | dcff5ab9f164afbc29c051b18990a377bb46e4bc (diff) |
anv/gpu_memcpy: Add a lighter-weight GPU memcpy function
We'll be performing a GPU memcpy in more places to copy small amounts of
data. Add an alternate function that thrashes less state.
v2:
- Make a new function (Jason Ekstrand).
- Move the #define into the function.
v3:
- Update the function name (Jason).
- Update comments.
v4: Use an indirect drawing register as TEMP_REG (Jason Ekstrand).
Signed-off-by: Nanley Chery <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- | src/intel/vulkan/anv_genX.h | 5 | ||||
-rw-r--r-- | src/intel/vulkan/genX_gpu_memcpy.c | 40 |
2 files changed, 45 insertions, 0 deletions
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 8da5e075dc3..0b7322e2812 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -69,5 +69,10 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *src, uint32_t src_offset, uint32_t size); +void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dst, uint32_t dst_offset, + struct anv_bo *src, uint32_t src_offset, + uint32_t size); + void genX(blorp_exec)(struct blorp_batch *batch, const struct blorp_params *params); diff --git a/src/intel/vulkan/genX_gpu_memcpy.c b/src/intel/vulkan/genX_gpu_memcpy.c index 5ef35e62833..db723d4a528 100644 --- a/src/intel/vulkan/genX_gpu_memcpy.c +++ b/src/intel/vulkan/genX_gpu_memcpy.c @@ -52,6 +52,46 @@ gcd_pow2_u64(uint64_t a, uint64_t b) } void +genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer, + struct anv_bo *dst, uint32_t dst_offset, + struct anv_bo *src, uint32_t src_offset, + uint32_t size) +{ + /* This memcpy operates in units of dwords. */ + assert(size % 4 == 0); + assert(dst_offset % 4 == 0); + assert(src_offset % 4 == 0); + + for (uint32_t i = 0; i < size; i += 4) { + const struct anv_address src_addr = + (struct anv_address) { src, src_offset + i}; + const struct anv_address dst_addr = + (struct anv_address) { dst, dst_offset + i}; +#if GEN_GEN >= 8 + anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) { + cp.DestinationMemoryAddress = dst_addr; + cp.SourceMemoryAddress = src_addr; + } +#else + /* IVB does not have a general purpose register for command streamer + * commands. Therefore, we use an alternate temporary register. + */ +#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */ + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) { + load.RegisterAddress = TEMP_REG; + load.MemoryAddress = src_addr; + } + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) { + store.RegisterAddress = TEMP_REG; + store.MemoryAddress = dst_addr; + } +#undef TEMP_REG +#endif + } + return; +} + +void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer, struct anv_bo *dst, uint32_t dst_offset, struct anv_bo *src, uint32_t src_offset, |