diff options
author | Eric Anholt <[email protected]> | 2017-11-28 16:17:16 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2017-12-01 15:37:28 -0800 |
commit | 0ed952c7e9b811bc11dec64bd4bebcdf4222cc85 (patch) | |
tree | 3f559f6dd6705c0724f3494736fbeebf1d7d96f5 | |
parent | 230e646a4013ed5d9c80c54d48ef0ac9ee4edbb0 (diff) |
broadcom/vc4: Use a single-entry cached last_hindex value.
Since almost all BOs will be in one CL at a time, this cache will almost
always hit except for the first usage of the BO in each CL.
This didn't show up as statistically significant on the minetest trace
(n=340), but if I lop off the throttled lobe of the bimodal distribution,
it very clearly does (0.74731% +/- 0.162093%, n=269).
-rw-r--r-- | src/gallium/drivers/vc4/vc4_bufmgr.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/vc4/vc4_cl.c | 14 |
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index 4e7b23e0862..e0f6bbcfd8b 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -39,6 +39,14 @@ struct vc4_bo { uint32_t handle; uint32_t size; + /* This will be read/written by multiple threads without a lock -- you + * should take a snapshot and use it to see if you happen to be in the + * CL's handles at this position, to make most lookups O(1). It's + * volatile to make sure that the compiler doesn't emit multiple loads + * from the address, which would make the lookup racy. + */ + volatile uint32_t last_hindex; + /** Entry in the linked list of buffers freed, by age. */ struct list_head time_list; /** Entry in the per-page-count linked list of buffers freed (by age). */ diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c index 508281a27bb..7ae092ebce3 100644 --- a/src/gallium/drivers/vc4/vc4_cl.c +++ b/src/gallium/drivers/vc4/vc4_cl.c @@ -61,10 +61,19 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo) { uint32_t hindex; uint32_t *current_handles = job->bo_handles.base; + uint32_t cl_hindex_count = cl_offset(&job->bo_handles) / 4; + uint32_t last_hindex = bo->last_hindex; /* volatile read! */ - for (hindex = 0; hindex < cl_offset(&job->bo_handles) / 4; hindex++) { - if (current_handles[hindex] == bo->handle) + if (last_hindex < cl_hindex_count && + current_handles[last_hindex] == bo->handle) { + return last_hindex; + } + + for (hindex = 0; hindex < cl_hindex_count; hindex++) { + if (current_handles[hindex] == bo->handle) { + bo->last_hindex = hindex; return hindex; + } } struct vc4_cl_out *out; @@ -79,5 +88,6 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo) job->bo_space += bo->size; + bo->last_hindex = hindex; return hindex; } |