diff options
author | Aravindan Muthukumar <[email protected]> | 2017-11-09 11:15:28 +0530 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2017-11-20 14:52:42 -0800 |
commit | 971b3c019b7309c32048b68f61af03d1ae2b7d10 (patch) | |
tree | 4323fe782f4c81c46ef64557d7136791abd780e1 | |
parent | c8417c8d25e57b00e7202908d93e56daf54513af (diff) |
i965: Optimize bucket index calculation
Reducing Bucket index calculation to O(1).
This algorithm calculates the index using matrix method. Assuming
PAGE_SIZE is 4096, matrix arrangement is as below:
1*4096 2*4096 3*4096 4*4096
5*4096 6*4096 7*4096 8*4096
10*4096 12*4096 14*4096 16*4096
20*4096 24*4096 28*4096 32*4096
... ... ... ...
... ... ... ...
... ... ... max_cache_size
From this matrix its clearly seen that every row follows the below way:
... ... ... n
n+(1/4)n n+(1/2)n n+(3/4)n 2n
Row is calculated as log2(size/PAGE_SIZE) Column is calculated as
converting the difference between the elements to fit into power size of
two and indexing it.
Final Index is (row*4)+(col-1)
Tested with Intel Mesa CI.
Improves performance of 3DMark on BXT by 0.705966% +/- 0.229767% (n=20)
v4: Review comments on style and code comments implemented (Ian).
v3: Review comments implemented (Ian).
v2: Review comments implemented (Jason).
Signed-off-by: Aravindan Muthukumar <[email protected]>
Signed-off-by: Kedar Karanje <[email protected]>
Reviewed-by: Yogesh Marathe <[email protected]>
Signed-off-by: Ian Romanick <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_bufmgr.c | 47 |
1 files changed, 39 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 2e5e0aa5d11..469895ed6b0 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -86,6 +86,8 @@ #define memclear(s) memset(&s, 0, sizeof(s)) +#define PAGE_SIZE 4096 + #define FILE_DEBUG_FLAG DEBUG_BUFMGR static inline int @@ -180,19 +182,44 @@ bo_tile_pitch(struct brw_bufmgr *bufmgr, uint32_t pitch, uint32_t tiling) return ALIGN(pitch, tile_width); } +/** + * This function finds the correct bucket fit for the input size. + * The function works with O(1) complexity when the requested size + * was queried instead of iterating the size through all the buckets. + */ static struct bo_cache_bucket * bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t size) { - int i; + /* Calculating the pages and rounding up to the page size. */ + const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + + /* Row Bucket sizes clz((x-1) | 3) Row Column + * in pages stride size + * 0: 1 2 3 4 -> 30 30 30 30 4 1 + * 1: 5 6 7 8 -> 29 29 29 29 4 1 + * 2: 10 12 14 16 -> 28 28 28 28 8 2 + * 3: 20 24 28 32 -> 27 27 27 27 16 4 + */ + const unsigned row = 30 - __builtin_clz((pages - 1) | 3); + const unsigned row_max_pages = 4 << row; - for (i = 0; i < bufmgr->num_buckets; i++) { - struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; - if (bucket->size >= size) { - return bucket; - } - } + /* The '& ~2' is the special case for row 1. In row 1, max pages / + * 2 is 2, but the previous row maximum is zero (because there is + * no previous row). All row maximum sizes are power of 2, so that + * is the only case where that bit will be set. + */ + const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; + int col_size_log2 = row - 1; + col_size_log2 += (col_size_log2 < 0); - return NULL; + const unsigned col = (pages - prev_row_max_pages + + ((1 << col_size_log2) - 1)) >> col_size_log2; + + /* Calculating the index based on the row and column. */ + const unsigned index = (row * 4) + (col - 1); + + return (index < bufmgr->num_buckets) ? + &bufmgr->cache_bucket[index] : NULL; } int @@ -1267,6 +1294,10 @@ add_bucket(struct brw_bufmgr *bufmgr, int size) list_inithead(&bufmgr->cache_bucket[i].head); bufmgr->cache_bucket[i].size = size; bufmgr->num_buckets++; + + assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); + assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); } static void |