diff options
author | Grazvydas Ignotas <[email protected]> | 2015-08-10 00:42:35 +0300 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2015-08-11 14:46:54 +0200 |
commit | 50545882113b389decc3f05771764f6c62213af3 (patch) | |
tree | e7a5cd5ecc4354968e10ec2ad34cd9e8d3aaab3e /src/gallium/drivers/r600/r600_pipe.h | |
parent | c58534c1384dc63bb1b13eb37c06bdb4652c13ff (diff) |
r600g: use a bitfield to track dirty atoms
r600 currently has 73 atoms and looping through their dirty flags has
become costly because checking each flag requires a pointer
dereference before the read. To avoid having to do that, add an additional
bitfield which can be checked really quickly thanks to the tzcnt instruction.
An id field was added to struct r600_atom, but that doesn't affect memory
usage on either 32-bit or 64-bit CPUs because it was stuffed into padding.
The performance improvement is ~2% for benchmarks that can have FPS in
the thousands but is hardly measurable in "real" programs.
Signed-off-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/r600/r600_pipe.h')
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 8b612698b13..5d10bb48157 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -85,6 +85,9 @@
 #define R600_BIG_ENDIAN 0
 #endif
 
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
 struct r600_context;
 struct r600_bytecode;
 struct r600_shader_key;
@@ -426,6 +429,8 @@ struct r600_context {
 
 	/* State binding slots are here. */
 	struct r600_atom *atoms[R600_NUM_ATOMS];
+	/* Dirty atom bitmask for fast tests */
+	unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
 	/* States for CS initialization. */
 	struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
 	/** Compute specific registers initializations. The start_cs_cmd atom
@@ -502,7 +507,18 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
 				       struct r600_atom *atom,
 				       bool dirty)
 {
+	unsigned long mask;
+	unsigned int w;
+
 	atom->dirty = dirty;
+
+	assert(atom->id != 0);
+	w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+	mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+	if (dirty)
+		rctx->dirty_atoms[w] |= mask;
+	else
+		rctx->dirty_atoms[w] &= ~mask;
 }
 
 static inline void r600_mark_atom_dirty(struct r600_context *rctx,
@@ -511,6 +527,35 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
 	r600_set_atom_dirty(rctx, atom, true);
 }
 
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+						unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+	unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+	unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
+	unsigned long bits, mask = (1ul << bit) - 1;
+
+	for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+		bits = rctx->dirty_atoms[w] & ~mask;
+		if (bits == 0)
+			continue;
+		return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+	}
+
+	return R600_NUM_ATOMS;
+#else
+	for (; id < R600_NUM_ATOMS; id++) {
+		bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+			(1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
+		assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+		if (dirty)
+			break;
+	}
+
+	return id;
+#endif
+}
+
 void r600_trace_emit(struct r600_context *rctx);
 
 static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)