diff options
Diffstat (limited to 'src/gallium/drivers/r600/r600_pipe.h')
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 128 |
1 files changed, 92 insertions, 36 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 4ea270d3839..9b66105641a 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -36,7 +36,7 @@ #include "util/list.h" #include "util/u_transfer.h" -#define R600_NUM_ATOMS 73 +#define R600_NUM_ATOMS 75 #define R600_MAX_VIEWPORTS 16 @@ -85,6 +85,9 @@ #define R600_BIG_ENDIAN 0 #endif +#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8) +#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS) + struct r600_context; struct r600_bytecode; struct r600_shader_key; @@ -426,6 +429,8 @@ struct r600_context { /* State binding slots are here. */ struct r600_atom *atoms[R600_NUM_ATOMS]; + /* Dirty atom bitmask for fast tests */ + unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN]; /* States for CS initialization. */ struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ /** Compute specific registers initializations. The start_cs_cmd atom @@ -490,37 +495,92 @@ struct r600_context { struct r600_isa *isa; }; -static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs, +static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, struct r600_command_buffer *cb) { - assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS); + assert(cs->cdw + cb->num_dw <= cs->max_dw); memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw); cs->cdw += cb->num_dw; } +static inline void r600_set_atom_dirty(struct r600_context *rctx, + struct r600_atom *atom, + bool dirty) +{ + unsigned long mask; + unsigned int w; + + atom->dirty = dirty; + + assert(atom->id != 0); + w = atom->id / R600_DIRTY_ATOM_WORD_BITS; + mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS); + if (dirty) + rctx->dirty_atoms[w] |= mask; + else + rctx->dirty_atoms[w] &= ~mask; +} + +static inline void r600_mark_atom_dirty(struct r600_context *rctx, + struct r600_atom *atom) +{ + r600_set_atom_dirty(rctx, atom, true); +} + +static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx, + unsigned int id) +{ +#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ) + unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS; + unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS; + unsigned long bits, mask = (1ul << bit) - 1; + + for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) { + bits = rctx->dirty_atoms[w] & ~mask; + if (bits == 0) + continue; + return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits); + } + + return R600_NUM_ATOMS; +#else + for (; id < R600_NUM_ATOMS; id++) { + bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] & + (1ul << (id % R600_DIRTY_ATOM_WORD_BITS))); + assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty)); + if (dirty) + break; + } + + return id; +#endif +} + void r600_trace_emit(struct r600_context *rctx); -static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) +static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) { atom->emit(&rctx->b, atom); - atom->dirty = false; + r600_set_atom_dirty(rctx, atom, false); if (rctx->screen->b.trace_bo) { r600_trace_emit(rctx); } } -static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso) +static inline void r600_set_cso_state(struct r600_context *rctx, + struct r600_cso_state *state, void *cso) { state->cso = cso; - state->atom.dirty = cso != NULL; + r600_set_atom_dirty(rctx, &state->atom, cso != NULL); } -static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso, +static inline void r600_set_cso_state_with_cb(struct r600_context *rctx, + struct r600_cso_state *state, void *cso, struct r600_command_buffer *cb) { state->cb = cb; state->atom.num_dw = cb ? cb->num_dw : 0; - r600_set_cso_state(state, cso); + r600_set_cso_state(rctx, state, cso); } /* compute_memory_pool.c */ @@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool); struct compute_memory_pool* compute_memory_pool_new( struct r600_screen *rscreen); -/* evergreen_compute.c */ -void evergreen_set_cs_sampler_view(struct pipe_context *ctx_, - unsigned start_slot, unsigned count, - struct pipe_sampler_view **views); - /* evergreen_state.c */ struct pipe_sampler_view * evergreen_create_sampler_view_custom(struct pipe_context *ctx, @@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom); void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom); void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a); +void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id); void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw); @@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, /*Evergreen Compute packet3*/ #define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE) -static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value) +static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value) { cb->buf[cb->num_dw++] = value; } -static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) +static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) { assert(cb->num_dw+num <= cb->max_num_dw); memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0])); cb->num_dw += num; } -static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg < R600_CONTEXT_REG_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. */ -static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. */ -static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_CTL_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2; } -static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_LOOP_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. */ -static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= EG_LOOP_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2; } -static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_config_reg_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_context_reg_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_ctl_const_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_loop_const_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { eg_store_loop_const_seq(cb, reg, 1); r600_store_value(cb, value); @@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw); void r600_release_command_buffer(struct r600_command_buffer *cb); -static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { r600_write_context_reg_seq(cs, reg, num); /* Set the compute bit on the packet header */ cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE; } -static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { assert(reg >= R600_CTL_CONST_OFFSET); - assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); + assert(cs->cdw+2+num <= cs->max_dw); cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0); cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2; } -static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) { r600_write_compute_context_reg_seq(cs, reg, 1); radeon_emit(cs, value); } -static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag) +static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag) { if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) { r600_write_compute_context_reg(cs, reg, value); @@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi } } -static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) { r600_write_ctl_const_seq(cs, reg, 1); radeon_emit(cs, value); @@ -855,21 +911,21 @@ static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re /* * common helpers */ -static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) +static inline uint32_t S_FIXED(float value, uint32_t frac_bits) { return value * (1 << frac_bits); } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) /* 12.4 fixed-point */ -static INLINE unsigned r600_pack_float_12p4(float x) +static inline unsigned r600_pack_float_12p4(float x) { return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; } /* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */ -static INLINE bool r600_can_read_depth(struct r600_texture *rtex) +static inline bool r600_can_read_depth(struct r600_texture *rtex) { return rtex->resource.b.b.nr_samples <= 1 && (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || @@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex) #define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1 #define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2 -static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode) +static inline unsigned r600_conv_prim_to_gs_out(unsigned mode) { static const int prim_conv[] = { V_028A6C_OUTPRIM_TYPE_POINTLIST, |