summary refs log tree commit diff stats
path: root/src/gallium/drivers/r600/r600_pipe.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600/r600_pipe.h')
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 128
1 files changed, 92 insertions, 36 deletions
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 4ea270d3839..9b66105641a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -36,7 +36,7 @@
#include "util/list.h"
#include "util/u_transfer.h"
-#define R600_NUM_ATOMS 73
+#define R600_NUM_ATOMS 75
#define R600_MAX_VIEWPORTS 16
@@ -85,6 +85,9 @@
#define R600_BIG_ENDIAN 0
#endif
/* Bits per word of the dirty-atom bitmask (sizeof(unsigned long) * 8), and the
 * number of such words needed to hold one bit for each of the R600_NUM_ATOMS
 * atom ids. DIV_ROUND_UP is defined outside this view. */
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
struct r600_context;
struct r600_bytecode;
struct r600_shader_key;
@@ -426,6 +429,8 @@ struct r600_context {
/* State binding slots are here. */
struct r600_atom *atoms[R600_NUM_ATOMS];
+ /* Dirty atom bitmask for fast tests */
+ unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
/* States for CS initialization. */
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
/** Compute specific registers initializations. The start_cs_cmd atom
@@ -490,37 +495,92 @@ struct r600_context {
struct r600_isa *isa;
};
/* Append the contents of a pre-built command buffer to a command stream.
 *
 * Copies cb->num_dw entries (4 bytes each) from cb->buf to cs->buf at the
 * current write position cs->cdw, then advances cs->cdw by cb->num_dw.
 * The assert checks that the copy fits; this change switches the limit from
 * the RADEON_MAX_CMDBUF_DWORDS constant to the per-stream cs->max_dw. */
-static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
+static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_command_buffer *cb)
{
- assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw + cb->num_dw <= cs->max_dw);
memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
cs->cdw += cb->num_dw;
}
/* Set or clear the dirty state of an atom.
 *
 * Writes `dirty` to atom->dirty and mirrors it into the bit for atom->id in
 * rctx->dirty_atoms[]: the bit is set when `dirty` is true and cleared
 * otherwise. Asserts that atom->id is non-zero. */
+static inline void r600_set_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom,
+ bool dirty)
+{
+ unsigned long mask;
+ unsigned int w;
+
+ atom->dirty = dirty;
+
+ assert(atom->id != 0);
/* Word index and single-bit mask for this atom id within dirty_atoms[]. */
+ w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+ mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+ if (dirty)
+ rctx->dirty_atoms[w] |= mask;
+ else
+ rctx->dirty_atoms[w] &= ~mask;
+}
+
/* Mark an atom dirty; equivalent to r600_set_atom_dirty(rctx, atom, true). */
+static inline void r600_mark_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom)
+{
+ r600_set_atom_dirty(rctx, atom, true);
+}
+
/* Find the next dirty atom id, searching rctx->dirty_atoms[] from `id` upward.
 *
 * Returns the id of the first set bit at or after `id`. When no set bit is
 * found the bitmask path returns R600_NUM_ATOMS; the fallback loop returns the
 * value `id` has reached when it stops (R600_NUM_ATOMS if the loop ran to the
 * end).
 *
 * Two implementations are selected at compile time: a word-at-a-time scan
 * using a count-trailing-zeros builtin, and a bit-by-bit loop that also
 * asserts the bitmask agrees with atoms[id]->dirty. The loop is used when
 * DEBUG is defined or HAVE___BUILTIN_CTZ is not.
 *
 * NOTE(review): the guard tests HAVE___BUILTIN_CTZ but the code calls
 * __builtin_ctzl -- confirm the build system guarantees both together. */
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+ unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+ unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+ unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
/* In the first word, ignore the bits below the starting id. */
+ unsigned long bits, mask = (1ul << bit) - 1;
+
/* After the first word the mask is reset to 0 so whole words are examined. */
+ for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+ bits = rctx->dirty_atoms[w] & ~mask;
+ if (bits == 0)
+ continue;
+ return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+ }
+
+ return R600_NUM_ATOMS;
+#else
+ for (; id < R600_NUM_ATOMS; id++) {
+ bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+ (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
/* Consistency check: the bit must match the atom's own dirty flag
 * (a NULL slot in atoms[] counts as not dirty). */
+ assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+ if (dirty)
+ break;
+ }
+
+ return id;
+#endif
+}
+
void r600_trace_emit(struct r600_context *rctx);
/* Emit one atom and clear its dirty state.
 *
 * Calls the atom's emit callback with &rctx->b, then marks the atom clean.
 * This change replaces the direct write to atom->dirty with
 * r600_set_atom_dirty(), which also updates rctx->dirty_atoms[].
 * If rctx->screen->b.trace_bo is set, r600_trace_emit() is called afterwards. */
-static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
+static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
- atom->dirty = false;
+ r600_set_atom_dirty(rctx, atom, false);
if (rctx->screen->b.trace_bo) {
r600_trace_emit(rctx);
}
}
/* Bind a CSO to a state slot.
 *
 * Stores `cso` in state->cso and makes the slot's atom dirty exactly when
 * `cso` is non-NULL (a NULL cso leaves the atom clean). This change adds the
 * rctx parameter so the dirty state goes through r600_set_atom_dirty(). */
-static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
+static inline void r600_set_cso_state(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso)
{
state->cso = cso;
- state->atom.dirty = cso != NULL;
+ r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
}
/* Bind a CSO together with its command buffer to a state slot.
 *
 * Stores `cb` in state->cb, sets state->atom.num_dw to cb->num_dw (or 0 when
 * `cb` is NULL), then delegates to r600_set_cso_state() to store `cso` and
 * update the dirty state. */
-static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso,
+static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso,
struct r600_command_buffer *cb)
{
state->cb = cb;
state->atom.num_dw = cb ? cb->num_dw : 0;
- r600_set_cso_state(state, cso);
+ r600_set_cso_state(rctx, state, cso);
}
/* compute_memory_pool.c */
@@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
struct r600_screen *rscreen);
-/* evergreen_compute.c */
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views);
-
/* evergreen_state.c */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
@@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
+void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw);
@@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
/*Evergreen Compute packet3*/
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
/* Append one value to a command buffer: writes `value` at cb->buf[cb->num_dw]
 * and increments cb->num_dw. No capacity check is performed here. */
-static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value)
+static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
{
cb->buf[cb->num_dw++] = value;
}
/* Append `num` values from `ptr` to a command buffer.
 *
 * Asserts that the values fit within cb->max_num_dw, copies them to
 * cb->buf starting at index cb->num_dw, and advances cb->num_dw by `num`. */
-static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
{
assert(cb->num_dw+num <= cb->max_num_dw);
memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
cb->num_dw += num;
}
-static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi
cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= EG_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig
cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
}
/* Store a single config register write: calls r600_store_config_reg_seq()
 * with a count of 1, then appends `value` with r600_store_value(). */
-static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_config_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
/* Store a single context register write: calls r600_store_context_reg_seq()
 * with a count of 1, then appends `value` with r600_store_value(). */
-static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_context_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
/* Store a single control constant write: calls r600_store_ctl_const_seq()
 * with a count of 1, then appends `value` with r600_store_value(). */
-static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_ctl_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
/* Store a single loop constant write: calls r600_store_loop_const_seq()
 * with a count of 1, then appends `value` with r600_store_value(). */
-static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
eg_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
@@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
/* Write a context-register sequence header flagged for compute.
 *
 * Calls r600_write_context_reg_seq() and then ORs
 * RADEON_CP_PACKET3_COMPUTE_MODE into cs->buf[cs->cdw - 2].
 * NOTE(review): this presumes the helper wrote exactly two dwords with the
 * packet header first -- the helper is not visible here; confirm. */
-static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
r600_write_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
/* Write the header for a sequence of `num` control constants starting at `reg`.
 *
 * Asserts that reg is at or above R600_CTL_CONST_OFFSET and that the two
 * header dwords plus `num` more fit in the stream (this change compares
 * against cs->max_dw instead of RADEON_MAX_CMDBUF_DWORDS). Only the two
 * header dwords are written here: the PKT3_SET_CTL_CONST packet word and the
 * register offset relative to R600_CTL_CONST_OFFSET, shifted right by 2.
 * The `num` values themselves are presumably emitted by the caller. */
-static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0);
cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
/* Write a single compute-flagged context register: calls
 * r600_write_compute_context_reg_seq() with a count of 1, then emits `value`
 * with radeon_emit(). */
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_compute_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
{
if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
r600_write_compute_context_reg(cs, reg, value);
@@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi
}
}
-static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_ctl_const_seq(cs, reg, 1);
radeon_emit(cs, value);
@@ -855,21 +911,21 @@ static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re
/*
* common helpers
*/
/* Scale a float to fixed point: returns value * 2^frac_bits, converted to
 * uint32_t by the return statement (fractional part discarded).
 * NOTE(review): the shift is done on an int, so frac_bits must stay below the
 * width of int, and the product must be representable in uint32_t for the
 * conversion to be defined -- confirm callers respect both. */
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
/* Pack a float as 12.4 fixed-point with clamping: returns 0 for x <= 0,
 * 0xffff for x >= 4096, and x * 16 (converted to unsigned) otherwise. */
-static INLINE unsigned r600_pack_float_12p4(float x)
+static inline unsigned r600_pack_float_12p4(float x)
{
return x <= 0 ? 0 :
x >= 4096 ? 0xffff : x * 16;
}
/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
-static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+static inline bool r600_can_read_depth(struct r600_texture *rtex)
{
return rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
-static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
V_028A6C_OUTPRIM_TYPE_POINTLIST,