summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/Makefile.am | 2
-rw-r--r-- src/gallium/drivers/r600/eg_asm.c | 17
-rw-r--r-- src/gallium/drivers/r600/eg_sq.h | 7
-rw-r--r-- src/gallium/drivers/r600/evergreen_compute.c | 29
-rw-r--r-- src/gallium/drivers/r600/evergreen_state.c | 98
-rw-r--r-- src/gallium/drivers/r600/evergreend.h | 5
-rw-r--r-- src/gallium/drivers/r600/r600_blit.c | 18
-rw-r--r-- src/gallium/drivers/r600/r600_formats.h | 4
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c | 75
-rw-r--r-- src/gallium/drivers/r600/r600_llvm.c | 191
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c | 13
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h | 128
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c | 22
-rw-r--r-- src/gallium/drivers/r600/r600_shader.h | 1
-rw-r--r-- src/gallium/drivers/r600/r600_state.c | 36
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c | 115
-rw-r--r-- src/gallium/drivers/r600/sb/sb_sched.cpp | 2
17 files changed, 511 insertions, 252 deletions
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index dc0d90d759b..8317da727a2 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 295cb4d80b7..42e8b0b1761 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -160,6 +160,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
alu.src[0].chan = 0;
+ if (bc->chip_class == CAYMAN)
+ alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
@@ -167,12 +170,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
bc->ar_loaded = 0; /* clobbered */
- memset(&alu, 0, sizeof(alu));
- alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
- alu.last = 1;
- r = r600_bytecode_add_alu(bc, &alu);
- if (r)
- return r;
+ if (bc->chip_class == EVERGREEN) {
+ memset(&alu, 0, sizeof(alu));
+ alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+ }
/* Must split ALU group as index only applies to following group */
if (inside_alu_clause) {
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872f062..97e230f56c7 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -521,4 +521,11 @@
#define V_SQ_REL_ABSOLUTE 0
#define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3
+
#endif
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 4c3c34cd664..c52e43e9c2a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -163,7 +163,7 @@ static void evergreen_cs_set_vertex_buffer(
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
state->enabled_mask |= 1 << vb_index;
state->dirty_mask |= 1 << vb_index;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
static void evergreen_cs_set_constant_buffer(
@@ -226,7 +226,7 @@ void *evergreen_create_compute_state(
}
#else
memset(&shader->binary, 0, sizeof(shader->binary));
- radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+ radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
@@ -487,6 +487,12 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
/* Emit constant buffer state */
r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
+ /* Emit sampler state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
+
+ /* Emit sampler view (texture resource) state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
+
/* Emit compute shader state */
r600_emit_atom(ctx, &ctx->cs_shader_state.atom);
@@ -655,25 +661,6 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
}
}
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views)
-{
- struct r600_pipe_sampler_view **resource =
- (struct r600_pipe_sampler_view **)views;
-
- for (unsigned i = 0; i < count; i++) {
- if (resource[i]) {
- assert(i+1 < 12);
- /* XXX: Implement */
- assert(!"Compute samplers not implemented.");
- ///FETCH0 = VTX0 (param buffer),
- //FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
- }
- }
-}
-
-
static void evergreen_set_global_binding(
struct pipe_context *ctx_, unsigned first, unsigned n,
struct pipe_resource **resources,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4ddbc0beba5..6a91d4709f4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -32,7 +32,7 @@
#include "evergreen_compute.h"
#include "util/u_math.h"
-static INLINE unsigned evergreen_array_mode(unsigned mode)
+static inline unsigned evergreen_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED;
@@ -485,7 +485,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -896,7 +896,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_scissors; i++) {
rctx->scissor[i].scissor = state[i - start_slot];
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1028,7 +1028,10 @@ void evergreen_init_color_surface(struct r600_context *rctx,
macro_aspect = rtex->surface.mtilea;
bankw = rtex->surface.bankw;
bankh = rtex->surface.bankh;
- fmask_bankh = rtex->fmask.bank_height;
+ if (rtex->fmask.size)
+ fmask_bankh = rtex->fmask.bank_height;
+ else
+ fmask_bankh = rtex->surface.bankh;
tile_split = eg_tile_split(tile_split);
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
@@ -1149,10 +1152,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
surf->cb_color_attrib = color_attrib;
if (rtex->fmask.size) {
surf->cb_color_fmask = (base_offset + rtex->fmask.offset) >> 8;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
} else {
surf->cb_color_fmask = surf->cb_color_base;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(slice);
}
- surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
surf->color_initialized = true;
}
@@ -1342,11 +1346,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) {
rctx->alphatest_state.cb0_export_16bpc = export_16bpc;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1362,28 +1366,28 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
log_samples = util_logbase2(rctx->framebuffer.nr_samples);
@@ -1392,7 +1396,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->b.family == CHIP_RV770) &&
rctx->db_misc_state.log_samples != log_samples) {
rctx->db_misc_state.log_samples = log_samples;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
@@ -1420,7 +1424,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 4;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1434,7 +1438,7 @@ static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_sam
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
}
}
@@ -1732,10 +1736,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
- /* Always enable the first colorbuffer in CB_SHADER_MASK. This
- * will assure that the alpha-test will work even if there is
- * no colorbuffer bound. */
- radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
+ /* This must match the used export instructions exactly.
+ * Other values may lead to undefined behavior and hangs.
+ */
+ radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
}
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1980,7 +1984,7 @@ static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct
static void evergreen_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
- unsigned resource_id_base)
+ unsigned resource_id_base, unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
@@ -1993,7 +1997,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview = state->views[resource_index];
assert(rview);
- radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + resource_index) * 8);
radeon_emit_array(cs, rview->tex_resource_words, 8);
@@ -2002,11 +2006,11 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview->tex_resource->b.b.nr_samples > 1 ?
RADEON_PRIO_SHADER_TEXTURE_MSAA :
RADEON_PRIO_SHADER_TEXTURE_RO);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
if (!rview->skip_mip_address_reloc) {
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
}
}
@@ -2015,23 +2019,33 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 176 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
+ 176 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
+ 336 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
+ R600_MAX_CONST_BUFFERS, 0);
+}
+
+static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
+ 816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sampler_states(struct r600_context *rctx,
struct r600_textures_info *texinfo,
unsigned resource_id_base,
- unsigned border_index_reg)
+ unsigned border_index_reg,
+ unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
@@ -2043,7 +2057,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
rstate = texinfo->states.states[i];
assert(rstate);
- radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + i) * 3);
radeon_emit_array(cs, rstate->tex_sampler_words, 3);
@@ -2058,17 +2072,27 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18,
+ R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A428_TD_GS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36,
+ R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0,
+ R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0);
+}
+
+static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90,
+ R_00A464_TD_CS_SAMPLER0_BORDER_INDEX,
+ RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -3176,7 +3200,7 @@ void evergreen_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -3431,12 +3455,14 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0);
/* resources */
r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0);
r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10);
@@ -3466,8 +3492,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
}
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index cd4ff46b103..ad6ad434b78 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1253,6 +1253,11 @@
#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430
#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434
#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438
+#define R_00A464_TD_CS_SAMPLER0_BORDER_INDEX 0x00A464
+#define R_00A468_TD_CS_SAMPLER0_BORDER_RED 0x00A468
+#define R_00A46C_TD_CS_SAMPLER0_BORDER_GREEN 0x00A46C
+#define R_00A470_TD_CS_SAMPLER0_BORDER_BLUE 0x00A470
+#define R_00A474_TD_CS_SAMPLER0_BORDER_ALPHA 0x00A474
#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000
#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 01262a59e90..b0002c3b50f 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -145,7 +145,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
rctx->db_misc_state.copy_depth = util_format_has_depth(desc);
rctx->db_misc_state.copy_stencil = util_format_has_stencil(desc);
rctx->db_misc_state.copy_sample = first_sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
for (level = first_level; level <= last_level; level++) {
if (!staging && !(texture->dirty_level_mask & (1 << level)))
@@ -162,7 +162,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
if (sample != rctx->db_misc_state.copy_sample) {
rctx->db_misc_state.copy_sample = sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
surf_tmpl.format = texture->resource.b.b.format;
@@ -197,7 +197,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
/* reenable compression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_through_cb = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
@@ -210,7 +210,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Enable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
surf_tmpl.format = texture->resource.b.b.format;
@@ -248,7 +248,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Disable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -396,6 +396,8 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
&buffers, color);
+ if (!buffers)
+ return; /* all buffers have been fast cleared */
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -435,10 +437,10 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
if (rtex->depth_clear_value != depth) {
rtex->depth_clear_value = depth;
- rctx->db_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
}
rctx->db_misc_state.htile_clear = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -451,7 +453,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
/* disable fast clear */
if (rctx->db_misc_state.htile_clear) {
rctx->db_misc_state.htile_clear = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
index fa374d92e6f..9533aaa1378 100644
--- a/src/gallium/drivers/r600/r600_formats.h
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -64,7 +64,7 @@
#define ENDIAN_8IN32 2
#define ENDIAN_8IN64 3
-static INLINE unsigned r600_endian_swap(unsigned size)
+static inline unsigned r600_endian_swap(unsigned size)
{
if (R600_BIG_ENDIAN) {
switch (size) {
@@ -82,7 +82,7 @@ static INLINE unsigned r600_endian_swap(unsigned size)
}
}
-static INLINE bool r600_is_vertex_format_supported(enum pipe_format format)
+static inline bool r600_is_vertex_format_supported(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
unsigned i;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb0c6806b9..64451516c23 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
unsigned i;
/* The number of dwords all the dirty states would take. */
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
- num_dw += ctx->atoms[i]->num_dw;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
+ i = r600_next_dirty_atom(ctx, 0);
+ while (i < R600_NUM_ATOMS) {
+ num_dw += ctx->atoms[i]->num_dw;
+ if (ctx->screen->b.trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
}
+ i = r600_next_dirty_atom(ctx, i + 1);
}
/* The upper-bound of how much space a draw command would take. */
@@ -68,7 +68,8 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+ ctx->b.num_cs_dw_timer_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -92,7 +93,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += 10;
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
@@ -295,43 +296,45 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
/* Re-emit states. */
- ctx->alphatest_state.atom.dirty = true;
- ctx->blend_color.atom.dirty = true;
- ctx->cb_misc_state.atom.dirty = true;
- ctx->clip_misc_state.atom.dirty = true;
- ctx->clip_state.atom.dirty = true;
- ctx->db_misc_state.atom.dirty = true;
- ctx->db_state.atom.dirty = true;
- ctx->framebuffer.atom.dirty = true;
- ctx->pixel_shader.atom.dirty = true;
- ctx->poly_offset_state.atom.dirty = true;
- ctx->vgt_state.atom.dirty = true;
- ctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
+ r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+ r600_mark_atom_dirty(ctx, &ctx->pixel_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
- ctx->scissor[i].atom.dirty = true;
- ctx->viewport[i].atom.dirty = true;
- }
- ctx->config_state.atom.dirty = true;
- ctx->stencil_ref.atom.dirty = true;
- ctx->vertex_fetch_shader.atom.dirty = true;
- ctx->export_shader.atom.dirty = true;
- ctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
+ r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
+ }
+ if (ctx->b.chip_class < EVERGREEN) {
+ r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
+ }
+ r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->export_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
if (ctx->gs_shader) {
- ctx->geometry_shader.atom.dirty = true;
- ctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->geometry_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
}
- ctx->vertex_shader.atom.dirty = true;
- ctx->b.streamout.enable_atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
if (ctx->blend_state.cso)
- ctx->blend_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
if (ctx->dsa_state.cso)
- ctx->dsa_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
if (ctx->rasterizer_state.cso)
- ctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);
if (ctx->b.chip_class <= R700) {
- ctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
}
ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 72e2dc42f7e..faf538ccbb5 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -84,7 +84,7 @@ static void llvm_load_system_value(
#else
LLVMValueRef reg = lp_build_const_int32(
ctx->soa.bld_base.base.gallivm, chan);
- ctx->system_values[index] = build_intrinsic(
+ ctx->system_values[index] = lp_build_intrinsic(
ctx->soa.bld_base.base.gallivm->builder,
"llvm.R600.load.input",
ctx->soa.bld_base.base.elem_type, &reg, 1,
@@ -111,9 +111,9 @@ llvm_load_input_vector(
Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
LLVMValueRef HalfVec[2] = {
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
VecType, Args, ArgCount, LLVMReadNoneAttribute),
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
VecType, Args, ArgCount, LLVMReadNoneAttribute)
};
LLVMValueRef MaskInputs[4] = {
@@ -127,7 +127,7 @@ llvm_load_input_vector(
Mask, "");
} else {
VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
- return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
+ return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
VecType, Args, ArgCount, LLVMReadNoneAttribute);
}
}
@@ -153,7 +153,7 @@ llvm_load_input_helper(
arg_count = 1;
}
- return build_intrinsic(bb->gallivm->builder, intrinsic,
+ return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
}
#endif
@@ -332,7 +332,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
- LLVMVoidTypeInContext(base->gallivm->context), args, 4);
+ LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
}
}
@@ -356,7 +356,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -373,7 +373,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
args[0] = output;
args[1] = base_vector;
- adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+ adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
"llvm.AMDGPU.dp4", bld_base->base.elem_type,
args, 2, LLVMReadNoneAttribute);
}
@@ -381,7 +381,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
adjusted_elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -394,14 +394,14 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
args, 3, 0);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -418,7 +418,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = lp_build_gather_values(base->gallivm, elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -430,7 +430,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -449,7 +449,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
args[1] = lp_build_const_int32(base->gallivm, j);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -458,7 +458,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
} else {
args[1] = lp_build_const_int32(base->gallivm, color_count++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -543,7 +543,7 @@ static void llvm_emit_tex(
case TGSI_OPCODE_TXF: {
args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
"llvm.R600.load.texbuf",
emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
if (ctx->chip_class >= EVERGREEN)
@@ -658,7 +658,7 @@ static void llvm_emit_tex(
lp_build_const_int32(gallivm, 1),
lp_build_const_int32(gallivm, 1)
};
- LLVMValueRef ptr = build_intrinsic(gallivm->builder,
+ LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
"llvm.R600.ldptr",
emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
@@ -679,7 +679,7 @@ static void llvm_emit_tex(
}
}
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
action->intr_name,
emit_data->dst_type, args, c, LLVMReadNoneAttribute);
@@ -754,7 +754,131 @@ static struct lp_build_tgsi_action dot_action = {
.intr_name = "llvm.AMDGPU.dp4"
};
+static void txd_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
+ LLVMValueRef coords[4];
+ unsigned chan, src;
+ for (src = 0; src < 3; src++) {
+ for (chan = 0; chan < 4; chan++)
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
+
+ emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ }
+ emit_data->arg_count = 3;
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+
+static void txp_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ LLVMValueRef src_w;
+ unsigned chan;
+ LLVMValueRef coords[5];
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+ src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+ for (chan = 0; chan < 3; chan++ ) {
+ LLVMValueRef arg = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0, chan);
+ coords[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DIV, arg, src_w);
+ }
+ coords[3] = bld_base->base.one;
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->arg_count = 1;
+}
+
+static void tex_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
+ LLVMValueRef coords[5];
+ unsigned chan;
+ for (chan = 0; chan < 4; chan++) {
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+ /* These instructions have additional operand that should be packed
+ * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+ * That operand should be passed as a float value in the args array
+ * right after the coord vector. After packing it's not used anymore,
+ * that's why arg_count is not increased */
+ coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ }
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+
+ emit_data->arg_count = 1;
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+static void txf_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ const struct tgsi_texture_offset * off = inst->TexOffsets;
+ LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
+
+ /* fetch tex coords */
+ tex_fetch_args(bld_base, emit_data);
+
+ /* fetch tex offsets */
+ if (inst->Texture.NumOffsets) {
+ assert(inst->Texture.NumOffsets == 1);
+
+ emit_data->args[1] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleX],
+ offset_type);
+ emit_data->args[2] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleY],
+ offset_type);
+ emit_data->args[3] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleZ],
+ offset_type);
+ } else {
+ emit_data->args[1] = bld_base->int_bld.zero;
+ emit_data->args[2] = bld_base->int_bld.zero;
+ emit_data->args[3] = bld_base->int_bld.zero;
+ }
+
+ emit_data->arg_count = 4;
+}
LLVMModuleRef r600_tgsi_llvm(
struct radeon_llvm_context * ctx,
@@ -783,7 +907,6 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
bld_base->emit_prologue = llvm_emit_prologue;
bld_base->emit_epilogue = llvm_emit_epilogue;
- ctx->userdata = ctx;
ctx->load_input = llvm_load_input;
ctx->load_system_value = llvm_load_system_value;
@@ -791,18 +914,42 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+ bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+ bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
+ bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+ bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
lp_build_tgsi_llvm(bld_base, tokens);
@@ -881,7 +1028,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
+ r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
r = r600_create_shader(bc, &binary, use_kill);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e122b607b86..6ffe5615fbf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -120,6 +120,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
rctx->b.b.screen = screen;
rctx->b.b.priv = priv;
rctx->b.b.destroy = r600_destroy_context;
+ rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;
if (!r600_common_context_init(&rctx->b, &rscreen->b))
goto fail;
@@ -176,7 +177,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
goto fail;
}
- rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX,
+ rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
r600_context_gfx_flush, rctx,
rscreen->b.trace_bo ?
rscreen->b.trace_bo->cs_buf : NULL);
@@ -268,8 +269,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr;
@@ -329,10 +336,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 4ea270d3839..9b66105641a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -36,7 +36,7 @@
#include "util/list.h"
#include "util/u_transfer.h"
-#define R600_NUM_ATOMS 73
+#define R600_NUM_ATOMS 75
#define R600_MAX_VIEWPORTS 16
@@ -85,6 +85,9 @@
#define R600_BIG_ENDIAN 0
#endif
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
struct r600_context;
struct r600_bytecode;
struct r600_shader_key;
@@ -426,6 +429,8 @@ struct r600_context {
/* State binding slots are here. */
struct r600_atom *atoms[R600_NUM_ATOMS];
+ /* Dirty atom bitmask for fast tests */
+ unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
/* States for CS initialization. */
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
/** Compute specific registers initializations. The start_cs_cmd atom
@@ -490,37 +495,92 @@ struct r600_context {
struct r600_isa *isa;
};
-static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
+static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_command_buffer *cb)
{
- assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw + cb->num_dw <= cs->max_dw);
memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
cs->cdw += cb->num_dw;
}
+static inline void r600_set_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom,
+ bool dirty)
+{
+ unsigned long mask;
+ unsigned int w;
+
+ atom->dirty = dirty;
+
+ assert(atom->id != 0);
+ w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+ mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+ if (dirty)
+ rctx->dirty_atoms[w] |= mask;
+ else
+ rctx->dirty_atoms[w] &= ~mask;
+}
+
+static inline void r600_mark_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom)
+{
+ r600_set_atom_dirty(rctx, atom, true);
+}
+
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+ unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+ unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+ unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
+ unsigned long bits, mask = (1ul << bit) - 1;
+
+ for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+ bits = rctx->dirty_atoms[w] & ~mask;
+ if (bits == 0)
+ continue;
+ return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+ }
+
+ return R600_NUM_ATOMS;
+#else
+ for (; id < R600_NUM_ATOMS; id++) {
+ bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+ (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
+ assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+ if (dirty)
+ break;
+ }
+
+ return id;
+#endif
+}
+
void r600_trace_emit(struct r600_context *rctx);
-static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
+static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
- atom->dirty = false;
+ r600_set_atom_dirty(rctx, atom, false);
if (rctx->screen->b.trace_bo) {
r600_trace_emit(rctx);
}
}
-static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
+static inline void r600_set_cso_state(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso)
{
state->cso = cso;
- state->atom.dirty = cso != NULL;
+ r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
}
-static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso,
+static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso,
struct r600_command_buffer *cb)
{
state->cb = cb;
state->atom.num_dw = cb ? cb->num_dw : 0;
- r600_set_cso_state(state, cso);
+ r600_set_cso_state(rctx, state, cso);
}
/* compute_memory_pool.c */
@@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
struct r600_screen *rscreen);
-/* evergreen_compute.c */
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views);
-
/* evergreen_state.c */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
@@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
+void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw);
@@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
/*Evergreen Compute packet3*/
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
-static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value)
+static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
{
cb->buf[cb->num_dw++] = value;
}
-static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
{
assert(cb->num_dw+num <= cb->max_num_dw);
memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
cb->num_dw += num;
}
-static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi
cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= EG_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig
cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_config_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_context_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_ctl_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
eg_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
@@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
-static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
r600_write_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
-static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0);
cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_compute_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
{
if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
r600_write_compute_context_reg(cs, reg, value);
@@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi
}
}
-static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_ctl_const_seq(cs, reg, 1);
radeon_emit(cs, value);
@@ -855,21 +911,21 @@ static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re
/*
* common helpers
*/
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
/* 12.4 fixed-point */
-static INLINE unsigned r600_pack_float_12p4(float x)
+static inline unsigned r600_pack_float_12p4(float x)
{
return x <= 0 ? 0 :
x >= 4096 ? 0xffff : x * 16;
}
/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
-static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+static inline bool r600_can_read_depth(struct r600_texture *rtex)
{
return rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
-static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
V_028A6C_OUTPRIM_TYPE_POINTLIST,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index af7622e9b34..8d1f95abddc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -310,6 +310,7 @@ struct r600_shader_ctx {
int gs_next_vertex;
struct r600_shader *gs_for_vs;
int gs_export_gpr_treg;
+ unsigned enabled_stream_buffers_mask;
};
struct r600_shader_tgsi_instruction {
@@ -1402,6 +1403,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+
+ ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+
if (ctx->bc->chip_class >= EVERGREEN) {
switch (so->output[i].output_buffer) {
case 0:
@@ -1718,6 +1722,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
gs->gs_copy_shader = cshader;
ctx.bc->nstack = 1;
+
+ cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
cshader->shader.ring_item_size = ocnt * 16;
return r600_bytecode_build(ctx.bc);
@@ -1931,15 +1937,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 4;
} else {
- ctx.temp_reg = ctx.bc->ar_reg + 1;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 3;
}
shader->max_arrays = 0;
@@ -2086,7 +2091,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
radeon_llvm_ctx.has_compressed_msaa_texturing =
ctx.bc->has_compressed_msaa_texturing;
@@ -2262,6 +2266,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
so.num_outputs && !use_llvm)
emit_streamout(&ctx, &so);
+ pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
@@ -2485,6 +2490,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
+ shader->nr_ps_color_exports++;
}
noutput = j;
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index dd359d7e959..5d05c8153d7 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -125,6 +125,7 @@ struct r600_pipe_shader {
struct r600_shader_key key;
unsigned db_shader_control;
unsigned ps_depth_export;
+ unsigned enabled_stream_buffers_mask;
};
/* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 960dfcedfef..5cc2283792d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -473,7 +473,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -802,7 +802,7 @@ static void r600_set_scissor_states(struct pipe_context *ctx,
return;
for (i = start_slot ; i < start_slot + num_scissors; i++) {
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1193,7 +1193,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1209,28 +1209,28 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
/* Calculate the CS size. */
@@ -1250,7 +1250,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 2;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1541,9 +1541,9 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom);
if (rctx->b.chip_class == R600)
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -2089,7 +2089,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
- rctx->config_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
return true;
@@ -2796,11 +2796,11 @@ void r600_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
-static INLINE unsigned r600_array_mode(unsigned mode)
+static inline unsigned r600_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED;
@@ -3074,8 +3074,8 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 13dc9ee8c10..aa4a8d0240f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -47,18 +47,26 @@ void r600_release_command_buffer(struct r600_command_buffer *cb)
FREE(cb->buf);
}
+void r600_add_atom(struct r600_context *rctx,
+ struct r600_atom *atom,
+ unsigned id)
+{
+ assert(id < R600_NUM_ATOMS);
+ assert(rctx->atoms[id] == NULL);
+ rctx->atoms[id] = atom;
+ atom->id = id;
+ atom->dirty = false;
+}
+
void r600_init_atom(struct r600_context *rctx,
struct r600_atom *atom,
unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw)
{
- assert(id < R600_NUM_ATOMS);
- assert(rctx->atoms[id] == NULL);
- rctx->atoms[id] = atom;
atom->emit = (void*)emit;
atom->num_dw = num_dw;
- atom->dirty = false;
+ r600_add_atom(rctx, atom, id);
}
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -127,11 +135,11 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
rctx->dual_src_blend = blend->dual_src_blend;
if (!blend_disable) {
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer);
color_control = blend->cb_color_control;
} else {
/* Blending is disabled. */
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer_no_blend);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend);
color_control = blend->cb_color_control_no_blend;
}
@@ -150,7 +158,7 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
update_cb = true;
}
if (update_cb) {
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -160,7 +168,7 @@ static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
struct r600_blend_state *blend = (struct r600_blend_state *)state;
if (blend == NULL) {
- r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL);
return;
}
@@ -173,7 +181,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->blend_color.state = *state;
- rctx->blend_color.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->blend_color.atom);
}
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
@@ -210,7 +218,7 @@ static void r600_set_clip_state(struct pipe_context *ctx,
struct pipe_constant_buffer cb;
rctx->clip_state.state = *state;
- rctx->clip_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
cb.buffer = NULL;
cb.user_buffer = state->ucp;
@@ -226,7 +234,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->stencil_ref.state = *state;
- rctx->stencil_ref.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
}
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
@@ -274,11 +282,11 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
struct r600_stencil_ref ref;
if (state == NULL) {
- r600_set_cso_state_with_cb(&rctx->dsa_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL);
return;
}
- r600_set_cso_state_with_cb(&rctx->dsa_state, dsa, &dsa->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer);
ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0];
ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1];
@@ -293,7 +301,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
* we are having lockup on evergreen so do not enable
* hyperz when not writing zbuffer
*/
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -304,7 +312,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) {
rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control;
rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -318,14 +326,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->rasterizer = rs;
- r600_set_cso_state_with_cb(&rctx->rasterizer_state, rs, &rs->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer);
if (rs->offset_enable &&
(rs->offset_units != rctx->poly_offset_state.offset_units ||
rs->offset_scale != rctx->poly_offset_state.offset_scale)) {
rctx->poly_offset_state.offset_units = rs->offset_units;
rctx->poly_offset_state.offset_scale = rs->offset_scale;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
/* Update clip_misc_state. */
@@ -333,14 +341,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) {
rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
/* Workaround for a missing scissor enable on r600. */
if (rctx->b.chip_class == R600 &&
rs->scissor_enable != rctx->scissor[0].enable) {
rctx->scissor[0].enable = rs->scissor_enable;
- rctx->scissor[0].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
}
/* Re-emit PA_SC_LINE_STIPPLE. */
@@ -378,7 +386,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -399,9 +407,9 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
assert(start == 0); /* XXX fix below */
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_FRAGMENT) {
- return;
+ if (!states) {
+ disable_mask = ~0u;
+ count = 0;
}
for (i = 0; i < count; i++) {
@@ -443,7 +451,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
/* change in TA_CNTL_AUX need a pipeline flush */
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
- rctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
}
}
@@ -483,7 +491,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_set_cso_state(&rctx->vertex_fetch_shader, state);
+ r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state);
}
static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
@@ -513,7 +521,7 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx)
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) *
util_bitcount(rctx->vertex_buffer_state.dirty_mask);
- rctx->vertex_buffer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
}
}
@@ -570,7 +578,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) *
util_bitcount(state->dirty_mask);
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -593,9 +601,9 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
assert(start == 0); /* XXX fix below */
- if (shader == PIPE_SHADER_COMPUTE) {
- evergreen_set_cs_sampler_view(pipe, start, count, views);
- return;
+ if (!views) {
+ disable_mask = ~0u;
+ count = 0;
}
remaining_mask = dst->views.enabled_mask & disable_mask;
@@ -673,7 +681,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_viewports; i++) {
rctx->viewport[i].state = state[i - start_slot];
- rctx->viewport[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
}
}
@@ -694,7 +702,7 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
}
/* Compute the key for the hw shader variant */
-static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
+static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
struct r600_pipe_shader_selector * sel)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -913,7 +921,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
: util_bitcount(state->dirty_mask)*19;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -982,7 +990,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
return;
rctx->sample_mask.sample_mask = sample_mask;
- rctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
}
/*
@@ -1107,27 +1115,28 @@ static void update_shader_atom(struct pipe_context *ctx,
struct r600_shader_state *state,
struct r600_pipe_shader *shader)
{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+
state->shader = shader;
if (shader) {
state->atom.num_dw = shader->command_buffer.num_dw;
- state->atom.dirty = true;
r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
} else {
state->atom.num_dw = 0;
- state->atom.dirty = false;
}
+ r600_mark_atom_dirty(rctx, &state->atom);
}
static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
{
if (rctx->shader_stages.geom_enable != enable) {
rctx->shader_stages.geom_enable = enable;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
if (rctx->gs_rings.enable != enable) {
rctx->gs_rings.enable = enable;
- rctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom);
if (enable && !rctx->gs_rings.esgs_ring.buffer) {
unsigned size = 0x1C000;
@@ -1192,7 +1201,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (!rctx->shader_stages.geom_enable) {
rctx->shader_stages.geom_enable = true;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
/* gs_shader provides GS and VS (copy shader) */
@@ -1206,8 +1215,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask;
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1223,7 +1233,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
update_shader_atom(ctx, &rctx->geometry_shader, NULL);
update_shader_atom(ctx, &rctx->export_shader, NULL);
rctx->shader_stages.geom_enable = false;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1240,8 +1250,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask;
}
}
@@ -1252,7 +1263,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (rctx->b.chip_class <= R700) {
@@ -1260,7 +1271,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.multiwrite != multiwrite) {
rctx->cb_misc_state.multiwrite = multiwrite;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -1274,7 +1285,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
r600_update_ps_state(ctx, rctx->ps_shader->current);
}
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
}
@@ -1409,7 +1420,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
data += info.indirect_offset / sizeof(unsigned);
start = data[2] * ib.index_size;
count = data[0];
- rctx->b.ws->buffer_unmap(indirect_resource->cs_buf);
}
else {
start = 0;
@@ -1454,24 +1464,23 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
rctx->vgt_state.vgt_indx_offset = info.index_bias;
- rctx->vgt_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
}
/* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
if (rctx->b.chip_class == R600) {
rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
/* Emit states. */
r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
- continue;
- }
+ i = r600_next_dirty_atom(rctx, 0);
+ while (i < R600_NUM_ATOMS) {
r600_emit_atom(rctx, rctx->atoms[i]);
+ i = r600_next_dirty_atom(rctx, i + 1);
}
if (rctx->b.chip_class == CAYMAN) {
@@ -2490,7 +2499,7 @@ static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable
if (rctx->db_misc_state.occlusion_query_enabled != enable) {
rctx->db_misc_state.occlusion_query_enabled = enable;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 2e38a62c05a..62680788c5e 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -489,7 +489,7 @@ bool alu_group_tracker::try_reserve(alu_node* n) {
n->bc.bank_swizzle = 0;
- if (!trans & fbs)
+ if (!trans && fbs)
n->bc.bank_swizzle = VEC_210;
if (gpr.try_reserve(n)) {