diff options
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- | src/gallium/drivers/r600/Makefile.am | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_asm.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/r600/eg_sq.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 98 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreend.h | 5 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_blit.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_formats.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 75 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_llvm.c | 191 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 128 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 22 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 36 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 115 | ||||
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_sched.cpp | 2 |
17 files changed, 511 insertions, 252 deletions
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index dc0d90d759b..8317da727a2 100644 --- a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -1,5 +1,3 @@ -AUTOMAKE_OPTIONS = subdir-objects - include Makefile.sources include $(top_srcdir)/src/gallium/Automake.inc diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 295cb4d80b7..42e8b0b1761 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -160,6 +160,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c alu.op = ALU_OP1_MOVA_INT; alu.src[0].sel = bc->index_reg[id]; alu.src[0].chan = 0; + if (bc->chip_class == CAYMAN) + alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1; + alu.last = 1; r = r600_bytecode_add_alu(bc, &alu); if (r) @@ -167,12 +170,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c bc->ar_loaded = 0; /* clobbered */ - memset(&alu, 0, sizeof(alu)); - alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1; - alu.last = 1; - r = r600_bytecode_add_alu(bc, &alu); - if (r) - return r; + if (bc->chip_class == EVERGREEN) { + memset(&alu, 0, sizeof(alu)); + alu.op = id == 0 ? 
ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1; + alu.last = 1; + r = r600_bytecode_add_alu(bc, &alu); + if (r) + return r; + } /* Must split ALU group as index only applies to following group */ if (inside_alu_clause) { diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h index b534872f062..97e230f56c7 100644 --- a/src/gallium/drivers/r600/eg_sq.h +++ b/src/gallium/drivers/r600/eg_sq.h @@ -521,4 +521,11 @@ #define V_SQ_REL_ABSOLUTE 0 #define V_SQ_REL_RELATIVE 1 + +/* CAYMAN has special encoding for MOVA_INT destination */ +#define CM_V_SQ_MOVA_DST_AR_X 0 +#define CM_V_SQ_MOVA_DST_CF_PC 1 +#define CM_V_SQ_MOVA_DST_CF_IDX0 2 +#define CM_V_SQ_MOVA_DST_CF_IDX1 3 + #endif diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 4c3c34cd664..c52e43e9c2a 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -163,7 +163,7 @@ static void evergreen_cs_set_vertex_buffer( rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE; state->enabled_mask |= 1 << vb_index; state->dirty_mask |= 1 << vb_index; - state->atom.dirty = true; + r600_mark_atom_dirty(rctx, &state->atom); } static void evergreen_cs_set_constant_buffer( @@ -226,7 +226,7 @@ void *evergreen_create_compute_state( } #else memset(&shader->binary, 0, sizeof(shader->binary)); - radeon_elf_read(code, header->num_bytes, &shader->binary, true); + radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, @@ -487,6 +487,12 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* Emit constant buffer state */ r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom); + /* Emit sampler state */ + r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom); + + /* Emit sampler view (texture resource) state */ + r600_emit_atom(ctx, 
&ctx->samplers[PIPE_SHADER_COMPUTE].views.atom); + /* Emit compute shader state */ r600_emit_atom(ctx, &ctx->cs_shader_state.atom); @@ -655,25 +661,6 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, } } -void evergreen_set_cs_sampler_view(struct pipe_context *ctx_, - unsigned start_slot, unsigned count, - struct pipe_sampler_view **views) -{ - struct r600_pipe_sampler_view **resource = - (struct r600_pipe_sampler_view **)views; - - for (unsigned i = 0; i < count; i++) { - if (resource[i]) { - assert(i+1 < 12); - /* XXX: Implement */ - assert(!"Compute samplers not implemented."); - ///FETCH0 = VTX0 (param buffer), - //FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX - } - } -} - - static void evergreen_set_global_binding( struct pipe_context *ctx_, unsigned first, unsigned n, struct pipe_resource **resources, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 4ddbc0beba5..6a91d4709f4 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -32,7 +32,7 @@ #include "evergreen_compute.h" #include "util/u_math.h" -static INLINE unsigned evergreen_array_mode(unsigned mode) +static inline unsigned evergreen_array_mode(unsigned mode) { switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED; @@ -485,7 +485,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, /* offset */ rs->offset_units = state->offset_units; - rs->offset_scale = state->offset_scale * 12.0f; + rs->offset_scale = state->offset_scale * 16.0f; rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri; if (state->point_size_per_vertex) { @@ -896,7 +896,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx, for (i = start_slot; i < start_slot + num_scissors; i++) { rctx->scissor[i].scissor = state[i - start_slot]; - rctx->scissor[i].atom.dirty = true; + r600_mark_atom_dirty(rctx, 
&rctx->scissor[i].atom); } } @@ -1028,7 +1028,10 @@ void evergreen_init_color_surface(struct r600_context *rctx, macro_aspect = rtex->surface.mtilea; bankw = rtex->surface.bankw; bankh = rtex->surface.bankh; - fmask_bankh = rtex->fmask.bank_height; + if (rtex->fmask.size) + fmask_bankh = rtex->fmask.bank_height; + else + fmask_bankh = rtex->surface.bankh; tile_split = eg_tile_split(tile_split); macro_aspect = eg_macro_tile_aspect(macro_aspect); bankw = eg_bank_wh(bankw); @@ -1149,10 +1152,11 @@ void evergreen_init_color_surface(struct r600_context *rctx, surf->cb_color_attrib = color_attrib; if (rtex->fmask.size) { surf->cb_color_fmask = (base_offset + rtex->fmask.offset) >> 8; + surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); } else { surf->cb_color_fmask = surf->cb_color_base; + surf->cb_color_fmask_slice = S_028C88_TILE_MAX(slice); } - surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); surf->color_initialized = true; } @@ -1342,11 +1346,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (rctx->alphatest_state.bypass != alphatest_bypass) { rctx->alphatest_state.bypass = alphatest_bypass; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) { rctx->alphatest_state.cb0_export_16bpc = export_16bpc; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } } @@ -1362,28 +1366,28 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf->format != rctx->poly_offset_state.zs_format) { rctx->poly_offset_state.zs_format = state->zsbuf->format; - rctx->poly_offset_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); } if (rctx->db_state.rsurf != surf) { rctx->db_state.rsurf = surf; - rctx->db_state.atom.dirty = true; - rctx->db_misc_state.atom.dirty = true; + 
r600_mark_atom_dirty(rctx, &rctx->db_state.atom); + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } else if (rctx->db_state.rsurf) { rctx->db_state.rsurf = NULL; - rctx->db_state.atom.dirty = true; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_state.atom); + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) { rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; - rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) { rctx->alphatest_state.bypass = false; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } log_samples = util_logbase2(rctx->framebuffer.nr_samples); @@ -1392,7 +1396,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, rctx->b.family == CHIP_RV770) && rctx->db_misc_state.log_samples != log_samples) { rctx->db_misc_state.log_samples = log_samples; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } @@ -1420,7 +1424,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, rctx->framebuffer.atom.num_dw += 4; } - rctx->framebuffer.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); r600_set_sample_locations_constant_buffer(rctx); } @@ -1434,7 +1438,7 @@ static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_sam rctx->ps_iter_samples = min_samples; if (rctx->framebuffer.nr_samples > 1) { - rctx->framebuffer.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); } } @@ -1732,10 +1736,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2); radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */ - /* Always enable the first colorbuffer in 
CB_SHADER_MASK. This - * will assure that the alpha-test will work even if there is - * no colorbuffer bound. */ - radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */ + /* This must match the used export instructions exactly. + * Other values may lead to undefined behavior and hangs. + */ + radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */ } static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) @@ -1980,7 +1984,7 @@ static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, - unsigned resource_id_base) + unsigned resource_id_base, unsigned pkt_flags) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; @@ -1993,7 +1997,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, rview = state->views[resource_index]; assert(rview); - radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0)); + radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags); radeon_emit(cs, (resource_id_base + resource_index) * 8); radeon_emit_array(cs, rview->tex_resource_words, 8); @@ -2002,11 +2006,11 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, rview->tex_resource->b.b.nr_samples > 1 ? 
RADEON_PRIO_SHADER_TEXTURE_MSAA : RADEON_PRIO_SHADER_TEXTURE_RO); - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, reloc); if (!rview->skip_mip_address_reloc) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); radeon_emit(cs, reloc); } } @@ -2015,23 +2019,33 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 176 + R600_MAX_CONST_BUFFERS); + evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, + 176 + R600_MAX_CONST_BUFFERS, 0); } static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS); + evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, + 336 + R600_MAX_CONST_BUFFERS, 0); } static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS); + evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, + R600_MAX_CONST_BUFFERS, 0); +} + +static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom) +{ + evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views, + 816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_sampler_states(struct r600_context *rctx, struct r600_textures_info *texinfo, unsigned resource_id_base, - unsigned border_index_reg) + unsigned border_index_reg, + unsigned pkt_flags) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; @@ -2043,7 +2057,7 @@ static void 
evergreen_emit_sampler_states(struct r600_context *rctx, rstate = texinfo->states.states[i]; assert(rstate); - radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0)); + radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags); radeon_emit(cs, (resource_id_base + i) * 3); radeon_emit_array(cs, rstate->tex_sampler_words, 3); @@ -2058,17 +2072,27 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX); + evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, + R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0); } static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A428_TD_GS_SAMPLER0_BORDER_INDEX); + evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, + R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0); } static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom) { - evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX); + evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, + R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0); +} + +static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom) +{ + evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90, + R_00A464_TD_CS_SAMPLER0_BORDER_INDEX, + RADEON_CP_PACKET3_COMPUTE_MODE); } static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) @@ -3176,7 +3200,7 @@ void evergreen_update_db_shader_control(struct r600_context * rctx) if (db_shader_control != rctx->db_misc_state.db_shader_control) { rctx->db_misc_state.db_shader_control = 
db_shader_control; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -3431,12 +3455,14 @@ void evergreen_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0); + r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0); /* resources */ r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0); r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0); r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0); + r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0); r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10); @@ -3466,8 +3492,8 @@ void evergreen_init_state_functions(struct r600_context *rctx) } r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5); - rctx->atoms[id++] = &rctx->b.streamout.begin_atom; - rctx->atoms[id++] = &rctx->b.streamout.enable_atom; + r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); + r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); 
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0); r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0); diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index cd4ff46b103..ad6ad434b78 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -1253,6 +1253,11 @@ #define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430 #define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434 #define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438 +#define R_00A464_TD_CS_SAMPLER0_BORDER_INDEX 0x00A464 +#define R_00A468_TD_CS_SAMPLER0_BORDER_RED 0x00A468 +#define R_00A46C_TD_CS_SAMPLER0_BORDER_GREEN 0x00A46C +#define R_00A470_TD_CS_SAMPLER0_BORDER_BLUE 0x00A470 +#define R_00A474_TD_CS_SAMPLER0_BORDER_ALPHA 0x00A474 #define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000 #define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index 01262a59e90..b0002c3b50f 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -145,7 +145,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx, rctx->db_misc_state.copy_depth = util_format_has_depth(desc); rctx->db_misc_state.copy_stencil = util_format_has_stencil(desc); rctx->db_misc_state.copy_sample = first_sample; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); for (level = first_level; level <= last_level; level++) { if (!staging && !(texture->dirty_level_mask & (1 << level))) @@ -162,7 +162,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx, if (sample != rctx->db_misc_state.copy_sample) { rctx->db_misc_state.copy_sample = sample; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } surf_tmpl.format = texture->resource.b.b.format; @@ -197,7 +197,7 @@ static void 
r600_blit_decompress_depth(struct pipe_context *ctx, /* reenable compression in DB_RENDER_CONTROL */ rctx->db_misc_state.flush_depthstencil_through_cb = false; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, @@ -210,7 +210,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, /* Enable decompression in DB_RENDER_CONTROL */ rctx->db_misc_state.flush_depthstencil_in_place = true; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); surf_tmpl.format = texture->resource.b.b.format; @@ -248,7 +248,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx, /* Disable decompression in DB_RENDER_CONTROL */ rctx->db_misc_state.flush_depthstencil_in_place = false; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } void r600_decompress_depth_textures(struct r600_context *rctx, @@ -396,6 +396,8 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) { evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom, &buffers, color); + if (!buffers) + return; /* all buffers have been fast cleared */ } if (buffers & PIPE_CLEAR_COLOR) { @@ -435,10 +437,10 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) { if (rtex->depth_clear_value != depth) { rtex->depth_clear_value = depth; - rctx->db_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_state.atom); } rctx->db_misc_state.htile_clear = true; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -451,7 +453,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers, /* disable fast clear */ if 
(rctx->db_misc_state.htile_clear) { rctx->db_misc_state.htile_clear = false; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h index fa374d92e6f..9533aaa1378 100644 --- a/src/gallium/drivers/r600/r600_formats.h +++ b/src/gallium/drivers/r600/r600_formats.h @@ -64,7 +64,7 @@ #define ENDIAN_8IN32 2 #define ENDIAN_8IN64 3 -static INLINE unsigned r600_endian_swap(unsigned size) +static inline unsigned r600_endian_swap(unsigned size) { if (R600_BIG_ENDIAN) { switch (size) { @@ -82,7 +82,7 @@ static INLINE unsigned r600_endian_swap(unsigned size) } } -static INLINE bool r600_is_vertex_format_supported(enum pipe_format format) +static inline bool r600_is_vertex_format_supported(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); unsigned i; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 8eb0c6806b9..64451516c23 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, unsigned i; /* The number of dwords all the dirty states would take. */ - for (i = 0; i < R600_NUM_ATOMS; i++) { - if (ctx->atoms[i] && ctx->atoms[i]->dirty) { - num_dw += ctx->atoms[i]->num_dw; - if (ctx->screen->b.trace_bo) { - num_dw += R600_TRACE_CS_DWORDS; - } + i = r600_next_dirty_atom(ctx, 0); + while (i < R600_NUM_ATOMS) { + num_dw += ctx->atoms[i]->num_dw; + if (ctx->screen->b.trace_bo) { + num_dw += R600_TRACE_CS_DWORDS; } + i = r600_next_dirty_atom(ctx, i + 1); } /* The upper-bound of how much space a draw command would take. */ @@ -68,7 +68,8 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, } /* Count in queries_suspend. 
*/ - num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend; + num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend + + ctx->b.num_cs_dw_timer_queries_suspend; /* Count in streamout_end at the end of CS. */ if (ctx->b.streamout.begin_emitted) { @@ -92,7 +93,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, num_dw += 10; /* Flush if there's not enough space. */ - if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { + if (num_dw > ctx->b.rings.gfx.cs->max_dw) { ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); } } @@ -295,43 +296,45 @@ void r600_begin_new_cs(struct r600_context *ctx) r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd); /* Re-emit states. */ - ctx->alphatest_state.atom.dirty = true; - ctx->blend_color.atom.dirty = true; - ctx->cb_misc_state.atom.dirty = true; - ctx->clip_misc_state.atom.dirty = true; - ctx->clip_state.atom.dirty = true; - ctx->db_misc_state.atom.dirty = true; - ctx->db_state.atom.dirty = true; - ctx->framebuffer.atom.dirty = true; - ctx->pixel_shader.atom.dirty = true; - ctx->poly_offset_state.atom.dirty = true; - ctx->vgt_state.atom.dirty = true; - ctx->sample_mask.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom); + r600_mark_atom_dirty(ctx, &ctx->blend_color.atom); + r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom); + r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom); + r600_mark_atom_dirty(ctx, &ctx->clip_state.atom); + r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom); + r600_mark_atom_dirty(ctx, &ctx->db_state.atom); + r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom); + r600_mark_atom_dirty(ctx, &ctx->pixel_shader.atom); + r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom); + r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom); + r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom); for (i = 0; i < R600_MAX_VIEWPORTS; i++) { - ctx->scissor[i].atom.dirty = true; - ctx->viewport[i].atom.dirty = true; - } - ctx->config_state.atom.dirty = true; - ctx->stencil_ref.atom.dirty = 
true; - ctx->vertex_fetch_shader.atom.dirty = true; - ctx->export_shader.atom.dirty = true; - ctx->shader_stages.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom); + r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom); + } + if (ctx->b.chip_class < EVERGREEN) { + r600_mark_atom_dirty(ctx, &ctx->config_state.atom); + } + r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom); + r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom); + r600_mark_atom_dirty(ctx, &ctx->export_shader.atom); + r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom); if (ctx->gs_shader) { - ctx->geometry_shader.atom.dirty = true; - ctx->gs_rings.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->geometry_shader.atom); + r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom); } - ctx->vertex_shader.atom.dirty = true; - ctx->b.streamout.enable_atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom); + r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); if (ctx->blend_state.cso) - ctx->blend_state.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->blend_state.atom); if (ctx->dsa_state.cso) - ctx->dsa_state.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom); if (ctx->rasterizer_state.cso) - ctx->rasterizer_state.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom); if (ctx->b.chip_class <= R700) { - ctx->seamless_cube_map.atom.dirty = true; + r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom); } ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask; diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 72e2dc42f7e..faf538ccbb5 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -84,7 +84,7 @@ static void llvm_load_system_value( #else LLVMValueRef reg = lp_build_const_int32( ctx->soa.bld_base.base.gallivm, chan); - ctx->system_values[index] = build_intrinsic( + ctx->system_values[index] = lp_build_intrinsic( 
ctx->soa.bld_base.base.gallivm->builder, "llvm.R600.load.input", ctx->soa.bld_base.base.elem_type, &reg, 1, @@ -111,9 +111,9 @@ llvm_load_input_vector( Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex, lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), ""); LLVMValueRef HalfVec[2] = { - build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy", + lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy", VecType, Args, ArgCount, LLVMReadNoneAttribute), - build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw", + lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw", VecType, Args, ArgCount, LLVMReadNoneAttribute) }; LLVMValueRef MaskInputs[4] = { @@ -127,7 +127,7 @@ llvm_load_input_vector( Mask, ""); } else { VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4); - return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const", + return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const", VecType, Args, ArgCount, LLVMReadNoneAttribute); } } @@ -153,7 +153,7 @@ llvm_load_input_helper( arg_count = 1; } - return build_intrinsic(bb->gallivm->builder, intrinsic, + return lp_build_intrinsic(bb->gallivm->builder, intrinsic, bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute); } #endif @@ -332,7 +332,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer); args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component); lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output", - LLVMVoidTypeInContext(base->gallivm->context), args, 4); + LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0); } } @@ -356,7 +356,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -373,7 +373,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE); args[0] = output; args[1] = base_vector; - adjusted_elements[chan] = build_intrinsic(base->gallivm->builder, + adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder, "llvm.AMDGPU.dp4", bld_base->base.elem_type, args, 2, LLVMReadNoneAttribute); } @@ -381,7 +381,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) adjusted_elements, 4); args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -394,14 +394,14 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_pos++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), args, 3, 0); args[1] = lp_build_const_int32(base->gallivm, next_param++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -418,7 +418,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) args[0] = lp_build_gather_values(base->gallivm, elements, 4); args[1] = lp_build_const_int32(base->gallivm, next_param++); args[2] = lp_build_const_int32(base->gallivm, 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -430,7 +430,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) args[0] = output; args[1] = lp_build_const_int32(base->gallivm, next_param++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -449,7 +449,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) for (unsigned j = 0; j < ctx->color_buffer_count; j++) { args[1] = lp_build_const_int32(base->gallivm, j); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -458,7 +458,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) } else { args[1] = lp_build_const_int32(base->gallivm, color_count++); args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); - build_intrinsic( + lp_build_intrinsic( base->gallivm->builder, "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), @@ -543,7 +543,7 @@ static void llvm_emit_tex( case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); - emit_data->output[0] = build_intrinsic(gallivm->builder, + emit_data->output[0] = lp_build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) @@ -658,7 +658,7 @@ static void llvm_emit_tex( lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 
1) }; - LLVMValueRef ptr = build_intrinsic(gallivm->builder, + LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], @@ -679,7 +679,7 @@ static void llvm_emit_tex( } } - emit_data->output[0] = build_intrinsic(gallivm->builder, + emit_data->output[0] = lp_build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); @@ -754,7 +754,131 @@ static struct lp_build_tgsi_action dot_action = { .intr_name = "llvm.AMDGPU.dp4" }; +static void txd_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + + LLVMValueRef coords[4]; + unsigned chan, src; + for (src = 0; src < 3; src++) { + for (chan = 0; chan < 4; chan++) + coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan); + + emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); + } + emit_data->arg_count = 3; + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); +} + + +static void txp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + LLVMValueRef src_w; + unsigned chan; + LLVMValueRef coords[5]; + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + + for (chan = 0; chan < 3; chan++ ) { + LLVMValueRef arg = lp_build_emit_fetch(bld_base, + emit_data->inst, 0, chan); + coords[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, arg, src_w); + } + coords[3] = bld_base->base.one; + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture 
== TGSI_TEXTURE_SHADOWCUBE_ARRAY) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); + } + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); + emit_data->arg_count = 1; +} + +static void tex_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + + LLVMValueRef coords[5]; + unsigned chan; + for (chan = 0; chan < 4; chan++) { + coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { + /* These instructions have additional operand that should be packed + * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords. + * That operand should be passed as a float value in the args array + * right after the coord vector. 
After packing it's not used anymore, + * that's why arg_count is not increased */ + coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0); + } + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL); + } + + emit_data->arg_count = 1; + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); +} + +static void txf_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + const struct tgsi_texture_offset * off = inst->TexOffsets; + LLVMTypeRef offset_type = bld_base->int_bld.elem_type; + + /* fetch tex coords */ + tex_fetch_args(bld_base, emit_data); + + /* fetch tex offsets */ + if (inst->Texture.NumOffsets) { + assert(inst->Texture.NumOffsets == 1); + + emit_data->args[1] = LLVMConstBitCast( + bld->immediates[off->Index][off->SwizzleX], + offset_type); + emit_data->args[2] = LLVMConstBitCast( + bld->immediates[off->Index][off->SwizzleY], + offset_type); + emit_data->args[3] = LLVMConstBitCast( + bld->immediates[off->Index][off->SwizzleZ], + offset_type); + } else { + emit_data->args[1] = bld_base->int_bld.zero; + emit_data->args[2] = bld_base->int_bld.zero; + emit_data->args[3] = bld_base->int_bld.zero; + } + + emit_data->arg_count = 4; +} LLVMModuleRef r600_tgsi_llvm( struct radeon_llvm_context * ctx, @@ -783,7 +907,6 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const; bld_base->emit_prologue = llvm_emit_prologue; 
bld_base->emit_epilogue = llvm_emit_epilogue; - ctx->userdata = ctx; ctx->load_input = llvm_load_input; ctx->load_system_value = llvm_load_system_value; @@ -791,18 +914,42 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action; bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action; bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action; + bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; + bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; + bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb"; bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb"; bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; + bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; + 
bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl"; bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; + bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt; lp_build_tgsi_llvm(bld_base, tokens); @@ -881,7 +1028,7 @@ unsigned r600_llvm_compile( const char * gpu_family = r600_get_llvm_processor_name(family); memset(&binary, 0, sizeof(struct radeon_shader_binary)); - r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL); + r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL); r = r600_create_shader(bc, &binary, use_kill); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e122b607b86..6ffe5615fbf 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -120,6 +120,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void rctx->b.b.screen = screen; rctx->b.b.priv = priv; rctx->b.b.destroy = r600_destroy_context; + rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty; if (!r600_common_context_init(&rctx->b, &rscreen->b)) goto fail; @@ -176,7 +177,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, 
void goto fail; } - rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, + rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX, r600_context_gfx_flush, rctx, rscreen->b.trace_bo ? rscreen->b.trace_bo->cs_buf : NULL); @@ -268,8 +269,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SAMPLE_SHADING: case PIPE_CAP_CLIP_HALFZ: case PIPE_CAP_POLYGON_OFFSET_CLAMP: + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: return 1; + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43; + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr; @@ -329,10 +336,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_VERTEX_COLOR_CLAMPED: case PIPE_CAP_USER_VERTEX_BUFFERS: case PIPE_CAP_TEXTURE_GATHER_OFFSETS: - case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: case PIPE_CAP_SAMPLER_VIEW_TARGET: case PIPE_CAP_VERTEXID_NOBASE: - case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + case PIPE_CAP_DEPTH_BOUNDS_TEST: return 0; /* Stream output. */ diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 4ea270d3839..9b66105641a 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -36,7 +36,7 @@ #include "util/list.h" #include "util/u_transfer.h" -#define R600_NUM_ATOMS 73 +#define R600_NUM_ATOMS 75 #define R600_MAX_VIEWPORTS 16 @@ -85,6 +85,9 @@ #define R600_BIG_ENDIAN 0 #endif +#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8) +#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS) + struct r600_context; struct r600_bytecode; struct r600_shader_key; @@ -426,6 +429,8 @@ struct r600_context { /* State binding slots are here. 
*/ struct r600_atom *atoms[R600_NUM_ATOMS]; + /* Dirty atom bitmask for fast tests */ + unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN]; /* States for CS initialization. */ struct r600_command_buffer start_cs_cmd; /* invariant state mostly */ /** Compute specific registers initializations. The start_cs_cmd atom @@ -490,37 +495,92 @@ struct r600_context { struct r600_isa *isa; }; -static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs, +static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, struct r600_command_buffer *cb) { - assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS); + assert(cs->cdw + cb->num_dw <= cs->max_dw); memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw); cs->cdw += cb->num_dw; } +static inline void r600_set_atom_dirty(struct r600_context *rctx, + struct r600_atom *atom, + bool dirty) +{ + unsigned long mask; + unsigned int w; + + atom->dirty = dirty; + + assert(atom->id != 0); + w = atom->id / R600_DIRTY_ATOM_WORD_BITS; + mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS); + if (dirty) + rctx->dirty_atoms[w] |= mask; + else + rctx->dirty_atoms[w] &= ~mask; +} + +static inline void r600_mark_atom_dirty(struct r600_context *rctx, + struct r600_atom *atom) +{ + r600_set_atom_dirty(rctx, atom, true); +} + +static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx, + unsigned int id) +{ +#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ) + unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS; + unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS; + unsigned long bits, mask = (1ul << bit) - 1; + + for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) { + bits = rctx->dirty_atoms[w] & ~mask; + if (bits == 0) + continue; + return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits); + } + + return R600_NUM_ATOMS; +#else + for (; id < R600_NUM_ATOMS; id++) { + bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] & + (1ul << (id % R600_DIRTY_ATOM_WORD_BITS))); + assert(dirty == 
(rctx->atoms[id] && rctx->atoms[id]->dirty)); + if (dirty) + break; + } + + return id; +#endif +} + void r600_trace_emit(struct r600_context *rctx); -static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) +static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) { atom->emit(&rctx->b, atom); - atom->dirty = false; + r600_set_atom_dirty(rctx, atom, false); if (rctx->screen->b.trace_bo) { r600_trace_emit(rctx); } } -static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso) +static inline void r600_set_cso_state(struct r600_context *rctx, + struct r600_cso_state *state, void *cso) { state->cso = cso; - state->atom.dirty = cso != NULL; + r600_set_atom_dirty(rctx, &state->atom, cso != NULL); } -static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso, +static inline void r600_set_cso_state_with_cb(struct r600_context *rctx, + struct r600_cso_state *state, void *cso, struct r600_command_buffer *cb) { state->cb = cb; state->atom.num_dw = cb ? 
cb->num_dw : 0; - r600_set_cso_state(state, cso); + r600_set_cso_state(rctx, state, cso); } /* compute_memory_pool.c */ @@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool); struct compute_memory_pool* compute_memory_pool_new( struct r600_screen *rscreen); -/* evergreen_compute.c */ -void evergreen_set_cs_sampler_view(struct pipe_context *ctx_, - unsigned start_slot, unsigned count, - struct pipe_sampler_view **views); - /* evergreen_state.c */ struct pipe_sampler_view * evergreen_create_sampler_view_custom(struct pipe_context *ctx, @@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom); void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom); void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a); +void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id); void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw); @@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe, /*Evergreen Compute packet3*/ #define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE) -static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value) +static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value) { cb->buf[cb->num_dw++] = value; } -static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) +static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr) { assert(cb->num_dw+num <= cb->max_num_dw); memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0])); cb->num_dw += num; } -static INLINE void 
r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg < R600_CONTEXT_REG_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. */ -static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. */ -static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_CTL_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2; } -static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= R600_LOOP_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns * Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute * shaders. 
*/ -static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) +static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num) { assert(reg >= EG_LOOP_CONST_OFFSET); assert(cb->num_dw+2+num <= cb->max_num_dw); @@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2; } -static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_config_reg_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_context_reg_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_ctl_const_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { r600_store_loop_const_seq(cb, reg, 1); r600_store_value(cb, value); } -static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) +static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value) { eg_store_loop_const_seq(cb, reg, 1); r600_store_value(cb, value); @@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned void r600_init_command_buffer(struct 
r600_command_buffer *cb, unsigned num_dw); void r600_release_command_buffer(struct r600_command_buffer *cb); -static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { r600_write_context_reg_seq(cs, reg, num); /* Set the compute bit on the packet header */ cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE; } -static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) +static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num) { assert(reg >= R600_CTL_CONST_OFFSET); - assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS); + assert(cs->cdw+2+num <= cs->max_dw); cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0); cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2; } -static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) { r600_write_compute_context_reg_seq(cs, reg, 1); radeon_emit(cs, value); } -static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag) +static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag) { if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) { r600_write_compute_context_reg(cs, reg, value); @@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi } } -static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) +static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value) { r600_write_ctl_const_seq(cs, reg, 1); radeon_emit(cs, value); @@ -855,21 +911,21 @@ static INLINE void 
r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re /* * common helpers */ -static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) +static inline uint32_t S_FIXED(float value, uint32_t frac_bits) { return value * (1 << frac_bits); } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) /* 12.4 fixed-point */ -static INLINE unsigned r600_pack_float_12p4(float x) +static inline unsigned r600_pack_float_12p4(float x) { return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; } /* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */ -static INLINE bool r600_can_read_depth(struct r600_texture *rtex) +static inline bool r600_can_read_depth(struct r600_texture *rtex) { return rtex->resource.b.b.nr_samples <= 1 && (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || @@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex) #define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1 #define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2 -static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode) +static inline unsigned r600_conv_prim_to_gs_out(unsigned mode) { static const int prim_conv[] = { V_028A6C_OUTPRIM_TYPE_POINTLIST, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index af7622e9b34..8d1f95abddc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -310,6 +310,7 @@ struct r600_shader_ctx { int gs_next_vertex; struct r600_shader *gs_for_vs; int gs_export_gpr_treg; + unsigned enabled_stream_buffers_mask; }; struct r600_shader_tgsi_instruction { @@ -1402,6 +1403,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output * with MEM_STREAM instructions */ output.array_size = 0xFFF; output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component; + + ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer); + if (ctx->bc->chip_class >= EVERGREEN) { switch 
(so->output[i].output_buffer) { case 0: @@ -1718,6 +1722,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx, gs->gs_copy_shader = cshader; ctx.bc->nstack = 1; + + cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; cshader->shader.ring_item_size = ocnt * 16; return r600_bytecode_build(ctx.bc); @@ -1931,15 +1937,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; + ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; + ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { - ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1; - ctx.temp_reg = ctx.bc->ar_reg + 2; - ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3; - ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4; + ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3; + ctx.temp_reg = ctx.bc->ar_reg + 4; } else { - ctx.temp_reg = ctx.bc->ar_reg + 1; - ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2; - ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3; + ctx.temp_reg = ctx.bc->ar_reg + 3; } shader->max_arrays = 0; @@ -2086,7 +2091,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, radeon_llvm_ctx.chip_class = ctx.bc->chip_class; radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); radeon_llvm_ctx.stream_outputs = &so; - radeon_llvm_ctx.clip_vertex = ctx.cv_output; radeon_llvm_ctx.alpha_to_one = key.alpha_to_one; radeon_llvm_ctx.has_compressed_msaa_texturing = ctx.bc->has_compressed_msaa_texturing; @@ -2262,6 +2266,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, so.num_outputs && !use_llvm) emit_streamout(&ctx, &so); + pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; convert_edgeflag_to_int(&ctx); if (ring_outputs) { @@ -2485,6 +2490,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].array_base = 0; output[j].op 
= CF_OP_EXPORT; j++; + shader->nr_ps_color_exports++; } noutput = j; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index dd359d7e959..5d05c8153d7 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -125,6 +125,7 @@ struct r600_pipe_shader { struct r600_shader_key key; unsigned db_shader_control; unsigned ps_depth_export; + unsigned enabled_stream_buffers_mask; }; /* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 960dfcedfef..5cc2283792d 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -473,7 +473,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx, /* offset */ rs->offset_units = state->offset_units; - rs->offset_scale = state->offset_scale * 12.0f; + rs->offset_scale = state->offset_scale * 16.0f; rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri; if (state->point_size_per_vertex) { @@ -802,7 +802,7 @@ static void r600_set_scissor_states(struct pipe_context *ctx, return; for (i = start_slot ; i < start_slot + num_scissors; i++) { - rctx->scissor[i].atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom); } } @@ -1193,7 +1193,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (rctx->alphatest_state.bypass != alphatest_bypass) { rctx->alphatest_state.bypass = alphatest_bypass; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } } @@ -1209,28 +1209,28 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, if (state->zsbuf->format != rctx->poly_offset_state.zs_format) { rctx->poly_offset_state.zs_format = state->zsbuf->format; - rctx->poly_offset_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); } if (rctx->db_state.rsurf != 
surf) { rctx->db_state.rsurf = surf; - rctx->db_state.atom.dirty = true; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_state.atom); + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } else if (rctx->db_state.rsurf) { rctx->db_state.rsurf = NULL; - rctx->db_state.atom.dirty = true; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_state.atom); + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) { rctx->cb_misc_state.nr_cbufs = state->nr_cbufs; - rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) { rctx->alphatest_state.bypass = false; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } /* Calculate the CS size. */ @@ -1250,7 +1250,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx, rctx->framebuffer.atom.num_dw += 2; } - rctx->framebuffer.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom); r600_set_sample_locations_constant_buffer(rctx); } @@ -1541,9 +1541,9 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples) rctx->ps_iter_samples = min_samples; if (rctx->framebuffer.nr_samples > 1) { - rctx->rasterizer_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom); if (rctx->b.chip_class == R600) - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -2089,7 +2089,7 @@ bool r600_adjust_gprs(struct r600_context *rctx) if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) { rctx->config_state.sq_gpr_resource_mgmt_1 = tmp; rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2; - rctx->config_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->config_state.atom); rctx->b.flags |= 
R600_CONTEXT_WAIT_3D_IDLE; } return true; @@ -2796,11 +2796,11 @@ void r600_update_db_shader_control(struct r600_context * rctx) if (db_shader_control != rctx->db_misc_state.db_shader_control) { rctx->db_misc_state.db_shader_control = db_shader_control; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } -static INLINE unsigned r600_array_mode(unsigned mode) +static inline unsigned r600_array_mode(unsigned mode) { switch (mode) { case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED; @@ -3074,8 +3074,8 @@ void r600_init_state_functions(struct r600_context *rctx) r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3); r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4); r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5); - rctx->atoms[id++] = &rctx->b.streamout.begin_atom; - rctx->atoms[id++] = &rctx->b.streamout.enable_atom; + r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++); + r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++); r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23); r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0); r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0); diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 13dc9ee8c10..aa4a8d0240f 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -47,18 +47,26 @@ void r600_release_command_buffer(struct r600_command_buffer *cb) FREE(cb->buf); } +void r600_add_atom(struct r600_context *rctx, + struct r600_atom *atom, + unsigned id) +{ + assert(id < R600_NUM_ATOMS); + assert(rctx->atoms[id] == NULL); + rctx->atoms[id] = atom; + atom->id = id; + atom->dirty = false; +} + void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, 
unsigned id, void (*emit)(struct r600_context *ctx, struct r600_atom *state), unsigned num_dw) { - assert(id < R600_NUM_ATOMS); - assert(rctx->atoms[id] == NULL); - rctx->atoms[id] = atom; atom->emit = (void*)emit; atom->num_dw = num_dw; - atom->dirty = false; + r600_add_atom(rctx, atom, id); } void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) @@ -127,11 +135,11 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx, rctx->dual_src_blend = blend->dual_src_blend; if (!blend_disable) { - r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer); + r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer); color_control = blend->cb_color_control; } else { /* Blending is disabled. */ - r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer_no_blend); + r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend); color_control = blend->cb_color_control_no_blend; } @@ -150,7 +158,7 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx, update_cb = true; } if (update_cb) { - rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } } @@ -160,7 +168,7 @@ static void r600_bind_blend_state(struct pipe_context *ctx, void *state) struct r600_blend_state *blend = (struct r600_blend_state *)state; if (blend == NULL) { - r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL); + r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL); return; } @@ -173,7 +181,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; rctx->blend_color.state = *state; - rctx->blend_color.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->blend_color.atom); } void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) @@ -210,7 +218,7 @@ static void r600_set_clip_state(struct pipe_context *ctx, struct pipe_constant_buffer cb; 
rctx->clip_state.state = *state; - rctx->clip_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->clip_state.atom); cb.buffer = NULL; cb.user_buffer = state->ucp; @@ -226,7 +234,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; rctx->stencil_ref.state = *state; - rctx->stencil_ref.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom); } void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) @@ -274,11 +282,11 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) struct r600_stencil_ref ref; if (state == NULL) { - r600_set_cso_state_with_cb(&rctx->dsa_state, NULL, NULL); + r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL); return; } - r600_set_cso_state_with_cb(&rctx->dsa_state, dsa, &dsa->buffer); + r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer); ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0]; ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1]; @@ -293,7 +301,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) * we are having lockup on evergreen so do not enable * hyperz when not writing zbuffer */ - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } @@ -304,7 +312,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state) rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) { rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control; rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref; - rctx->alphatest_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom); } } @@ -318,14 +326,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->rasterizer = rs; - r600_set_cso_state_with_cb(&rctx->rasterizer_state, rs, &rs->buffer); + r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer); if 
(rs->offset_enable && (rs->offset_units != rctx->poly_offset_state.offset_units || rs->offset_scale != rctx->poly_offset_state.offset_scale)) { rctx->poly_offset_state.offset_units = rs->offset_units; rctx->poly_offset_state.offset_scale = rs->offset_scale; - rctx->poly_offset_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom); } /* Update clip_misc_state. */ @@ -333,14 +341,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state) rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) { rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl; rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable; - rctx->clip_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom); } /* Workaround for a missing scissor enable on r600. */ if (rctx->b.chip_class == R600 && rs->scissor_enable != rctx->scissor[0].enable) { rctx->scissor[0].enable = rs->scissor_enable; - rctx->scissor[0].atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom); } /* Re-emit PA_SC_LINE_STIPPLE. 
*/ @@ -378,7 +386,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx, state->atom.num_dw = util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 + util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5; - state->atom.dirty = true; + r600_mark_atom_dirty(rctx, &state->atom); } } @@ -399,9 +407,9 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, assert(start == 0); /* XXX fix below */ - if (shader != PIPE_SHADER_VERTEX && - shader != PIPE_SHADER_FRAGMENT) { - return; + if (!states) { + disable_mask = ~0u; + count = 0; } for (i = 0; i < count; i++) { @@ -443,7 +451,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe, /* change in TA_CNTL_AUX need a pipeline flush */ rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE; rctx->seamless_cube_map.enabled = seamless_cube_map; - rctx->seamless_cube_map.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom); } } @@ -483,7 +491,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state) { struct r600_context *rctx = (struct r600_context *)ctx; - r600_set_cso_state(&rctx->vertex_fetch_shader, state); + r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state); } static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state) @@ -513,7 +521,7 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx) rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE; rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) * util_bitcount(rctx->vertex_buffer_state.dirty_mask); - rctx->vertex_buffer_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom); } } @@ -570,7 +578,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE; state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 
14 : 13) * util_bitcount(state->dirty_mask); - state->atom.dirty = true; + r600_mark_atom_dirty(rctx, &state->atom); } } @@ -593,9 +601,9 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader, assert(start == 0); /* XXX fix below */ - if (shader == PIPE_SHADER_COMPUTE) { - evergreen_set_cs_sampler_view(pipe, start, count, views); - return; + if (!views) { + disable_mask = ~0u; + count = 0; } remaining_mask = dst->views.enabled_mask & disable_mask; @@ -673,7 +681,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx, for (i = start_slot; i < start_slot + num_viewports; i++) { rctx->viewport[i].state = state[i - start_slot]; - rctx->viewport[i].atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom); } } @@ -694,7 +702,7 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom) } /* Compute the key for the hw shader variant */ -static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx, +static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx, struct r600_pipe_shader_selector * sel) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -913,7 +921,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE; state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? 
util_bitcount(state->dirty_mask)*20 : util_bitcount(state->dirty_mask)*19; - state->atom.dirty = true; + r600_mark_atom_dirty(rctx, &state->atom); } } @@ -982,7 +990,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask return; rctx->sample_mask.sample_mask = sample_mask; - rctx->sample_mask.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom); } /* @@ -1107,27 +1115,28 @@ static void update_shader_atom(struct pipe_context *ctx, struct r600_shader_state *state, struct r600_pipe_shader *shader) { + struct r600_context *rctx = (struct r600_context *)ctx; + state->shader = shader; if (shader) { state->atom.num_dw = shader->command_buffer.num_dw; - state->atom.dirty = true; r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo); } else { state->atom.num_dw = 0; - state->atom.dirty = false; } + r600_mark_atom_dirty(rctx, &state->atom); } static void update_gs_block_state(struct r600_context *rctx, unsigned enable) { if (rctx->shader_stages.geom_enable != enable) { rctx->shader_stages.geom_enable = enable; - rctx->shader_stages.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom); } if (rctx->gs_rings.enable != enable) { rctx->gs_rings.enable = enable; - rctx->gs_rings.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom); if (enable && !rctx->gs_rings.esgs_ring.buffer) { unsigned size = 0x1C000; @@ -1192,7 +1201,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (!rctx->shader_stages.geom_enable) { rctx->shader_stages.geom_enable = true; - rctx->shader_stages.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom); } /* gs_shader provides GS and VS (copy shader) */ @@ -1206,8 +1215,9 @@ static bool r600_update_derived_state(struct r600_context *rctx) rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl; rctx->clip_misc_state.clip_dist_write = 
rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write; rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space; - rctx->clip_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom); } + rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask; } r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); @@ -1223,7 +1233,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) update_shader_atom(ctx, &rctx->geometry_shader, NULL); update_shader_atom(ctx, &rctx->export_shader, NULL); rctx->shader_stages.geom_enable = false; - rctx->shader_stages.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom); } r600_shader_select(ctx, rctx->vs_shader, &vs_dirty); @@ -1240,8 +1250,9 @@ static bool r600_update_derived_state(struct r600_context *rctx) rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl; rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write; rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space; - rctx->clip_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom); } + rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask; } } @@ -1252,7 +1263,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) { rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs; - rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } if (rctx->b.chip_class <= R700) { @@ -1260,7 +1271,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) if (rctx->cb_misc_state.multiwrite != multiwrite) { rctx->cb_misc_state.multiwrite = multiwrite; - 
rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } } @@ -1274,7 +1285,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) r600_update_ps_state(ctx, rctx->ps_shader->current); } - rctx->shader_stages.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom); update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current); } @@ -1409,7 +1420,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info data += info.indirect_offset / sizeof(unsigned); start = data[2] * ib.index_size; count = data[0]; - rctx->b.ws->buffer_unmap(indirect_resource->cs_buf); } else { start = 0; @@ -1454,24 +1464,23 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart; rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index; rctx->vgt_state.vgt_indx_offset = info.index_bias; - rctx->vgt_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom); } /* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */ if (rctx->b.chip_class == R600) { rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; - rctx->cb_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom); } /* Emit states. */ r600_need_cs_space(rctx, ib.user_buffer ? 
5 : 0, TRUE); r600_flush_emit(rctx); - for (i = 0; i < R600_NUM_ATOMS; i++) { - if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) { - continue; - } + i = r600_next_dirty_atom(rctx, 0); + while (i < R600_NUM_ATOMS) { r600_emit_atom(rctx, rctx->atoms[i]); + i = r600_next_dirty_atom(rctx, i + 1); } if (rctx->b.chip_class == CAYMAN) { @@ -2490,7 +2499,7 @@ static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable if (rctx->db_misc_state.occlusion_query_enabled != enable) { rctx->db_misc_state.occlusion_query_enabled = enable; - rctx->db_misc_state.atom.dirty = true; + r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom); } } diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index 2e38a62c05a..62680788c5e 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -489,7 +489,7 @@ bool alu_group_tracker::try_reserve(alu_node* n) { n->bc.bank_swizzle = 0; - if (!trans & fbs) + if (!trans && fbs) n->bc.bank_swizzle = VEC_210; if (gpr.try_reserve(n)) { |