diff options
author | Zack Rusin <[email protected]> | 2010-03-15 15:24:38 -0400 |
---|---|---|
committer | Zack Rusin <[email protected]> | 2010-03-15 15:24:38 -0400 |
commit | 275c4bd3643d773210780cb8d578ca84f2604684 (patch) | |
tree | 8266edc39d4253ac0f2a0ecd41f560f3d815bb5c /src/gallium/drivers/r300 | |
parent | c5c5cd7132e18f4aad8e73d8ee879f8823c4c1e7 (diff) | |
parent | d0b35352ed27b1e66785c45ee95a352ed06b47ce (diff) |
Merge remote branch 'origin/master' into gallium_draw_llvm
Diffstat (limited to 'src/gallium/drivers/r300')
29 files changed, 2184 insertions, 1142 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index afddcb161fa..a6529b20604 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -14,12 +14,14 @@ C_SOURCES = \ r300_query.c \ r300_render.c \ r300_screen.c \ + r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ r300_vs.c \ r300_texture.c \ - r300_tgsi_to_rc.c + r300_tgsi_to_rc.c \ + r300_transfer.c LIBRARY_INCLUDES = \ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ @@ -32,7 +34,5 @@ EXTRA_OBJECTS = \ include ../../Makefile.template -.PHONY : $(COMPILER_ARCHIVE) - $(COMPILER_ARCHIVE): $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 183aa17f9b3..27b2e309932 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -30,6 +30,7 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_texture.c', 'r300_tgsi_to_rc.c', + 'r300_transfer.c', ] + r300compiler) + r300compiler Export('r300') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index eb9b0beeb5a..b7ad6b20206 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -33,7 +33,10 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); - util_blitter_save_vertex_shader(r300->blitter, r300->vs); + util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); + util_blitter_save_viewport(r300->blitter, &r300->viewport); + util_blitter_save_clip(r300->blitter, &r300->clip); + util_blitter_save_vertex_elements(r300->blitter, r300->velems); } /* Clear currently bound buffers. */ @@ -98,6 +101,8 @@ static void r300_hw_copy(struct pipe_context* pipe, unsigned width, unsigned height) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; /* Yeah we have to save all those states to ensure this blitter operation * is really transparent. The states will be restored by the blitter once @@ -106,11 +111,11 @@ static void r300_hw_copy(struct pipe_context* pipe, util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_save_fragment_sampler_states( - r300->blitter, r300->sampler_count, (void**)r300->sampler_states); + r300->blitter, state->sampler_count, (void**)state->sampler_states); util_blitter_save_fragment_sampler_textures( - r300->blitter, r300->texture_count, - (struct pipe_texture**)r300->textures); + r300->blitter, state->texture_count, + (struct pipe_texture**)state->textures); /* Do a copy */ util_blitter_copy(r300->blitter, @@ -139,10 +144,10 @@ void r300_surface_copy(struct pipe_context* pipe, new_format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_A4R4G4B4_UNORM; + new_format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_A8R8G8B8_UNORM; + new_format = PIPE_FORMAT_B8G8R8A8_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index f631b4ed27f..d994a46ccfe 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -24,6 +24,7 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_upload_mgr.h" #include "r300_blit.h" #include "r300_context.h" @@ -34,6 +35,7 @@ #include "r300_screen.h" #include "r300_state_invariant.h" #include "r300_texture.h" +#include "r300_transfer.h" #include "radeon_winsys.h" @@ -54,12 +56,16 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } + u_upload_destroy(r300->upload_vb); + u_upload_destroy(r300->upload_ib); + FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); - FREE(r300->vertex_format_state.state); + FREE(r300->textures_state.state); + FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); @@ -70,11 +76,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level) { - struct pipe_buffer* buf = 0; - - r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); - - return pipe->is_buffer_referenced(pipe, buf); + return 0; } static unsigned int @@ -84,7 +86,14 @@ r300_is_buffer_referenced(struct pipe_context *pipe, /* This only checks to see whether actual hardware buffers are * referenced. Since we use managed BOs and transfers, it's actually not * possible for pipe_buffers to ever reference the actual hardware, so - * buffers are never referenced. */ + * buffers are never referenced. + */ + + /* XXX: that doesn't make sense given that + * r300_is_texture_referenced is implemented on top of this + * function and hardware can certainly refer to textures + * directly... + */ return 0; } @@ -96,39 +105,54 @@ static void r300_flush_cb(void *data) } #define R300_INIT_ATOM(atomname, atomsize) \ - r300->atomname##_state.name = #atomname; \ - r300->atomname##_state.state = NULL; \ - r300->atomname##_state.size = atomsize; \ - r300->atomname##_state.emit = r300_emit_##atomname##_state; \ - r300->atomname##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->atomname##_state); + r300->atomname.name = #atomname; \ + r300->atomname.state = NULL; \ + r300->atomname.size = atomsize; \ + r300->atomname.emit = r300_emit_##atomname; \ + r300->atomname.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname); static void r300_setup_atoms(struct r300_context* r300) { + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + boolean has_tcl = r300_screen(r300->context.screen)->caps->has_tcl; + /* Create the actual atom list. * * Each atom is examined and emitted in the order it appears here, which * can affect performance and conformance if not handled with care. * - * Some atoms never change size, others change every emit. This is just - * an upper bound on each atom, to keep the emission machinery from - * underallocating space. */ + * Some atoms never change size, others change every emit - those have + * the size of 0 here. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant, 71); - R300_INIT_ATOM(ztop, 2); - R300_INIT_ATOM(blend, 8); - R300_INIT_ATOM(blend_color, 3); - R300_INIT_ATOM(clip, 29); - R300_INIT_ATOM(dsa, 8); - R300_INIT_ATOM(fb, 56); - R300_INIT_ATOM(rs, 25); - R300_INIT_ATOM(scissor, 3); - R300_INIT_ATOM(viewport, 9); - R300_INIT_ATOM(rs_block, 21); - R300_INIT_ATOM(vertex_format, 26); + R300_INIT_ATOM(invariant_state, 71); + R300_INIT_ATOM(ztop_state, 2); + R300_INIT_ATOM(blend_state, 8); + R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + R300_INIT_ATOM(fb_state, 0); + R300_INIT_ATOM(rs_state, 0); + R300_INIT_ATOM(scissor_state, 3); + R300_INIT_ATOM(viewport_state, 9); + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(vertex_stream_state, 0); + R300_INIT_ATOM(vap_output_state, 6); + R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vs_state, 0); + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); /* Some non-CSO atoms need explicit space to store the state locally. */ + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); + r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -136,14 +160,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); - struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys; + struct r300_winsys_screen *rws = r300screen->rws; if (!r300) return NULL; - r300->winsys = radeon_winsys; + r300->rws = rws; - r300->context.winsys = (struct pipe_winsys*)radeon_winsys; + r300->context.winsys = (struct pipe_winsys*)rws; r300->context.screen = screen; r300->context.priv = priv; @@ -178,14 +202,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_setup_atoms(r300); - r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); - r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); - r300->vertex_format_state.state = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); - r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); @@ -195,17 +211,35 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); - /* r300_init_surface_functions(r300); */ + r300_init_transfer_functions(r300); r300_init_state_functions(r300); r300->invariant_state.dirty = TRUE; - r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); - r300->dirty_state = R300_NEW_KITCHEN_SINK; + rws->set_flush_cb(r300->rws, r300_flush_cb, r300); r300->dirty_hw++; r300->blitter = util_blitter_create(&r300->context); + r300->upload_ib = u_upload_create(screen, + 32 * 1024, 16, + PIPE_BUFFER_USAGE_INDEX); + + if (r300->upload_ib == NULL) + goto no_upload_ib; + + r300->upload_vb = u_upload_create(screen, + 128 * 1024, 16, + PIPE_BUFFER_USAGE_VERTEX); + if (r300->upload_vb == NULL) + goto no_upload_vb; + return &r300->context; + + no_upload_ib: + u_upload_destroy(r300->upload_ib); + no_upload_vb: + FREE(r300); + return NULL; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 443af4ec2e3..db2f74e0745 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -32,6 +32,7 @@ #include "r300_screen.h" +struct u_upload_mgr; struct r300_context; struct r300_fragment_shader; @@ -45,7 +46,7 @@ struct r300_atom { /* Opaque state. */ void* state; /* Emit the state to the context. */ - void (*emit)(struct r300_context*, void*); + void (*emit)(struct r300_context*, unsigned, void*); /* Upper bound on number of dwords to emit. */ unsigned size; /* Whether this atom should be emitted. */ @@ -86,7 +87,6 @@ struct r300_rs_state { uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ @@ -119,22 +119,58 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_texture_state { +struct r300_texture_format_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ }; +#define R300_MAX_TEXTURE_LEVELS 13 + struct r300_texture_fb_state { /* Colorbuffer. */ - uint32_t colorpitch[PIPE_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ + uint32_t colorpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ uint32_t us_out_fmt; /* R300_US_OUT_FMT[0-3] */ /* Zbuffer. */ - uint32_t depthpitch[PIPE_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ + uint32_t depthpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ uint32_t zb_format; /* R300_ZB_FORMAT */ }; +struct r300_textures_state { + /* Textures. */ + struct r300_texture *textures[8]; + int texture_count; + /* Sampler states. */ + struct r300_sampler_state *sampler_states[8]; + int sampler_count; + + /* These is the merge of the texture and sampler states. */ + unsigned count; + uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */ + struct r300_texture_sampler_state { + uint32_t format[3]; /* R300_TX_FORMAT[0-2] */ + uint32_t filter[2]; /* R300_TX_FILTER[0-1] */ + uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */ + } regs[8]; +}; + +struct r300_vertex_stream_state { + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ + uint32_t vap_prog_stream_cntl[8]; + /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ + uint32_t vap_prog_stream_cntl_ext[8]; + + unsigned count; +}; + +struct r300_vap_output_state { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -151,11 +187,6 @@ struct r300_ztop_state { #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_SAMPLER 0x00000200 -#define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_TEXTURE 0x00040000 -#define R300_ANY_NEW_TEXTURES 0x03fc0000 -#define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -208,16 +239,16 @@ struct r300_texture { struct pipe_texture tex; /* Offsets into the buffer. */ - unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned offset[R300_MAX_TEXTURE_LEVELS]; /* A pitch for each mip-level */ - unsigned pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch[R300_MAX_TEXTURE_LEVELS]; /* Size of one zslice or face based on the texture target */ - unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[PIPE_MAX_TEXTURE_LEVELS]; + enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; /** * If non-zero, override the natural texture layout with @@ -238,10 +269,10 @@ struct r300_texture { boolean is_npot; /* Pipe buffer backing this texture. */ - struct pipe_buffer* buffer; + struct r300_winsys_buffer *buffer; /* Registers carrying texture format data. */ - struct r300_texture_state state; + struct r300_texture_format_state state; struct r300_texture_fb_state fb_state; /* Buffer tiling */ @@ -258,6 +289,13 @@ struct r300_vertex_info { uint32_t vap_prog_stream_cntl_ext[8]; }; +struct r300_vertex_element_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + + struct r300_vertex_stream_state vertex_stream; +}; + extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { @@ -265,7 +303,7 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct radeon_winsys* winsys; + struct r300_winsys_screen *rws; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; /* Accelerated blit support. */ @@ -282,9 +320,6 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; - /* Vertex formatting information. */ - struct r300_atom vertex_format_state; - /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; @@ -306,20 +341,24 @@ struct r300_context { struct r300_atom rs_state; /* RS block state. */ struct r300_atom rs_block_state; - /* Sampler states. */ - struct r300_sampler_state* sampler_states[8]; - int sampler_count; /* Scissor state. */ struct r300_atom scissor_state; - /* Texture states. */ - struct r300_texture* textures[8]; - int texture_count; + /* Textures state. */ + struct r300_atom textures_state; + /* Vertex stream formatting state. */ + struct r300_atom vertex_stream_state; + /* VAP (vertex shader) output mapping state. */ + struct r300_atom vap_output_state; /* Vertex shader. */ - struct r300_vertex_shader* vs; + struct r300_atom vs_state; /* Viewport state. */ struct r300_atom viewport_state; /* ZTOP state. */ struct r300_atom ztop_state; + /* PVS flush. */ + struct r300_atom pvs_flush; + /* Texture cache invalidate. */ + struct r300_atom texture_cache_inval; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -327,22 +366,33 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; + int vertex_buffer_max_index; /* Vertex elements for Gallium. */ - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - int vertex_element_count; + struct r300_vertex_element_state *velems; + bool any_user_vbs; + + /* Vertex info for Draw. */ + struct vertex_info vertex_info; struct pipe_stencil_ref stencil_ref; + struct pipe_clip_state clip; + + struct pipe_viewport_state viewport; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; - /* Whether the TCL engine should be in bypass mode. */ - boolean tcl_bypass; /* Whether polygon offset is enabled. */ boolean polygon_offset_enabled; /* Z buffer bit depth. */ uint32_t zbuffer_bpp; + /* Whether scissor is enabled. */ + boolean scissor_enabled; + /* upload managers */ + struct u_upload_mgr *upload_vb; + struct u_upload_mgr *upload_ib; }; /* Convenience cast wrapper. */ @@ -359,6 +409,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +void r300_init_tex_functions( struct pipe_context *pipe ); static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 151f72b0fe4..ad07efbffdb 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -51,7 +51,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ + struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ @@ -105,22 +105,34 @@ cs_count--; \ } while (0) -#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 3; \ } while (0) -#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \ + "domains (%d, %d, %d)\n", \ + tex, offset, rd, wd, flags); \ + assert(tex); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \ + cs_count -= 3; \ +} while (0) + + +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ "domains (%d, %d, %d)\n", \ bo, rd, wd, flags); \ assert(bo); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 2; \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index b881730848a..d6177577c8d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,7 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit" }, { "tex", DBG_TEX, "Textures" }, { "fall", DBG_FALL, "Fallbacks" }, + { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking purposes only!)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index f7dcd8dc52f..d8c64dd9001 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,16 +32,19 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" +#include "r300_state_inlines.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, void* state) +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(8); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (fb->nr_cbufs) { @@ -58,26 +61,28 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, void* state) +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); if (r300screen->caps->is_r500) { - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); OUT_CS(bc->blend_color_red_alpha); OUT_CS(bc->blend_color_green_blue); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); END_CS; } } -void r300_emit_clip_state(struct r300_context* r300, void* state) +void r300_emit_clip_state(struct r300_context* r300, + unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; @@ -85,7 +90,7 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) CS_LOCALS(r300); if (r300screen->caps->has_tcl) { - BEGIN_CS(5 + (6 * 4)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, (r300screen->caps->is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); @@ -100,14 +105,14 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } } -void r300_emit_dsa_state(struct r300_context* r300, void* state) +void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) { struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -116,7 +121,7 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) struct pipe_stencil_ref stencil_ref = r300->stencil_ref; CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); + BEGIN_CS(size); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); @@ -143,6 +148,8 @@ static const float * get_shader_constant( { struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + struct r300_textures_state* texstate = + (struct r300_textures_state*)r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -158,7 +165,7 @@ static const float * get_shader_constant( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = &r300->textures[constant->u.State[1]]->tex; + tex = &texstate->textures[constant->u.State[1]]->tex; vec[0] = 1.0 / tex->width0; vec[1] = 1.0 / tex->height0; break; @@ -170,23 +177,15 @@ static const float * get_shader_constant( break; case RC_STATE_R300_VIEWPORT_SCALE: - if (r300->tcl_bypass) { - vec[0] = 1; - vec[1] = 1; - vec[2] = 1; - } else { - vec[0] = viewport->xscale; - vec[1] = viewport->yscale; - vec[2] = viewport->zscale; - } + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; break; case RC_STATE_R300_VIEWPORT_OFFSET: - if (!r300->tcl_bypass) { - vec[0] = viewport->xoffset; - vec[1] = viewport->yoffset; - vec[2] = viewport->zoffset; - } + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; break; default: @@ -378,7 +377,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_fb_state(struct r300_context* r300, void* state) +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -387,7 +386,7 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) int i; CS_LOCALS(r300); - BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 6); + BEGIN_CS(size); /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -418,14 +417,17 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) assert(tex && tex->buffer && "cbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.colorpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); } + for (; i < 4; i++) { + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + } /* Set up a zbuffer. */ if (fb->zsbuf) { @@ -434,19 +436,21 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.depthpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); } + OUT_CS_REG(R300_GA_POINT_MINMAX, + (MAX2(fb->width, fb->height) * 6) << R300_GA_POINT_MINMAX_MAX_SHIFT); END_CS; } -static void r300_emit_query_start(struct r300_context *r300) +void r300_emit_query_start(struct r300_context *r300) { struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; struct r300_query *query = r300->query_current; @@ -489,13 +493,13 @@ static void r300_emit_query_finish(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), 0, RADEON_GEM_DOMAIN_GTT, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), 0, RADEON_GEM_DOMAIN_GTT, 0); case 2: /* pipe 1 only */ @@ -503,13 +507,13 @@ static void r300_emit_query_finish(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), 0, RADEON_GEM_DOMAIN_GTT, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); break; default: @@ -531,7 +535,7 @@ static void rv530_emit_query_single(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -544,10 +548,10 @@ static void rv530_emit_query_double(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -572,21 +576,19 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, void* state) +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; float scale, offset; CS_LOCALS(r300); - BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); + OUT_CS_REG(R300_GA_LINE_CNTL, rs->line_control); if (rs->polygon_offset_enable) { scale = rs->depth_scale * 12; @@ -617,7 +619,8 @@ void r300_emit_rs_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_rs_block_state(struct r300_context* r300, void* state) +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_rs_block* rs = (struct r300_rs_block*)state; unsigned i; @@ -628,7 +631,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) DBG(r300, DBG_DRAW, "r300: RS emit:\n"); - BEGIN_CS(5 + count*2); + BEGIN_CS(size); if (r300screen->caps->is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { @@ -659,7 +662,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, void* state) +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state) { unsigned minx, miny, maxx, maxy; uint32_t top_left, bottom_right; @@ -673,7 +677,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void* state) maxx = fb->width; maxy = fb->height; - if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + if (r300->scissor_enabled) { minx = MAX2(minx, scissor->minx); miny = MAX2(miny, scissor->miny); maxx = MIN2(maxx, scissor->maxx); @@ -713,65 +717,51 @@ void r300_emit_scissor_state(struct r300_context* r300, void* state) (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); OUT_CS(top_left); OUT_CS(bottom_right); END_CS; } -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset) +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state) { - uint32_t filter0 = sampler->filter0; - uint32_t format0 = tex->state.format0; - unsigned min_level, max_level; + struct r300_textures_state *allstate = (struct r300_textures_state*)state; + struct r300_texture_sampler_state *texstate; + unsigned i; CS_LOCALS(r300); - /* to emulate 1D textures through 2D ones correctly */ - if (tex->tex.target == PIPE_TEXTURE_1D) { - filter0 &= ~R300_TX_WRAP_T_MASK; - filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); - } + BEGIN_CS(size); + OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable); - if (tex->is_npot) { - /* NPOT textures don't support mip filter, unfortunately. - * This prevents incorrect rendering. */ - filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; - } else { - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); - } + for (i = 0; i < allstate->count; i++) { + if ((1 << i) & allstate->tx_enable) { + texstate = &allstate->regs[i]; + + OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter[0]); + OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter[1]); + OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4), + texstate->border_color); - BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | - (offset << 28)); - OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); - OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); - OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); - OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, - R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile), - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format[0]); + OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format[1]); + OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format[2]); + + OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_CS_TEX_RELOC(allstate->textures[i], texstate->tile_config, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } END_CS; } void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_vertex_element *velem = r300->velems->velem; int i; - unsigned size1, size2, aos_count = r300->vertex_element_count; + unsigned size1, size2, aos_count = r300->velems->count; unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); @@ -800,74 +790,116 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) } for (i = 0; i < aos_count; i++) { - OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0, 0); } END_CS; } -void r300_emit_vertex_format_state(struct r300_context* r300, void* state) +void r300_emit_vertex_buffer(struct r300_context* r300) { - struct r300_vertex_info* vertex_info = (struct r300_vertex_info*)state; - unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + "vertex size %d\n", r300->vbo, + r300->vertex_info.size); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * OFFSET [offset into BO] + * VBPNTR [relocated BO] + */ + BEGIN_CS(7); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); + OUT_CS(1); + OUT_CS(r300->vertex_info.size | + (r300->vertex_info.size << 8)); + OUT_CS(r300->vbo_offset); + OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +} - BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_info->vinfo.size); +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state) +{ + struct r300_vertex_stream_state *streams = + (struct r300_vertex_stream_state*)state; + unsigned i; + CS_LOCALS(r300); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vertex_info->vinfo.hwfmt[0]); - OUT_CS(vertex_info->vinfo.hwfmt[1]); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vertex_info->vinfo.hwfmt[2]); - OUT_CS(vertex_info->vinfo.hwfmt[3]); - for (i = 0; i < 4; i++) { - DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, - vertex_info->vinfo.hwfmt[i]); - } + DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl[i]); + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl[i]); + streams->vap_prog_stream_cntl[i]); } - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl_ext[i]); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl_ext[i]); + streams->vap_prog_stream_cntl_ext[i]); } END_CS; } +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state) +{ + struct r300_vap_output_state *vap_out_state = + (struct r300_vap_output_state*)state; + CS_LOCALS(r300); + + DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); + + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(vap_out_state->vap_vtx_state_cntl); + OUT_CS(vap_out_state->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); + OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); + END_CS; +} + +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) +{ + CS_LOCALS(r300); -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code) + BEGIN_CS(size); + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + END_CS; +} + +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { - int i; + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; + struct r300_vertex_program_code* code = &vs->code; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + unsigned i; + + unsigned vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1); + unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + unsigned temp_count = MAX2(code->num_temporaries, 1); - int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; - int input_count = MAX2(util_bitcount(code->InputsRead), 1); - int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); - int temp_count = MAX2(code->num_temporaries, 1); - int pvs_num_slots = MIN3(vtx_mem_size / input_count, - vtx_mem_size / output_count, 10); - int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { - debug_printf("r300: Implementation error: emit_vertex_shader called," + debug_printf("r300: Implementation error: emit_vs_state called," " but has_tcl is FALSE!\n"); return; } - BEGIN_CS(9 + code->length); + BEGIN_CS(size); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -881,8 +913,9 @@ void r300_emit_vertex_program_code(struct r300_context* r300, OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) + for (i = 0; i < code->length; i++) { OUT_CS(code->body.d[i]); + } OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -892,12 +925,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300, END_CS; } -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs) -{ - r300_emit_vertex_program_code(r300, &vs->code); -} - void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants) { @@ -906,7 +933,7 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { - debug_printf("r300: Implementation error: emit_vertex_shader called," + debug_printf("r300: Implementation error: emit_vs_constant_buffer called," " but has_tcl is FALSE!\n"); return; } @@ -931,96 +958,75 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, void* state) +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - if (r300->tcl_bypass) { - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_VTE_CNTL, 0); - END_CS; - } else { - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - END_CS; - } + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } -void r300_emit_texture_count(struct r300_context* r300) -{ - uint32_t tx_enable = 0; - int i; - CS_LOCALS(r300); - - /* Notice that texture_count and sampler_count are just sizes - * of the respective arrays. We still have to check for the individual - * elements. */ - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->textures[i]) { - tx_enable |= 1 << i; - } - } - - BEGIN_CS(2); - OUT_CS_REG(R300_TX_ENABLE, tx_enable); - END_CS; - -} - -void r300_emit_ztop_state(struct r300_context* r300, void* state) +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); END_CS; } -void r300_flush_textures(struct r300_context* r300) +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_TX_INVALTAGS, 0); END_CS; } -static void r300_flush_pvs(struct r300_context* r300) -{ - CS_LOCALS(r300); - - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); - END_CS; -} - -void r300_emit_buffer_validate(struct r300_context *r300) +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; struct r300_texture* tex; + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->velems->velem; + struct pipe_buffer *pbuf; unsigned i; boolean invalid = FALSE; + /* upload buffers first */ + if (r300->any_user_vbs) { + r300_upload_user_buffers(r300); + r300->any_user_vbs = false; + } + /* Clean out BOs. */ - r300->winsys->reset_bos(r300->winsys); + r300->rws->reset_bos(r300->rws); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1029,42 +1035,60 @@ validate: if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...textures... */ - for (i = 0; i < r300->texture_count; i++) { - tex = r300->textures[i]; - if (!tex) + for (i = 0; i < texstate->count; i++) { + tex = texstate->textures[i]; + if (!tex || !texstate->sampler_states[i]) continue; - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + if (!r300_add_texture(r300->rws, tex, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...occlusion query buffer... */ if (r300->dirty_state & R300_NEW_QUERY) { - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { + if (!r300_add_buffer(r300->rws, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } - /* ...and vertex buffer. */ + /* ...vertex buffer for SWTCL path... */ if (r300->vbo) { - if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, - RADEON_GEM_DOMAIN_GTT, 0)) { + if (!r300_add_buffer(r300->rws, r300->vbo, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } - } else { - /* debug_printf("No VBO while emitting dirty state!\n"); */ } - if (!r300->winsys->validate(r300->winsys)) { + /* ...vertex buffers for HWTCL path... */ + if (do_validate_vertex_buffers) { + for (i = 0; i < r300->velems->count; i++) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300_add_buffer(r300->rws, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + } + /* ...and index buffer for HWTCL path. */ + if (index_buffer) { + if (!r300_add_buffer(r300->rws, index_buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + if (!r300->rws->validate(r300->rws)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ @@ -1076,16 +1100,10 @@ validate: } } -/* Emit all dirty state. */ -void r300_emit_dirty_state(struct r300_context* r300) +unsigned r300_get_num_dirty_dwords(struct r300_context *r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_atom* atom; - unsigned i, dwords = 1024; - int dirty_tex = 0; - - /* Check the required number of dwords against the space remaining in the - * current CS object. If we need more, then flush. */ + unsigned dwords = 0; foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { @@ -1093,12 +1111,17 @@ void r300_emit_dirty_state(struct r300_context* r300) } } - /* Make sure we have at least 2*1024 spare dwords. */ - /* XXX It would be nice to know the number of dwords we really need to - * XXX emit. */ - while (!r300->winsys->check_cs(r300->winsys, dwords)) { - r300->context.flush(&r300->context, 0, NULL); - } + /* XXX This is the compensation for the non-atomized states. */ + dwords += 1024; + + return dwords; +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_atom* atom; if (r300->dirty_state & R300_NEW_QUERY) { r300_emit_query_start(r300); @@ -1107,7 +1130,7 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { - atom->emit(r300, atom->state); + atom->emit(r300, atom->size, atom->state); atom->dirty = FALSE; } } @@ -1133,43 +1156,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } - /* Samplers and textures are tracked separately but emitted together. */ - if (r300->dirty_state & - (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { - r300_emit_texture_count(r300); - - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->dirty_state & - ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { - if (r300->textures[i]) { - r300_emit_texture(r300, - r300->sampler_states[i], - r300->textures[i], - i); - dirty_tex |= r300->dirty_state & (R300_NEW_TEXTURE << i); - } - r300->dirty_state &= - ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); - } - } - r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); - } - - if (dirty_tex) { - r300_flush_textures(r300); - } - - if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { - r300_flush_pvs(r300); - } - - if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { - r300_emit_vertex_shader(r300, r300->vs); - r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; - } - if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) { - r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants); + struct r300_vertex_shader* vs = r300->vs_state.state; + r300_emit_vs_constant_buffer(r300, &vs->code.constants); r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; } @@ -1177,8 +1166,10 @@ void r300_emit_dirty_state(struct r300_context* r300) assert(r300->dirty_state == 0); */ - /* Finally, emit the VBO. */ - /* r300_emit_vertex_buffer(r300); */ + /* Emit the VBO for SWTCL. */ + if (!r300screen->caps->has_tcl) { + r300_emit_vertex_buffer(r300); + } r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6b96d9b57c0..7db2fc6a1a1 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,13 +31,17 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, void* state); +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, void* state); +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_clip_state(struct r300_context* r300, void* state); +void r300_emit_clip_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_dsa_state(struct r300_context* r300, void* state); +void r300_emit_dsa_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -51,48 +55,53 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r500_emit_fs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_fb_state(struct r300_context* r300, void* state); +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_query_begin(struct r300_context* r300, - struct r300_query* query); +void r300_emit_query_start(struct r300_context* r300); void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, void* state); +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_rs_block_state(struct r300_context* r300, void* state); +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_scissor_state(struct r300_context* r300, void* state); +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset); +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state); void r300_emit_vertex_buffer(struct r300_context* r300); -void r300_emit_vertex_format_state(struct r300_context* r300, void* state); +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code); +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs); +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_viewport_state(struct r300_context* r300, void* state); +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_ztop_state(struct r300_context* r300, void* state); +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); -void r300_flush_textures(struct r300_context* r300); +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); + +unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); -void r300_emit_buffer_validate(struct r300_context *r300); +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer); #endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index e37d3092703..70de152713d 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -61,6 +61,12 @@ static void r300_flush(struct pipe_context* pipe, atom->dirty = TRUE; } } + + /* Unmark HWTCL state for SWTCL. */ + if (!r300_screen(pipe->screen)->caps->has_tcl) { + r300->vs_state.dirty = FALSE; + r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; + } } /* reset flushed query */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index ae4c62b2f1d..9e71e61c303 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -133,10 +133,13 @@ static void get_compare_state( struct r300_fragment_program_external_state* state, unsigned shadow_samplers) { + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; + memset(state, 0, sizeof(*state)); - for (int i = 0; i < r300->sampler_count; i++) { - struct r300_sampler_state* s = r300->sampler_states[i]; + for (int i = 0; i < texstate->sampler_count; i++) { + struct r300_sampler_state* s = texstate->sampler_states[i]; if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { /* XXX Gallium doesn't provide us with any information regarding @@ -204,6 +207,7 @@ static void r300_translate_fragment_shader( DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); assert(0); + abort(); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index a249e8b36be..1c2b2528877 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -540,7 +540,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST(x) ((x) << 0) # define R300_PVS_XYZW_VALID_INST(x) ((x) << 10) # define R300_PVS_LAST_INST(x) ((x) << 20) -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1500,6 +1500,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ANISO_THRESHOLD_MASK (7<<17) # define R500_MACRO_SWITCH (1<<22) +# define R500_TX_MAX_ANISO(x) ((x) << 23) +# define R500_TX_MAX_ANISO_MASK (63 << 23) +# define R500_TX_ANISO_HIGH_QUALITY (1 << 30) + # define R500_BORDER_FIX (1<<31) #define R300_TX_FORMAT0_0 0x4480 @@ -1857,7 +1861,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 648d884654f..47100c83b0b 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -30,10 +30,12 @@ #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_upload_mgr.h" #include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" +#include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_render.h" @@ -118,10 +120,50 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +/* Check if the requested number of dwords is available in the CS and + * if not, flush. Return TRUE if the flush occured. */ +static boolean r300_reserve_cs_space(struct r300_context *r300, + unsigned dwords) +{ + if (!r300->rws->check_cs(r300->rws, dwords)) { + r300->context.flush(&r300->context, 0, NULL); + return TRUE; + } + return FALSE; +} + static boolean immd_is_good_idea(struct r300_context *r300, - unsigned count) + unsigned count) { - return count <= 4; + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + boolean checked[PIPE_MAX_ATTRIBS] = {0}; + unsigned vertex_element_count = r300->velems->count; + unsigned i, vbi; + + if (count > 4) { + return FALSE; + } + + /* We shouldn't map buffers referenced by CS, busy buffers, + * and ones placed in VRAM. */ + /* XXX Check for VRAM buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->velems->velem[i]; + vbi = velem->vertex_buffer_index; + + if (!checked[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + + if (r300_buffer_is_referenced(r300, + vbuf->buffer)) { + /* It's a very bad idea to map it... */ + return FALSE; + } + checked[vbi] = TRUE; + } + } + return TRUE; } static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -131,8 +173,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; - unsigned vertex_element_count = r300->vertex_element_count; - unsigned i, v, vbi, dw, elem_offset; + unsigned vertex_element_count = r300->velems->count; + unsigned i, v, vbi, dw, elem_offset, dwords; /* Size of the vertex, in dwords. */ unsigned vertex_size = 0; @@ -154,7 +196,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { - velem = &r300->vertex_element[i]; + velem = &r300->velems->velem[i]; offset[i] = velem->src_offset / 4; size[i] = util_format_get_blocksize(velem->src_format) / 4; vertex_size += size[i]; @@ -171,14 +213,19 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, } } + dwords = 9 + count * vertex_size; + + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); - BEGIN_CS(10 + count * vertex_size); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); @@ -186,7 +233,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - velem = &r300->vertex_element[i]; + velem = &r300->velems->velem[i]; vbi = velem->vertex_buffer_index; elem_offset = offset[i] + stride[vbi] * (v + start); @@ -199,7 +246,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Unmap buffers. */ for (i = 0; i < vertex_element_count; i++) { - vbi = r300->vertex_element[i].vertex_buffer_index; + vbi = r300->velems->velem[i].vertex_buffer_index; if (map[vbi]) { vbuf = &r300->vertex_buffer[vbi]; @@ -222,15 +269,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300, if (alt_num_verts) { assert(count < (1 << 24)); - BEGIN_CS(10); + BEGIN_CS(9); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } else { - BEGIN_CS(8); + BEGIN_CS(7); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -256,19 +304,25 @@ static void r300_emit_draw_elements(struct r300_context *r300, #endif CS_LOCALS(r300); - assert((start * indexSize) % 4 == 0); + assert((start * indexSize) % 4 == 0); + assert(count < (1 << 24)); + + maxIndex = MIN3(maxIndex, r300->vertex_buffer_max_index, count - minIndex); + + DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", + count, minIndex, maxIndex); if (alt_num_verts) { - assert(count < (1 << 24)); - BEGIN_CS(16); + BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } else { - BEGIN_CS(14); + BEGIN_CS(13); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(maxIndex); + OUT_CS(minIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; @@ -292,37 +346,12 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_RELOC(indexBuffer, count_dwords, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } -static boolean r300_setup_vertex_buffers(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - struct pipe_buffer *pbuf; - -validate: - for (int i = 0; i < r300->vertex_element_count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - - if (!r300->winsys->add_buffer(r300->winsys, pbuf, - RADEON_GEM_DOMAIN_GTT, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - return r300->winsys->validate(r300->winsys); - } - - return TRUE; -} - static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned count) @@ -378,32 +407,23 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } - r300_update_derived_state(r300); - - r300_emit_buffer_validate(r300); - - if (!r300_setup_vertex_buffers(r300)) { - return; - } - if (indexSize == 1) { r300_shorten_ubyte_elts(r300, &indexBuffer, count); indexSize = 2; } - if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - goto cleanup; - } + r300_update_derived_state(r300); - if (!r300->winsys->validate(r300->winsys)) { - goto cleanup; - } + r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); + /* 128 dwords for emit_aos and emit_draw_elements */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + u_upload_flush(r300->upload_vb); + u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); @@ -415,12 +435,18 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; + + /* 16 spare dwords are enough for emit_draw_elements. */ + if (count && r300_reserve_cs_space(r300, 16)) { + r300_emit_buffer_validate(r300, TRUE, indexBuffer); + r300_emit_dirty_state(r300); + r300_emit_aos(r300, 0); + } } while (count); } -cleanup: if (indexBuffer != orgIndexBuffer) { - pipe->screen->buffer_destroy(indexBuffer); + pipe_buffer_reference( &indexBuffer, NULL ); } } @@ -430,8 +456,11 @@ void r300_draw_elements(struct pipe_context* pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + struct r300_context *r300 = r300_context(pipe); + + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, + r300->vertex_buffer_max_index, + mode, start, count); } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, @@ -452,15 +481,13 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - r300_emit_buffer_validate(r300); - if (immd_is_good_idea(r300, count)) { r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { - if (!r300_setup_vertex_buffers(r300)) { - return; - } - + /* Make sure there are at least 128 spare dwords in the command buffer. + * (most of it being consumed by emit_aos) */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { @@ -474,8 +501,16 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, start += short_count; count -= short_count; + + /* Again, we emit both AOS and draw_arrays so there should be + * at least 128 spare dwords. */ + if (count && r300_reserve_cs_space(r300, 128)) { + r300_emit_buffer_validate(r300, TRUE, NULL); + r300_emit_dirty_state(r300); + } } while (count); } + u_upload_flush(r300->upload_vb); } } @@ -605,7 +640,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return (struct vertex_info*)r300->vertex_format_state.state; + return &r300->vertex_info; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, @@ -690,6 +725,8 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -708,12 +745,15 @@ static void r300_render_draw(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; + unsigned dwords = 2 + (count+1)/2; CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); - BEGIN_CS(2 + (count+1)/2); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6a55570571a..3e31688f8e8 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,17 +21,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "r300_context.h" -#include "r300_screen.h" #include "r300_texture.h" #include "radeon_winsys.h" -#include "r300_winsys.h" + +#include "r300_screen_buffer.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -210,9 +209,9 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, { uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps->is_r500; - boolean is_z24 = format == PIPE_FORMAT_Z24X8_UNORM || - format == PIPE_FORMAT_Z24S8_UNORM; - boolean is_color2101010 = format == PIPE_FORMAT_A2B10G10R10_UNORM; + boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_S8Z24_UNORM; + boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM; if (target >= PIPE_MAX_TEXTURE_TYPES) { debug_printf("r300: Implementation error: Received bogus texture " @@ -231,14 +230,16 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check colorbuffer format support. */ if ((usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) && + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) && /* 2101010 cannot be rendered to on non-r5xx. */ (is_r500 || !is_color2101010) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY); + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED); } /* Check depth-stencil format support. */ @@ -250,82 +251,22 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, return retval == usage; } -static struct pipe_transfer* -r300_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, unsigned x, unsigned y, - unsigned w, unsigned h) -{ - struct r300_texture *tex = (struct r300_texture *)texture; - struct r300_transfer *trans; - struct r300_screen *rscreen = r300_screen(screen); - unsigned offset; - - offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ - - trans = CALLOC_STRUCT(r300_transfer); - if (trans) { - pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.x = x; - trans->transfer.y = y; - trans->transfer.width = w; - trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); - trans->transfer.usage = usage; - trans->transfer.zslice = zslice; - trans->transfer.face = face; - - trans->offset = offset; - } - return &trans->transfer; -} - -static void -r300_tex_transfer_destroy(struct pipe_transfer *trans) -{ - pipe_texture_reference(&trans->texture, NULL); - FREE(trans); -} - -static void* r300_transfer_map(struct pipe_screen* screen, - struct pipe_transfer* transfer) -{ - struct r300_texture* tex = (struct r300_texture*)transfer->texture; - char* map; - enum pipe_format format = tex->tex.format; - - map = pipe_buffer_map(screen, tex->buffer, - pipe_transfer_buffer_flags(transfer)); - - if (!map) { - return NULL; - } - - return map + r300_transfer(transfer)->offset + - transfer->y / util_format_get_blockheight(format) * transfer->stride + - transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); -} - -static void r300_transfer_unmap(struct pipe_screen* screen, - struct pipe_transfer* transfer) -{ - struct r300_texture* tex = (struct r300_texture*)transfer->texture; - pipe_buffer_unmap(screen, tex->buffer); -} - static void r300_destroy_screen(struct pipe_screen* pscreen) { struct r300_screen* r300screen = r300_screen(pscreen); + struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + + if (rws) + rws->destroy(rws); FREE(r300screen->caps); FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) { - struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); - struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); + struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); + struct r300_capabilities *caps = CALLOC_STRUCT(r300_capabilities); if (!r300screen || !caps) { FREE(r300screen); @@ -333,16 +274,16 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) return NULL; } - caps->pci_id = radeon_winsys->pci_id; - caps->num_frag_pipes = radeon_winsys->gb_pipes; - caps->num_z_pipes = radeon_winsys->z_pipes; + caps->pci_id = rws->get_value(rws, R300_VID_PCI_ID); + caps->num_frag_pipes = rws->get_value(rws, R300_VID_GB_PIPES); + caps->num_z_pipes = rws->get_value(rws, R300_VID_Z_PIPES); r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->radeon_winsys = radeon_winsys; - r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; + r300screen->rws = rws; + r300screen->screen.winsys = (struct pipe_winsys*)rws; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; @@ -350,13 +291,15 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) r300screen->screen.get_paramf = r300_get_paramf; r300screen->screen.is_format_supported = r300_is_format_supported; r300screen->screen.context_create = r300_create_context; - r300screen->screen.get_tex_transfer = r300_get_tex_transfer; - r300screen->screen.tex_transfer_destroy = r300_tex_transfer_destroy; - r300screen->screen.transfer_map = r300_transfer_map; - r300screen->screen.transfer_unmap = r300_transfer_unmap; r300_init_screen_texture_functions(&r300screen->screen); - u_simple_screen_init(&r300screen->screen); + r300_screen_init_buffer_functions(r300screen); return &r300screen->screen; } + +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen) +{ + return r300_screen(screen)->rws; +} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 502fbfa5a24..1ccc0bfb7a5 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,13 +28,15 @@ #include "r300_chipset.h" +#define R300_TEXTURE_USAGE_TRANSFER PIPE_TEXTURE_USAGE_CUSTOM + struct radeon_winsys; struct r300_screen { /* Parent class */ struct pipe_screen screen; - struct radeon_winsys* radeon_winsys; + struct r300_winsys_screen *rws; /* Chipset capabilities */ struct r300_capabilities* caps; @@ -42,26 +45,12 @@ struct r300_screen { unsigned debug; }; -struct r300_transfer { - /* Parent class */ - struct pipe_transfer transfer; - - /* Offset from start of buffer. */ - unsigned offset; -}; /* Convenience cast wrapper. */ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } -/* Convenience cast wrapper. */ -static INLINE struct r300_transfer* -r300_transfer(struct pipe_transfer* transfer) -{ - return (struct r300_transfer*)transfer; -} - /* Debug functionality. */ /** @@ -81,6 +70,7 @@ r300_transfer(struct pipe_transfer* transfer) #define DBG_DRAW 0x0000010 #define DBG_TEX 0x0000020 #define DBG_FALL 0x0000040 +#define DBG_ANISOHQ 0x0000080 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c new file mode 100644 index 00000000000..b97d0d76a4a --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -0,0 +1,314 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + */ +#include <stdio.h> + +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_math.h" + +#include "r300_screen_buffer.h" + +#include "r300_winsys.h" + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + if (r300_buffer_is_user_buffer(buf)) + return FALSE; + + return r300->rws->is_buffer_referenced(r300->rws, rbuf->buf); + +} +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count) +{ + struct pipe_buffer *upload_buffer = NULL; + unsigned index_offset = start * index_size; + int ret = 0; + + if (r300_buffer_is_user_buffer(*index_buffer)) { + ret = u_upload_buffer(r300->upload_ib, + index_offset, + count * index_size, + *index_buffer, + &index_offset, + &upload_buffer); + if (ret) { + goto done; + } + *index_buffer = upload_buffer; + } + done: + // if (upload_buffer) + // pipe_buffer_reference(&upload_buffer, NULL); + return ret; +} + +int r300_upload_user_buffers(struct r300_context *r300) +{ + enum pipe_error ret = PIPE_OK; + int i, nr; + + nr = r300->vertex_buffer_count; + + for (i = 0; i < nr; i++) { + + if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) { + struct pipe_buffer *upload_buffer = NULL; + unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/ + unsigned size = r300->vertex_buffer[i].buffer->size; + unsigned upload_offset; + ret = u_upload_buffer(r300->upload_vb, + offset, size, + r300->vertex_buffer[i].buffer, + &upload_offset, &upload_buffer); + if (ret) + return ret; + + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); + r300->vertex_buffer[i].buffer = upload_buffer; + r300->vertex_buffer[i].buffer_offset = upload_offset; + } + } + return ret; +} + +static struct r300_winsys_buffer * +r300_winsys_buffer_create(struct r300_screen *r300screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_winsys_buffer *buf; + + buf = rws->buffer_create(rws, alignment, usage, size); + return buf; +} + +static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, + struct r300_buffer *rbuf) +{ + struct r300_winsys_screen *rws = r300screen->rws; + + if (rbuf->buf) { + rws->buffer_reference(rws, &rbuf->buf, NULL); + rbuf->buf = NULL; + } +} + +static struct pipe_buffer *r300_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto error1; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = alignment; + rbuf->base.usage = usage; + rbuf->base.size = size; + + rbuf->buf = r300_winsys_buffer_create(r300screen, + alignment, + usage, + size); + + if (!rbuf->buf) + goto error2; + + return &rbuf->base; +error2: + FREE(rbuf); +error1: + return NULL; +} + + +static struct pipe_buffer *r300_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto no_rbuf; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = 1; + rbuf->base.usage = 0; + rbuf->base.size = bytes; + + rbuf->user_buffer = ptr; + return &rbuf->base; + +no_rbuf: + return NULL; +} + +static void r300_buffer_destroy(struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(buf->screen); + struct r300_buffer *rbuf = r300_buffer(buf); + + r300_winsys_buffer_destroy(r300screen, rbuf); + FREE(rbuf); +} + +static void * +r300_buffer_map_range(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length, + unsigned usage ) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + int flush = 0; + int i; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + goto just_map; + + /* check if the mapping is to a range we already flushed */ + if (usage & PIPE_BUFFER_USAGE_DISCARD) { + for (i = 0; i < rbuf->num_ranges; i++) { + + if ((offset >= rbuf->ranges[i].start) && + (offset < rbuf->ranges[i].end)) + flush = 1; + + if (flush) { + /* unreference this hw buffer and allocate a new one */ + rws->buffer_reference(rws, &rbuf->buf, NULL); + + rbuf->num_ranges = 0; + rbuf->map = NULL; + rbuf->buf = r300_winsys_buffer_create(r300screen, + rbuf->base.alignment, + rbuf->base.usage, + rbuf->base.size); + break; + } + } + } +just_map: + map = rws->buffer_map(rws, rbuf->buf, usage | R300_USAGE_FLAG_DONT_SYNC); + + return map; +} + +static void +r300_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, + unsigned length ) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + int i; + + if (rbuf->user_buffer) + return; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + return; + + /* mark the range as used */ + for(i = 0; i < rbuf->num_ranges; ++i) { + if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+length)) { + rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); + rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); + return; + } + } + + rbuf->ranges[rbuf->num_ranges].start = offset; + rbuf->ranges[rbuf->num_ranges].end = offset+length; + rbuf->num_ranges++; +} + +static void * +r300_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + map = rws->buffer_map(rws, rbuf->buf, usage); + + return map; +} + +static void +r300_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + + if (rbuf->buf) { + rws->buffer_unmap(rws, rbuf->buf); + } +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen) +{ + r300screen->screen.buffer_create = r300_buffer_create; + r300screen->screen.user_buffer_create = r300_user_buffer_create; + r300screen->screen.buffer_map = r300_buffer_map; + r300screen->screen.buffer_map_range = r300_buffer_map_range; + r300screen->screen.buffer_flush_mapped_range = r300_buffer_flush_mapped_range; + r300screen->screen.buffer_unmap = r300_buffer_unmap; + r300screen->screen.buffer_destroy = r300_buffer_destroy; +} diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h new file mode 100644 index 00000000000..0cf349c25cd --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -0,0 +1,99 @@ +#ifndef R300_SCREEN_BUFFER_H +#define R300_SCREEN_BUFFER_H +#include <stdio.h> +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "r300_screen.h" + +#include "r300_winsys.h" +#include "r300_context.h" + +#define R300_BUFFER_MAGIC 0xabcd1234 + +struct r300_buffer_range { + uint32_t start; + uint32_t end; +}; +#define R300_BUFFER_MAX_RANGES 32 + +struct r300_buffer +{ + struct pipe_buffer base; + + uint32_t magic; + + struct r300_winsys_buffer *buf; + + void *user_buffer; + struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; + unsigned num_ranges; + + void *map; +}; + +static INLINE struct r300_buffer * +r300_buffer(struct pipe_buffer *buffer) +{ + if (buffer) { + assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC); + return (struct r300_buffer *)buffer; + } + return NULL; +} + +static INLINE boolean +r300_buffer_is_user_buffer(struct pipe_buffer *buffer) +{ + return r300_buffer(buffer)->user_buffer ? true : false; +} + +static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws, + struct pipe_buffer *buffer, + int rd, int wr) +{ + struct r300_buffer *buf = r300_buffer(buffer); + + if (!buf->buf) + return true; + + return rws->add_buffer(rws, buf->buf, rd, wr); +} + + +static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, + struct r300_texture *tex, + int rd, int wr) +{ + return rws->add_buffer(rws, tex->buffer, rd, wr); +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen); + +static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, + struct r300_buffer *buf, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + if (!buf->buf) + return; + + rws->write_cs_reloc(rws, buf->buf, rd, wd, flags); +} + +static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, + struct r300_texture *texture, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); +} + +int r300_upload_user_buffers(struct r300_context *r300); + +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count); + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf); +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 34bf81c1930..712e9280e3c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -34,6 +34,7 @@ #include "r300_context.h" #include "r300_reg.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_vs.h" @@ -43,6 +44,12 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +#define UPDATE_STATE(cso, atom) \ + if (cso != atom.state) { \ + atom.state = cso; \ + atom.dirty = TRUE; \ + } + static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, unsigned dstRGB, unsigned dstA) { @@ -328,8 +335,7 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state.state = state; - r300->blend_state.dirty = TRUE; + UPDATE_STATE(state, r300->blend_state); } /* Free blend state. */ @@ -356,7 +362,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ @@ -376,6 +382,8 @@ static void r300_set_clip_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); + r300->clip = *state; + if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); r300->clip_state.size = 29; @@ -476,11 +484,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state.state = state; - r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; - r300->dsa_state.dirty = TRUE; + UPDATE_STATE(state, r300->dsa_state); } /* Free DSA state. */ @@ -521,7 +526,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->cbufs[i]->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -533,7 +538,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->zsbuf->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -545,7 +550,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->cbufs[i]->texture; level = new_state->cbufs[i]->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -554,7 +559,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->zsbuf->texture; level = new_state->zsbuf->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -567,6 +572,7 @@ static void { struct r300_context* r300 = r300_context(pipe); struct r300_screen* r300screen = r300_screen(pipe->screen); + struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height; uint32_t zbuffer_bpp = 0; @@ -591,22 +597,30 @@ static void return; } - if (r300->draw) { draw_flush(r300->draw); } - memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + r300->fb_state.dirty = TRUE; - r300->fb_state.size = (10 * state->nr_cbufs) + (state->zsbuf ? 10 : 0) + 6; + /* If nr_cbufs is changed from zero to non-zero or vice versa... */ + if (!!old_state->nr_cbufs != !!state->nr_cbufs) { + r300->blend_state.dirty = TRUE; + } + /* If zsbuf is set from NULL to non-NULL or vice versa.. */ + if (!!old_state->zsbuf != !!state->zsbuf) { + r300->dsa_state.dirty = TRUE; + } + if (!r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; + } r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); - /* XXX wait what */ - r300->blend_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; - r300->fb_state.dirty = TRUE; - r300->scissor_state.dirty = TRUE; + memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + + r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + + (state->zsbuf ? 10 : 0) + 8; /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -658,8 +672,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); - if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { - r300->vertex_format_state.dirty = TRUE; + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + + if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { + r300->vap_output_state.dirty = TRUE; } r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -709,32 +725,14 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->vap_control_status = R300_VC_32BIT_SWAP; #endif - /* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL. - * Else, enable HW TCL and force Draw's TCL off. */ - if (state->bypass_vs_clip_and_viewport || - !r300screen->caps->has_tcl) { + /* If no TCL engine is present, turn off the HW TCL. */ + if (!r300screen->caps->has_tcl) { rs->vap_control_status |= R300_VAP_TCL_BYPASS; } rs->point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); - /* Point minimum and maximum sizes. This register has to be emitted, - * and it'd be a step backwards to put it in invariant state. */ - if (r300screen->caps->is_r500) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4096.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else if (r300screen->caps->is_r400) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4021.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(2560.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } - rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; @@ -817,6 +815,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); struct r300_rs_state* rs = (struct r300_rs_state*)state; + boolean scissor_was_enabled = r300->scissor_enabled; if (r300->draw) { draw_flush(r300->draw); @@ -824,22 +823,18 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } if (rs) { - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + r300->scissor_enabled = rs->rs.scissor; } else { - r300->tcl_bypass = FALSE; r300->polygon_offset_enabled = FALSE; + r300->scissor_enabled = FALSE; } - r300->rs_state.state = rs; - r300->rs_state.dirty = TRUE; - /* XXX Why is this still needed, dammit!? */ - r300->scissor_state.dirty = TRUE; - r300->viewport_state.dirty = TRUE; + UPDATE_STATE(state, r300->rs_state); + r300->rs_state.size = 17 + (r300->polygon_offset_enabled ? 5 : 0); - /* XXX Clean these up when we move to atom emits */ - if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { - r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + if (scissor_was_enabled != r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; } } @@ -855,6 +850,7 @@ static void* { struct r300_context* r300 = r300_context(pipe); struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); + boolean is_r500 = r300_screen(pipe->screen)->caps->is_r500; int lod_bias; union util_color uc; @@ -870,8 +866,10 @@ static void* state->min_mip_filter, state->max_anisotropy > 0); + sampler->filter0 |= r300_anisotropy(state->max_anisotropy); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ - /* We must pass these to the emit function to clamp them properly. */ + /* We must pass these to the merge function to clamp them properly. */ sampler->min_lod = MAX2((unsigned)state->min_lod, 0); sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); @@ -879,9 +877,15 @@ static void* sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; - sampler->filter1 |= r300_anisotropy(state->max_anisotropy); + /* This is very high quality anisotropic filtering for R5xx. + * It's good for benchmarking the performance of texturing but + * in practice we don't want to slow down the driver because it's + * a pretty good performance killer. Feel free to play with it. */ + if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) { + sampler->filter1 |= r500_anisotropy(state->max_anisotropy); + } - util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); sampler->border_color = uc.ui; /* R500-specific fixups and optimizations */ @@ -897,23 +901,20 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, void** states) { struct r300_context* r300 = r300_context(pipe); - int i; + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; if (count > 8) { return; } - for (i = 0; i < count; i++) { - if (r300->sampler_states[i] != states[i]) { - r300->sampler_states[i] = (struct r300_sampler_state*)states[i]; - r300->dirty_state |= (R300_NEW_SAMPLER << i); - } - } + memcpy(state->sampler_states, states, sizeof(void*) * count); + state->sampler_count = count; - r300->sampler_count = count; + r300->textures_state.dirty = TRUE; /* Pick a fragment shader based on the texture compare state. */ - if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) { + if (r300->fs && count) { if (r300_pick_fragment_shader(r300)) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -937,22 +938,25 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i; boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - int i; + boolean dirty_tex = FALSE; /* XXX magic num */ if (count > 8) { return; } - + for (i = 0; i < count; i++) { - if (r300->textures[i] != (struct r300_texture*)texture[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], - texture[i]); - r300->dirty_state |= (R300_NEW_TEXTURE << i); + if (state->textures[i] != (struct r300_texture*)texture[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], + texture[i]); + dirty_tex = TRUE; - /* R300-specific - set the texrect factor in a fragment shader */ - if (!is_r500 && r300->textures[i]->is_npot) { + /* R300-specific - set the texrect factor in the fragment shader */ + if (!is_r500 && state->textures[i]->is_npot) { /* XXX It would be nice to re-emit just 1 constant, * XXX not all of them */ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -961,14 +965,19 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, } for (i = count; i < 8; i++) { - if (r300->textures[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], + if (state->textures[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], NULL); - r300->dirty_state |= (R300_NEW_TEXTURE << i); } } - r300->texture_count = count; + state->texture_count = count; + + r300->textures_state.dirty = TRUE; + + if (dirty_tex) { + r300->texture_cache_inval.dirty = TRUE; + } } static void r300_set_scissor_state(struct pipe_context* pipe, @@ -979,7 +988,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, memcpy(r300->scissor_state.state, state, sizeof(struct pipe_scissor_state)); - r300->scissor_state.dirty = TRUE; + if (r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; + } } static void r300_set_viewport_state(struct pipe_context* pipe, @@ -989,6 +1000,8 @@ static void r300_set_viewport_state(struct pipe_context* pipe, struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + r300->viewport = *state; + /* Do the transform in HW. */ viewport->vte_control = R300_VTX_W0_FMT; @@ -1028,27 +1041,45 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); + int i; + unsigned max_index = (1 << 24) - 1; + boolean any_user_buffer = false; + + if (count == r300->vertex_buffer_count && + memcmp(r300->vertex_buffer, buffers, count * sizeof(buffers[0])) == 0) + return; + + for (i = 0; i < count; i++) { + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); + if (r300_buffer_is_user_buffer(buffers[i].buffer)) + any_user_buffer = true; + max_index = MIN2(buffers[i].max_index, max_index); + } + + for ( ; i < r300->vertex_buffer_count; i++) + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); + sizeof(struct pipe_vertex_buffer) * count); + r300->vertex_buffer_count = count; + r300->vertex_buffer_max_index = max_index; + r300->any_user_vbs = any_user_buffer; if (r300->draw) { draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } - - r300->vertex_format_state.dirty = TRUE; } static boolean r300_validate_aos(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_vertex_element *velem = r300->velems->velem; int i; /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { + for (i = 0; i < r300->velems->count; i++) { if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || util_format_get_blocksize(velem[i].src_format) % 4 != 0) { return FALSE; @@ -1057,20 +1088,209 @@ static boolean r300_validate_aos(struct r300_context *r300) return TRUE; } -static void r300_set_vertex_elements(struct pipe_context* pipe, - unsigned count, - const struct pipe_vertex_element* elements) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = r300->vs_state.state; + struct tgsi_shader_info* info = &vs->info; + int output; + + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output); +} + +static void r300_draw_emit_all_attribs(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_shader_semantics* vs_outputs = &vs->outputs; + int i, gen_count; + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); + } else { + assert(0); + } + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); + } + } - memcpy(r300->vertex_element, - elements, - sizeof(struct pipe_vertex_element) * count); - r300->vertex_element_count = count; + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Update the PSC tables. */ +static void r300_vertex_psc(struct r300_vertex_element_state *velems) +{ + struct r300_vertex_stream_state *vstream = &velems->vertex_stream; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i; + + assert(velems->count <= 16); + + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on the vertex elements, + * and not on attrib information. */ + for (i = 0; i < velems->count; i++) { + format = velems->velem[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (i << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); + + if (i & 1) { + vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vstream->vap_prog_stream_cntl[i >> 1] |= type; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } + } + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vstream->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vstream->count = (i >> 1) + 1; +} + +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context *r300, + struct r300_vertex_element_state *velems) +{ + struct r300_vertex_stream_state *vstream = &velems->vertex_stream; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct vertex_info* vinfo = &r300->vertex_info; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i, attrib_count; + int* vs_output_tab = vs->stream_loc_notcl; + + /* For each Draw attribute, route it to the fragment shader according + * to the vs_output_tab. */ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " vs_output_tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + vs_output_tab[i]); + } + + for (i = 0; i < attrib_count; i++) { + /* Make sure we have a proper destination for our attribute. */ + assert(vs_output_tab[i] != -1); + + format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + + /* Obtain the type of data in this attribute. */ + type = r300_translate_vertex_data_type(format) | + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; + + /* Obtain the swizzle for this attribute. Note that the default + * swizzle in the hardware is not XYZW! */ + swizzle = r300_translate_vertex_data_swizzle(format); + + /* Add the attribute to the PSC table. */ + if (i & 1) { + vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vstream->vap_prog_stream_cntl[i >> 1] |= type; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } + } + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vstream->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vstream->count = (i >> 1) + 1; +} + +static void* r300_create_vertex_elements_state(struct pipe_context* pipe, + unsigned count, + const struct pipe_vertex_element* attribs) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_vertex_element_state *velems; + + assert(count <= PIPE_MAX_ATTRIBS); + velems = CALLOC_STRUCT(r300_vertex_element_state); + if (velems != NULL) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); + + if (r300screen->caps->has_tcl) { + r300_vertex_psc(velems); + } else { + memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info); + r300_swtcl_vertex_psc(r300, velems); + } + } + return velems; +} + +static void r300_bind_vertex_elements_state(struct pipe_context *pipe, + void *state) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + if (velems == NULL) { + return; + } + + r300->velems = velems; if (r300->draw) { draw_flush(r300->draw); - draw_set_vertex_elements(r300->draw, count, elements); + draw_set_vertex_elements(r300->draw, velems->count, velems->velem); } if (!r300_validate_aos(r300)) { @@ -1078,6 +1298,14 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, assert(0); abort(); } + + UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); + r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; +} + +static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) +{ + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1085,64 +1313,71 @@ static void* r300_create_vs_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); - /* Copy state directly into shader. */ - vs->state = *shader; - vs->state.tokens = tgsi_dup_tokens(shader->tokens); - - tgsi_scan_shader(shader->tokens, &vs->info); + struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); + r300_vertex_shader_common_init(vs, shader); - return (void*)vs; + if (r300_screen(pipe->screen)->caps->has_tcl) { + r300_translate_vertex_shader(r300, vs); } else { - return draw_create_vertex_shader(r300->draw, shader); + vs->draw_vs = draw_create_vertex_shader(r300->draw, shader); } + + return vs; } static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; - if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; + if (vs == NULL) { + r300->vs_state.state = NULL; + return; + } + if (vs == r300->vs_state.state) { + return; + } + r300->vs_state.state = vs; - if (vs == NULL) { - r300->vs = NULL; - return; - } else if (!vs->translated) { - r300_translate_vertex_shader(r300, vs); - } + // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + memcpy(r300->vap_output_state.state, &vs->vap_out, + sizeof(struct r300_vap_output_state)); + r300->vap_output_state.dirty = TRUE; - r300->vs = vs; - if (r300->fs) { - r300_vertex_shader_setup_wpos(r300); - } + /* The majority of the RS block bits is dependent on the vertex shader. */ + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + + if (r300_screen(pipe->screen)->caps->has_tcl) { + r300->vs_state.dirty = TRUE; + r300->vs_state.size = vs->code.length + 9; - r300->vertex_format_state.dirty = TRUE; + r300->pvs_flush.dirty = TRUE; - r300->dirty_state |= - R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, - (struct draw_vertex_shader*)shader); + (struct draw_vertex_shader*)vs->draw_vs); } } static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; - rc_constants_destroy(&vs->code.constants); - FREE((void*)vs->state.tokens); - FREE(shader); } else { draw_delete_vertex_shader(r300->draw, - (struct draw_vertex_shader*)shader); + (struct draw_vertex_shader*)vs->draw_vs); } + + FREE((void*)vs->state.tokens); + FREE(shader); } static void r300_set_constant_buffer(struct pipe_context *pipe, @@ -1193,8 +1428,12 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); pipe_buffer_unmap(pipe->screen, buf); - if (shader == PIPE_SHADER_VERTEX) - r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + if (shader == PIPE_SHADER_VERTEX) { + if (r300screen->caps->has_tcl) { + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->pvs_flush.dirty = TRUE; + } + } else if (shader == PIPE_SHADER_FRAGMENT) r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -1241,7 +1480,10 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; - r300->context.set_vertex_elements = r300_set_vertex_elements; + + r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; + r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; + r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state; r300->context.create_vs_state = r300_create_vs_state; r300->context.bind_vs_state = r300_bind_vs_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2cbce9210a7..6b9f61acd7b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -37,179 +37,6 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -static void r300_draw_emit_attrib(struct r300_context* r300, - enum attrib_emit emit, - enum interp_mode interp, - int index) -{ - struct tgsi_shader_info* info = &r300->vs->info; - int output; - - output = draw_find_shader_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); - draw_emit_vertex_attr( - (struct vertex_info*)r300->vertex_format_state.state, - emit, interp, output); -} - -static void r300_draw_emit_all_attribs(struct r300_context* r300) -{ - struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; - int i, gen_count; - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->pos); - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, - vs_outputs->psize); - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, - vs_outputs->color[i]); - } - } - - /* XXX Back-face colors. */ - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->generic[i]); - gen_count++; - } - } - - /* Fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->fog); - gen_count++; - } - - /* XXX magic */ - assert(gen_count <= 8); -} - -/* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300) -{ - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - uint16_t type, swizzle; - enum pipe_format format; - unsigned i; - int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - int* stream_tab; - - /* If TCL is bypassed, map vertex streams to equivalent VS output - * locations. */ - if (r300->tcl_bypass) { - stream_tab = r300->vs->stream_loc_notcl; - } else { - stream_tab = identity; - } - - /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on the vertex elements, - * and not on attrib information. */ - DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" - " in psc\n", - r300->vs->info.num_inputs, - r300->vertex_element_count); - - for (i = 0; i < r300->vertex_element_count; i++) { - format = r300->vertex_element[i].src_format; - - type = r300_translate_vertex_data_type(format) | - (stream_tab[i] << R300_DST_VEC_LOC_SHIFT); - swizzle = r300_translate_vertex_data_swizzle(format); - - if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; - } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; - } - } - - assert(i <= 15); - - /* Set the last vector in the PSC. */ - if (i) { - i -= 1; - } - vformat->vap_prog_stream_cntl[i >> 1] |= - (R300_LAST_VEC << (i & 1 ? 16 : 0)); -} - -/* Update the PSC tables for SW TCL, using Draw. */ -static void r300_swtcl_vertex_psc(struct r300_context* r300) -{ - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; - uint16_t type, swizzle; - enum pipe_format format; - unsigned i, attrib_count; - int* vs_output_tab = r300->vs->stream_loc_notcl; - - /* For each Draw attribute, route it to the fragment shader according - * to the vs_output_tab. */ - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " vs_output_tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - vs_output_tab[i]); - } - - for (i = 0; i < attrib_count; i++) { - /* Make sure we have a proper destination for our attribute. */ - assert(vs_output_tab[i] != -1); - - format = draw_translate_vinfo_format(vinfo->attrib[i].emit); - - /* Obtain the type of data in this attribute. */ - type = r300_translate_vertex_data_type(format) | - vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; - - /* Obtain the swizzle for this attribute. Note that the default - * swizzle in the hardware is not XYZW! */ - swizzle = r300_translate_vertex_data_swizzle(format); - - /* Add the attribute to the PSC table. */ - if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; - } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; - } - } - - /* Set the last vector in the PSC. */ - if (i) { - i -= 1; - } - vformat->vap_prog_stream_cntl[i >> 1] |= - (R300_LAST_VEC << (i & 1 ? 16 : 0)); -} - static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, boolean swizzle_0001) { @@ -416,33 +243,16 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count; - r300->rs_block_state.dirty = TRUE; + r300->rs_block_state.size = 5 + count*2; } } /* Update the shader-dependant states. */ static void r300_update_derived_shader_state(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; + struct r300_vertex_shader* vs = r300->vs_state.state; - /* Mmm, delicious hax */ - memset(r300->vertex_format_state.state, 0, sizeof(struct r300_vertex_info)); - memcpy(vinfo->hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - - r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); - - if (r300screen->caps->has_tcl) { - r300_vertex_psc(r300); - } else { - r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size( - (struct vertex_info*)r300->vertex_format_state.state); - r300_swtcl_vertex_psc(r300); - } + r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs); } static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) @@ -516,14 +326,72 @@ static void r300_update_ztop(struct r300_context* r300) r300->ztop_state.dirty = TRUE; } +static void r300_merge_textures_and_samplers(struct r300_context* r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct r300_texture_sampler_state *texstate; + struct r300_sampler_state *sampler; + struct r300_texture *tex; + unsigned min_level, max_level, i, size; + unsigned count = MIN2(state->texture_count, state->sampler_count); + + state->tx_enable = 0; + size = 2; + + for (i = 0; i < count; i++) { + if (state->textures[i] && state->sampler_states[i]) { + state->tx_enable |= 1 << i; + + tex = state->textures[i]; + sampler = state->sampler_states[i]; + + texstate = &state->regs[i]; + memcpy(texstate->format, &tex->state, sizeof(uint32_t)*3); + texstate->filter[0] = sampler->filter0; + texstate->filter[1] = sampler->filter1; + texstate->border_color = sampler->border_color; + texstate->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile); + + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.target == PIPE_TEXTURE_1D) { + texstate->filter[0] &= ~R300_TX_WRAP_T_MASK; + texstate->filter[0] |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + texstate->filter[0] &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + texstate->format[0] |= R300_TX_NUM_LEVELS(max_level); + texstate->filter[0] |= R300_TX_MAX_MIP_LEVEL(min_level); + } + + texstate->filter[0] |= i << 28; + + size += 16; + state->count = i+1; + } + } + + r300->textures_state.size = size; +} + void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER) || - r300->vertex_format_state.dirty || r300->rs_state.dirty) { + if (r300->rs_block_state.dirty) { r300_update_derived_shader_state(r300); } + if (r300->textures_state.dirty) { + r300_merge_textures_and_samplers(r300); + } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 0e1cb328d17..8485d4f8f94 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -327,6 +327,18 @@ static INLINE uint32_t r300_anisotropy(unsigned max_aniso) } } +static INLINE uint32_t r500_anisotropy(unsigned max_aniso) +{ + if (!max_aniso) { + return 0; + } + max_aniso -= 1; + + // Map the range [0, 15] to [0, 63]. + return R500_TX_MAX_ANISO(MIN2((unsigned)(max_aniso*4.2001), 63)) | + R500_TX_ANISO_HIGH_QUALITY;; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) @@ -348,44 +360,16 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } -/* Utility function to count the number of components in RGBAZS formats. - * XXX should go to util or p_format.h */ -static INLINE unsigned pf_component_count(enum pipe_format format) { - unsigned count = 0; - - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { - count++; - } - - return count; -} - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; const struct util_format_description *desc; - unsigned components = pf_component_count(format); + unsigned components = util_format_get_nr_components(format); desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); @@ -454,36 +438,19 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - unsigned swizzle[4], i; assert(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); return 0; } - /* Swizzles for 8bits formats are in the reversed order, not sure why. */ - if (desc->channel[0].size == 8) { - for (i = 0; i < 4; i++) { - if (desc->swizzle[i] <= 3) { - swizzle[i] = 3 - desc->swizzle[i]; - } else { - swizzle[i] = desc->swizzle[i]; - } - } - } else { - for (i = 0; i < 4; i++) { - swizzle[i] = desc->swizzle[i]; - } - } - - return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | (0xf << R300_WRITE_ENA_SHIFT)); } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 97927acf1b4..4a2c68269b1 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,7 +38,8 @@ struct pipe_viewport_state r300_viewport_identity = { * * Note that eventually this should be empty, but it's useful for development * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300, void* state) +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 5d1a9636545..83d031c7fe9 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -25,6 +25,7 @@ struct r300_context; -void r300_emit_invariant_state(struct r300_context* r300, void* state); +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state); #endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ed2be06254a..7c7656068bb 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -70,6 +70,12 @@ static uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_B_SHIFT, R300_TX_FORMAT_A_SHIFT }; + const uint32_t swizzle[4] = { + R300_TX_FORMAT_X, + R300_TX_FORMAT_Y, + R300_TX_FORMAT_Z, + R300_TX_FORMAT_W + }; const uint32_t sign_bit[4] = { R300_TX_FORMAT_SIGNED_X, R300_TX_FORMAT_SIGNED_Y, @@ -86,8 +92,8 @@ static uint32_t r300_translate_texformat(enum pipe_format format) switch (format) { case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); default: return ~0; /* Unsupported. */ @@ -98,9 +104,9 @@ static uint32_t r300_translate_texformat(enum pipe_format format) result |= R300_TX_FORMAT_YUV_TO_RGB; switch (format) { - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result; - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result; default: return ~0; /* Unsupported/unknown. */ @@ -119,16 +125,16 @@ static uint32_t r300_translate_texformat(enum pipe_format format) switch (desc->swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_NONE: - result |= R300_TX_FORMAT_X << swizzle_shift[i]; + result |= swizzle[0] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Y: - result |= R300_TX_FORMAT_Y << swizzle_shift[i]; + result |= swizzle[1] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Z: - result |= R300_TX_FORMAT_Z << swizzle_shift[i]; + result |= swizzle[2] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_W: - result |= R300_TX_FORMAT_W << swizzle_shift[i]; + result |= swizzle[3] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_0: result |= R300_TX_FORMAT_ZERO << swizzle_shift[i]; @@ -142,7 +148,7 @@ static uint32_t r300_translate_texformat(enum pipe_format format) } /* Compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_DXT) { + if (desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED) { switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -302,33 +308,30 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return R300_COLOR_FORMAT_RGB565; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return R300_COLOR_FORMAT_ARGB1555; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return R300_COLOR_FORMAT_ARGB4444; /* 32-bit buffers. */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8X8_SRGB: - case PIPE_FORMAT_R8G8B8X8_SNORM: - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - case PIPE_FORMAT_X8UB8UG8SR8S_NORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_SRGB: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; - case PIPE_FORMAT_A2B10G10R10_UNORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */ /* 64-bit buffers. */ @@ -345,9 +348,9 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) #endif /* YUV buffers. */ - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return R300_COLOR_FORMAT_YVYU; - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return R300_COLOR_FORMAT_VYUY; default: return ~0; /* Unsupported. */ @@ -362,9 +365,9 @@ static uint32_t r300_translate_zsformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return R300_DEPTHFORMAT_16BIT_INT_Z; /* 24-bit depth, ignored stencil */ - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: /* 24-bit depth, 8-bit stencil */ - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; default: return ~0; /* Unsupported. */ @@ -431,42 +434,39 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) return modifier | R300_C2_SEL_R; /* ARGB 32-bit outputs. */ - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8X8_SRGB: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A; /* BGRA 32-bit outputs. */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* RGBA 32-bit outputs. */ - case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8X8_SRGB: - case PIPE_FORMAT_R8G8B8X8_SNORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* ABGR 32-bit outputs. */ - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - case PIPE_FORMAT_X8UB8UG8SR8S_NORM: - case PIPE_FORMAT_A2B10G10R10_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: /* RGBA high precision outputs (same swizzles as ABGR low precision) */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -499,7 +499,7 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { - struct r300_texture_state* state = &tex->state; + struct r300_texture_format_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; unsigned i; boolean is_r500 = screen->caps->is_r500; @@ -617,18 +617,23 @@ static unsigned r300_texture_get_tile_size(struct r300_texture* tex, /* Return true if macrotiling should be enabled on the miplevel. */ static boolean r300_texture_macro_switch(struct r300_texture *tex, unsigned level, - boolean rv350_mode) + boolean rv350_mode, + int dim) { - unsigned tile_width, width; + unsigned tile, texdim; - tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH, TRUE); - width = u_minify(tex->tex.width0, level); + tile = r300_texture_get_tile_size(tex, dim, TRUE); + if (dim == TILE_WIDTH) { + texdim = u_minify(tex->tex.width0, level); + } else { + texdim = u_minify(tex->tex.height0, level); + } /* See TX_FILTER1_n.MACRO_SWITCH. */ if (rv350_mode) { - return width >= tile_width; + return texdim >= tile; } else { - return width > tile_width; + return texdim > tile; } } @@ -692,9 +697,10 @@ static void r300_setup_miptree(struct r300_screen* screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode)) ? - R300_BUFFER_TILED : R300_BUFFER_LINEAR; + tex->mip_macrotile[i] = + (tex->macrotile == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, TILE_WIDTH)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, tex, i); nblocksy = r300_texture_get_nblocksy(tex, i); @@ -724,14 +730,50 @@ static void r300_setup_flags(struct r300_texture* tex) !util_is_power_of_two(tex->tex.height0); } +static void r300_setup_tiling(struct pipe_screen *screen, + struct r300_texture *tex) +{ + enum pipe_format format = tex->tex.format; + boolean rv350_mode = r300_screen(screen)->caps->family >= CHIP_FAMILY_RV350; + + if (util_format_is_compressed(format)) { + return; + } + + if (tex->tex.width0 == 1 || + tex->tex.height0 == 1) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + tex->microtile = R300_BUFFER_TILED; + break; + + /* XXX Square-tiling doesn't work with kernel older than 2.6.34, + * XXX need to check the DRM version */ + /*case 2: + case 8: + tex->microtile = R300_BUFFER_SQUARETILED; + break;*/ + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(tex, 0, rv350_mode, TILE_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, TILE_HEIGHT)) { + tex->macrotile = R300_BUFFER_TILED; + } +} + /* Create a new texture. */ -static struct pipe_texture* - r300_texture_create(struct pipe_screen* screen, - const struct pipe_texture* template) +static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, + const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); struct r300_screen* rscreen = r300_screen(screen); - struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; if (!tex) { return NULL; @@ -742,16 +784,19 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); + if (!(template->tex_usage & R300_TEXTURE_USAGE_TRANSFER)) { + r300_setup_tiling(screen, tex); + } r300_setup_miptree(rscreen, tex); r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 2048, - PIPE_BUFFER_USAGE_PIXEL, - tex->size); - winsys->buffer_set_tiling(winsys, tex->buffer, - tex->pitch[0], - tex->microtile != R300_BUFFER_LINEAR, - tex->macrotile != R300_BUFFER_LINEAR); + tex->buffer = rws->buffer_create(rws, 2048, + PIPE_BUFFER_USAGE_PIXEL, + tex->size); + rws->buffer_set_tiling(rws, tex->buffer, + tex->pitch[0], + tex->microtile != R300_BUFFER_LINEAR, + tex->macrotile != R300_BUFFER_LINEAR); if (!tex->buffer) { FREE(tex); @@ -764,9 +809,9 @@ static struct pipe_texture* static void r300_texture_destroy(struct pipe_texture* texture) { struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; - pipe_buffer_reference(&tex->buffer, NULL); - + rws->buffer_reference(rws, &tex->buffer, NULL); FREE(tex); } @@ -806,14 +851,17 @@ static void r300_tex_surface_destroy(struct pipe_surface* s) FREE(s); } + static struct pipe_texture* - r300_texture_blanket(struct pipe_screen* screen, - const struct pipe_texture* base, - const unsigned* stride, - struct pipe_buffer* buffer) + r300_texture_from_handle(struct pipe_screen* screen, + const struct pipe_texture* base, + struct winsys_handle *whandle) { - struct r300_texture* tex; + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen* rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + struct r300_texture* tex; + unsigned stride; /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -822,6 +870,11 @@ static struct pipe_texture* return NULL; } + buffer = rws->buffer_from_handle(rws, screen, whandle, &stride); + if (!buffer) { + return NULL; + } + tex = CALLOC_STRUCT(r300_texture); if (!tex) { return NULL; @@ -831,17 +884,38 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride_override = *stride; - tex->pitch[0] = *stride / util_format_get_blocksize(base->format); + tex->stride_override = stride; + tex->pitch[0] = stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); r300_setup_texture_state(rscreen, tex); - pipe_buffer_reference(&tex->buffer, buffer); + /* one ref already taken */ + tex->buffer = buffer; return (struct pipe_texture*)tex; } +static boolean + r300_texture_get_handle(struct pipe_screen* screen, + struct pipe_texture *texture, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct r300_texture* tex = (struct r300_texture*)texture; + unsigned stride; + + if (!tex) { + return FALSE; + } + + stride = r300_texture_get_stride(r300_screen(screen), tex, 0); + + rws->buffer_get_handle(rws, tex->buffer, stride, whandle); + + return TRUE; +} + static struct pipe_video_surface * r300_video_surface_create(struct pipe_screen *screen, enum pipe_video_chroma_format chroma_format, @@ -865,7 +939,7 @@ r300_video_surface_create(struct pipe_screen *screen, memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.format = PIPE_FORMAT_B8G8R8X8_UNORM; template.last_level = 0; template.width0 = util_next_power_of_two(width); template.height0 = util_next_power_of_two(height); @@ -893,10 +967,11 @@ static void r300_video_surface_destroy(struct pipe_video_surface *vsfc) void r300_init_screen_texture_functions(struct pipe_screen* screen) { screen->texture_create = r300_texture_create; + screen->texture_from_handle = r300_texture_from_handle; + screen->texture_get_handle = r300_texture_get_handle; screen->texture_destroy = r300_texture_destroy; screen->get_tex_surface = r300_get_tex_surface; screen->tex_surface_destroy = r300_tex_surface_destroy; - screen->texture_blanket = r300_texture_blanket; screen->video_surface_create = r300_video_surface_create; screen->video_surface_destroy= r300_video_surface_destroy; @@ -904,19 +979,23 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) boolean r300_get_texture_buffer(struct pipe_screen* screen, struct pipe_texture* texture, - struct pipe_buffer** buffer, + struct r300_winsys_buffer** buffer, unsigned* stride) { struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct r300_winsys_buffer *buf; + if (!tex) { return FALSE; } - pipe_buffer_reference(buffer, tex->buffer); + rws->buffer_reference(rws, &buf, tex->buffer); if (stride) { *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } + *buffer = buf; return TRUE; } diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 46a5fb6188b..60c7fa83420 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -60,13 +60,11 @@ r300_video_surface(struct pipe_video_surface *pvs) return (struct r300_video_surface *)pvs; } -#ifndef R300_WINSYS_H - +/* Used internally for texture_is_referenced() + */ boolean r300_get_texture_buffer(struct pipe_screen* screen, - struct pipe_texture* texture, - struct pipe_buffer** buffer, + struct pipe_texture *texture, + struct r300_winsys_buffer** buffer, unsigned* stride); -#endif /* R300_WINSYS_H */ - #endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c new file mode 100644 index 00000000000..987a040698f --- /dev/null +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -0,0 +1,280 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" +#include "r300_transfer.h" +#include "r300_texture.h" +#include "r300_screen.h" + +#include "r300_winsys.h" + +#include "util/u_memory.h" +#include "util/u_format.h" + +struct r300_transfer { + /* Parent class */ + struct pipe_transfer transfer; + + /* Pipe context. */ + struct pipe_context *ctx; + + /* Parameters of get_tex_transfer. */ + unsigned x, y, level, zslice, face; + + /* Offset from start of buffer. */ + unsigned offset; + + /* Detiled texture. */ + struct r300_texture *detiled_texture; + + /* Transfer and format flags. */ + unsigned buffer_usage, render_target_usage; +}; + +/* Convenience cast wrapper. */ +static INLINE struct r300_transfer* +r300_transfer(struct pipe_transfer* transfer) +{ + return (struct r300_transfer*)transfer; +} + +/* Copy from a tiled texture to a detiled one. */ +static void r300_copy_from_tiled_texture(struct pipe_context *ctx, + struct r300_transfer *r300transfer) +{ + struct pipe_screen *screen = ctx->screen; + struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; + struct pipe_texture *tex = transfer->texture; + struct pipe_surface *src, *dst; + + src = screen->get_tex_surface(screen, tex, r300transfer->face, + r300transfer->level, r300transfer->zslice, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_PIXEL); + + dst = screen->get_tex_surface(screen, &r300transfer->detiled_texture->tex, + 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_PIXEL | + r300transfer->buffer_usage); + + ctx->surface_copy(ctx, dst, 0, 0, src, r300transfer->x, r300transfer->y, + transfer->width, transfer->height); + + pipe_surface_reference(&src, NULL); + pipe_surface_reference(&dst, NULL); +} + +/* Copy a detiled texture to a tiled one. */ +static void r300_copy_into_tiled_texture(struct pipe_context *ctx, + struct r300_transfer *r300transfer) +{ + struct pipe_screen *screen = ctx->screen; + struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; + struct pipe_texture *tex = transfer->texture; + struct pipe_surface *src, *dst; + + src = screen->get_tex_surface(screen, &r300transfer->detiled_texture->tex, + 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_PIXEL); + + dst = screen->get_tex_surface(screen, tex, r300transfer->face, + r300transfer->level, r300transfer->zslice, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_PIXEL); + + /* XXX this flush prevents the following DRM error from occuring: + * [drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation ! + * Reproducible with perf/copytex. */ + ctx->flush(ctx, 0, NULL); + + ctx->surface_copy(ctx, dst, r300transfer->x, r300transfer->y, src, 0, 0, + transfer->width, transfer->height); + + /* XXX this flush fixes a few piglit tests (e.g. glean/pixelFormats). */ + ctx->flush(ctx, 0, NULL); + + pipe_surface_reference(&src, NULL); + pipe_surface_reference(&dst, NULL); +} + +static struct pipe_transfer* +r300_get_tex_transfer(struct pipe_context *ctx, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_texture *tex = (struct r300_texture *)texture; + struct r300_screen *r300screen = r300_screen(ctx->screen); + struct r300_transfer *trans; + struct pipe_texture template; + + trans = CALLOC_STRUCT(r300_transfer); + if (trans) { + /* Initialize the transfer object. */ + pipe_texture_reference(&trans->transfer.texture, texture); + trans->transfer.usage = usage; + trans->transfer.width = w; + trans->transfer.height = h; + trans->ctx = ctx; + trans->x = x; + trans->y = y; + trans->level = level; + trans->zslice = zslice; + trans->face = face; + + /* If the texture is tiled, we must create a temporary detiled texture + * for this transfer. */ + if (tex->microtile || tex->macrotile) { + trans->buffer_usage = pipe_transfer_buffer_flags(&trans->transfer); + trans->render_target_usage = + util_format_is_depth_or_stencil(texture->format) ? + PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + template.target = PIPE_TEXTURE_2D; + template.format = texture->format; + template.width0 = w; + template.height0 = h; + template.depth0 = 0; + template.last_level = 0; + template.nr_samples = 0; + template.tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | + R300_TEXTURE_USAGE_TRANSFER; + + /* For texture reading, the temporary (detiled) texture is used as + * a render target when blitting from a tiled texture. */ + if (usage & PIPE_TRANSFER_READ) { + template.tex_usage |= trans->render_target_usage; + } + /* For texture writing, the temporary texture is used as a sampler + * when blitting into a tiled texture. */ + if (usage & PIPE_TRANSFER_WRITE) { + template.tex_usage |= PIPE_TEXTURE_USAGE_SAMPLER; + } + + /* Create the temporary texture. */ + trans->detiled_texture = (struct r300_texture*) + ctx->screen->texture_create(ctx->screen, + &template); + + assert(!trans->detiled_texture->microtile && + !trans->detiled_texture->macrotile); + + /* Set the stride. + * Parameters x, y, level, zslice, and face remain zero. */ + trans->transfer.stride = + r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + + if (usage & PIPE_TRANSFER_READ) { + /* We cannot map a tiled texture directly because the data is + * in a different order, therefore we do detiling using a blit. */ + r300_copy_from_tiled_texture(ctx, trans); + } + } else { + trans->transfer.x = x; + trans->transfer.y = y; + trans->transfer.stride = + r300_texture_get_stride(r300screen, tex, level); + trans->transfer.level = level; + trans->transfer.zslice = zslice; + trans->transfer.face = face; + trans->offset = r300_texture_get_offset(tex, level, zslice, face); + } + } + return &trans->transfer; +} + +static void r300_tex_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) +{ + struct r300_transfer *r300transfer = r300_transfer(trans); + + if (r300transfer->detiled_texture) { + if (trans->usage & PIPE_TRANSFER_WRITE) { + r300_copy_into_tiled_texture(r300transfer->ctx, r300transfer); + } + + pipe_texture_reference( + (struct pipe_texture**)&r300transfer->detiled_texture, NULL); + } + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + +static void* r300_transfer_map(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct r300_transfer *r300transfer = r300_transfer(transfer); + struct r300_texture *tex = (struct r300_texture*)transfer->texture; + char *map; + enum pipe_format format = tex->tex.format; + + if (r300transfer->detiled_texture) { + /* The detiled texture is of the same size as the region being mapped + * (no offset needed). */ + return rws->buffer_map(rws, + r300transfer->detiled_texture->buffer, + pipe_transfer_buffer_flags(transfer)); + } else { + /* Tiling is disabled. */ + map = rws->buffer_map(rws, tex->buffer, + pipe_transfer_buffer_flags(transfer)); + + if (!map) { + return NULL; + } + + return map + r300_transfer(transfer)->offset + + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + } +} + +static void r300_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct r300_transfer *r300transfer = r300_transfer(transfer); + struct r300_texture *tex = (struct r300_texture*)transfer->texture; + + if (r300transfer->detiled_texture) { + rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + } else { + rws->buffer_unmap(rws, tex->buffer); + } +} + + +void r300_init_transfer_functions( struct r300_context *r300ctx ) +{ + struct pipe_context *ctx = &r300ctx->context; + + ctx->get_tex_transfer = r300_get_tex_transfer; + ctx->tex_transfer_destroy = r300_tex_transfer_destroy; + ctx->transfer_map = r300_transfer_map; + ctx->transfer_unmap = r300_transfer_unmap; +} diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h new file mode 100644 index 00000000000..79baf6d0480 --- /dev/null +++ b/src/gallium/drivers/r300/r300_transfer.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TRANSFER +#define R300_TRANSFER + +#include "pipe/p_screen.h" + +struct r300_context; + +void r300_init_transfer_functions(struct r300_context *r300ctx); + +#endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index a6786c321c6..bd6b95dccba 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -34,8 +34,6 @@ #include "radeon_compiler.h" -#include "util/u_math.h" - /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -89,95 +87,41 @@ static void r300_shader_read_vs_outputs( assert(0); } } + + /* WPOS is a straight copy of POSITION and it's always emitted. */ + vs_outputs->wpos = i; } -static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) +/* This function sets up: + * - VAP mapping, which maps VS registers to output semantics and + * at the same time it indicates which attributes are enabled and should + * be rasterized. + * - Stream mapping to VS outputs if TCL is not present. */ +static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) { struct r300_shader_semantics* vs_outputs = &vs->outputs; - uint32_t* hwfmt = vs->hwfmt; - int i, gen_count; + struct r300_vap_output_state *vap_out = &vs->vap_out; + int *stream_loc = vs->stream_loc_notcl; + int i, gen_count, tabi = 0; boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ + vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ /* Position. */ if (vs_outputs->pos != ATTR_UNUSED) { - hwfmt[1] |= R300_INPUT_CNTL_POS; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + stream_loc[tabi++] = 0; } else { assert(0); } /* Point size. */ if (vs_outputs->psize != ATTR_UNUSED) { - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - hwfmt[1] |= R300_INPUT_CNTL_COLOR; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - } - } - - /* Back-face colors. */ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - hwfmt[1] |= R300_INPUT_CNTL_COLOR; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - hwfmt[3] |= (4 << (3 * gen_count)); - gen_count++; - } - } - - /* Fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - hwfmt[3] |= (4 << (3 * gen_count)); - gen_count++; - } - - /* XXX magic */ - assert(gen_count <= 8); + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - /* WPOS. */ - vs->wpos_tex_output = gen_count; -} - -/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed - * or isn't present. */ -static void r300_stream_locations_notcl( - struct r300_shader_semantics* vs_outputs, - int* stream_loc) -{ - int i, tabi = 0, gen_count; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - /* Position. */ - stream_loc[tabi++] = 0; - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { stream_loc[tabi++] = 1; } @@ -185,6 +129,9 @@ static void r300_stream_locations_notcl( for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc[tabi++] = 2 + i; } } @@ -192,6 +139,9 @@ static void r300_stream_locations_notcl( /* Back-face colors. */ if (any_bcolor_used) { for (i = 0; i < ATTR_COLOR_COUNT; i++) { + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc[tabi++] = 4 + i; } } @@ -200,6 +150,9 @@ static void r300_stream_locations_notcl( gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (vs_outputs->generic[i] != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); + assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -208,17 +161,22 @@ static void r300_stream_locations_notcl( /* Fog coordinates. */ if (vs_outputs->fog != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); + assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; } + /* XXX magic */ + assert(gen_count <= 8); + /* WPOS. */ - if (vs_outputs->wpos != ATTR_UNUSED) { - assert(tabi < 16); - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } + vs->wpos_tex_output = gen_count; + + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -294,26 +252,16 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } } -static void r300_insert_wpos(struct r300_vertex_program_compiler* c, - struct r300_shader_semantics* outputs) +void r300_vertex_shader_common_init(struct r300_vertex_shader *vs, + const struct pipe_shader_state *shader) { - int i, lastOutput = 0; + /* Copy state directly into shader. */ + vs->state = *shader; + vs->state.tokens = tgsi_dup_tokens(shader->tokens); + tgsi_scan_shader(shader->tokens, &vs->info); - /* Find the max output index. */ - lastOutput = MAX2(lastOutput, outputs->psize); - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - lastOutput = MAX2(lastOutput, outputs->color[i]); - lastOutput = MAX2(lastOutput, outputs->bcolor[i]); - } - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - lastOutput = MAX2(lastOutput, outputs->generic[i]); - } - lastOutput = MAX2(lastOutput, outputs->fog); - - /* Set WPOS after the last output. */ - lastOutput++; - rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ - outputs->wpos = lastOutput; + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_init_vs_output_mapping(vs); } void r300_translate_vertex_shader(struct r300_context* r300, @@ -322,9 +270,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; - /* Initialize. */ - r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - /* Setup the compiler */ rc_init(&compiler.Base); @@ -348,10 +293,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - r300_insert_wpos(&compiler, &vs->outputs); - - r300_shader_vap_output_fmt(vs); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -363,30 +305,29 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); - vs->translated = TRUE; } boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) { - struct r300_vertex_shader* vs = r300->vs; - int tex_output = r300->vs->wpos_tex_output; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_vap_output_state *vap_out = &vs->vap_out; + int tex_output = vs->wpos_tex_output; uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - uint32_t* hwfmt = vs->hwfmt; if (r300->fs->inputs.wpos != ATTR_UNUSED) { /* Enable WPOS in VAP. */ - if (!(hwfmt[1] & tex_fmt)) { - hwfmt[1] |= tex_fmt; - hwfmt[3] |= (4 << (3 * tex_output)); + if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { + vap_out->vap_vsm_vtx_assm |= tex_fmt; + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); assert(tex_output < 8); return TRUE; } } else { /* Disable WPOS in VAP. */ - if (hwfmt[1] & tex_fmt) { - hwfmt[1] &= ~tex_fmt; - hwfmt[3] &= ~(4 << (3 * tex_output)); + if (vap_out->vap_vsm_vtx_assm & tex_fmt) { + vap_out->vap_vsm_vtx_assm &= ~tex_fmt; + vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); return TRUE; } } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 18cfeee3cd4..f6f0b86b683 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -28,6 +28,7 @@ #include "tgsi/tgsi_scan.h" #include "radeon_code.h" +#include "r300_context.h" #include "r300_shader_semantics.h" struct r300_context; @@ -38,7 +39,7 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - uint hwfmt[4]; + struct r300_vap_output_state vap_out; /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; @@ -46,13 +47,17 @@ struct r300_vertex_shader { /* Output stream location for WPOS. */ int wpos_tex_output; - /* Has this shader been translated yet? */ - boolean translated; - + /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; + + /* SWTCL-specific. */ + void *draw_vs; }; +void r300_vertex_shader_common_init(struct r300_vertex_shader *vs, + const struct pipe_shader_state *shader); + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 40fb8a95ca5..e5183a8239c 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -23,10 +23,6 @@ #ifndef R300_WINSYS_H #define R300_WINSYS_H -#ifdef __cplusplus -extern "C" { -#endif - /* The public interface header for the r300 pipe driver. * Any winsys hosting this pipe needs to implement r300_winsys and then * call r300_create_screen to start things. */ @@ -34,19 +30,146 @@ extern "C" { #include "pipe/p_defines.h" #include "pipe/p_state.h" -struct radeon_winsys; +struct r300_winsys_screen; /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws); + +struct r300_winsys_buffer; boolean r300_get_texture_buffer(struct pipe_screen* screen, struct pipe_texture* texture, - struct pipe_buffer** buffer, - unsigned* stride); + struct r300_winsys_buffer** buffer, + unsigned *stride); + +enum r300_value_id { + R300_VID_PCI_ID, + R300_VID_GB_PIPES, + R300_VID_Z_PIPES, +}; + +#define R300_USAGE_FLAG_DONT_SYNC (1 << 17) + +struct r300_winsys_screen { + void (*destroy)(struct r300_winsys_screen *ws); + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + unsigned alignment, + unsigned usage, + unsigned size); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned usage); + + void (*buffer_unmap)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf ); + + void (*buffer_destroy)( struct r300_winsys_buffer *buf ); + + + void (*buffer_reference)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src); + + boolean (*buffer_references)(struct r300_winsys_buffer *a, + struct r300_winsys_buffer *b); + + void (*buffer_flush_range)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, + unsigned offset, + unsigned length); + + /* Add a pipe_buffer to the list of buffer objects to validate. */ + boolean (*add_buffer)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd); + + + /* Revalidate all currently setup pipe_buffers. + * Returns TRUE if a flush is required. */ + boolean (*validate)(struct r300_winsys_screen* winsys); + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct r300_winsys_screen* winsys, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct r300_winsys_screen* winsys, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct r300_winsys_screen* winsys, + const char* file, + const char* function, + int line); + + /* Flush the CS. */ + void (*flush_cs)(struct r300_winsys_screen* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct r300_winsys_screen *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct r300_winsys_screen *winsys); + + void (*buffer_set_tiling)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + uint32_t pitch, + boolean microtiled, + boolean macrotiled); + + uint32_t (*get_value)(struct r300_winsys_screen *winsys, + enum r300_value_id vid); + + struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, + struct pipe_screen *screen, + struct winsys_handle *whandle, + unsigned *stride); + boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + unsigned stride, + struct winsys_handle *whandle); + + boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer); + + +}; -#ifdef __cplusplus -} -#endif +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen); #endif /* R300_WINSYS_H */ |