diff options
Diffstat (limited to 'src/gallium/drivers')
271 files changed, 11743 insertions, 15456 deletions
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index c92f8e5cba2..8769b826b5f 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -21,6 +21,7 @@ SPU_CODE_MODULE = ../spu/g3d_spu.a SOURCES = \ cell_batch.c \ + cell_buffer.c \ cell_clear.c \ cell_context.c \ cell_draw_arrays.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_buffer.c b/src/gallium/drivers/cell/ppu/cell_buffer.c new file mode 100644 index 00000000000..f56a28d62e2 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_buffer.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "cell_screen.h" +#include "cell_buffer.h" + + +static void * +cell_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned flags) +{ + struct cell_buffer *cell_buf = cell_buffer(buf); + return cell_buf->data; +} + + +static void +cell_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ +} + + +static void +cell_buffer_destroy(struct pipe_buffer *buf) +{ + struct cell_buffer *sbuf = cell_buffer(buf); + + if (!sbuf->userBuffer) + align_free(sbuf->data); + + FREE(sbuf); +} + + +static struct pipe_buffer * +cell_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct cell_buffer *buffer = CALLOC_STRUCT(cell_buffer); + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = screen; + buffer->base.alignment = MAX2(alignment, 16); + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +cell_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct cell_buffer *buffer; + + buffer = CALLOC_STRUCT(cell_buffer); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = screen; + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +void +cell_init_screen_buffer_funcs(struct pipe_screen *screen) +{ + screen->buffer_create = cell_buffer_create; + screen->user_buffer_create = cell_user_buffer_create; + screen->buffer_map = cell_buffer_map; + screen->buffer_unmap = cell_buffer_unmap; + screen->buffer_destroy = cell_buffer_destroy; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_buffer.h index e227e065ff3..ef0a8a70e59 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.h +++ b/src/gallium/drivers/cell/ppu/cell_buffer.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2009 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,38 +10,46 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ - -#ifndef CELL_WINSYS_H -#define CELL_WINSYS_H +#ifndef SP_BUFFER_H +#define SP_BUFFER_H #include "pipe/p_compiler.h" +#include "pipe/p_state.h" -/** - * Very simple winsys at this time. - * Will probably eventually add SPU control info. - */ -struct cell_winsys +struct cell_buffer { - uint dummy; + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? */ + void *data; }; +/** Cast wrapper */ +static INLINE struct cell_buffer * +cell_buffer( struct pipe_buffer *buf ) +{ + return (struct cell_buffer *)buf; +} + + +void +cell_init_screen_buffer_funcs(struct pipe_screen *screen); -#endif +#endif /* SP_BUFFER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 5bff9869fd0..f6cb1fc9be7 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -36,7 +36,6 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" #include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "pipe/p_screen.h" #include "draw/draw_context.h" @@ -137,7 +136,7 @@ cell_create_context(struct pipe_screen *screen, memset(cell, 0, sizeof(*cell)); cell->winsys = NULL; /* XXX: fixme - get this from screen? */ - cell->pipe.winsys = screen->winsys; + cell->pipe.winsys = NULL; cell->pipe.screen = screen; cell->pipe.priv = priv; cell->pipe.destroy = cell_destroy_context; @@ -159,6 +158,7 @@ cell_create_context(struct pipe_screen *screen, cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_vertex_functions(cell); + cell_init_texture_transfer_funcs(cell); cell->draw = cell_draw_create(cell); diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index a77cc5b9067..28f80b82cd5 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -34,7 +34,7 @@ #include "pipe/p_defines.h" #include "draw/draw_vertex.h" #include "draw/draw_vbuf.h" -#include "cell_winsys.h" +/*#include "cell_winsys.h"*/ #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" @@ -93,6 +93,11 @@ struct cell_buffer_list struct cell_buffer_node *head; }; +struct cell_velems_state +{ + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +} /** * Per-context state, subclass of pipe_context. @@ -110,6 +115,7 @@ struct cell_context const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; + const struct cell_velems_state *velems; struct spe_function logic_op; @@ -125,8 +131,6 @@ struct cell_context struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; uint num_vertex_buffers; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - uint num_vertex_elements; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index bffd0fac6fe..15d4e8338f4 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -33,48 +33,18 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "util/u_simple_screen.h" #include "util/u_inlines.h" #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_state.h" #include "cell_flush.h" +#include "cell_buffer.h" #include "draw/draw_context.h" -static void -cell_map_constant_buffers(struct cell_context *sp) -{ - struct pipe_winsys *ws = sp->pipe.winsys; - uint i; - for (i = 0; i < 2; i++) { - if (sp->constants[i] && sp->constants[i]->size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i], - PIPE_BUFFER_USAGE_CPU_READ); - cell_flush_buffer_range(sp, sp->mapped_constants[i], - sp->constants[i]->size); - } - } - - draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, 0, - sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX]->size); -} - -static void -cell_unmap_constant_buffers(struct cell_context *sp) -{ - struct pipe_winsys *ws = sp->pipe.winsys; - uint i; - for (i = 0; i < 2; i++) { - if (sp->constants[i] && sp->constants[i]->size) - ws->buffer_unmap(ws, sp->constants[i]); - sp->mapped_constants[i] = NULL; - } -} @@ -93,33 +63,27 @@ cell_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - struct cell_context *sp = cell_context(pipe); - struct draw_context *draw = sp->draw; + struct cell_context *cell = cell_context(pipe); + struct draw_context *draw = cell->draw; unsigned i; - if (sp->dirty) - cell_update_derived( sp ); + if (cell->dirty) + cell_update_derived( cell ); #if 0 - cell_map_surfaces(sp); + cell_map_surfaces(cell); #endif - cell_map_constant_buffers(sp); /* * Map vertex buffers */ - for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); + for (i = 0; i < cell->num_vertex_buffers; i++) { + void *buf = cell_buffer(cell->vertex_buffer[i].buffer)->data; draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = pipe_buffer_map(pipe->screen, - indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *mapped_indexes = cell_buffer(indexBuffer)->data; draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } else { @@ -134,17 +98,19 @@ cell_draw_range_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < sp->num_vertex_buffers; i++) { + for (i = 0; i < cell->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ - cell_unmap_constant_buffers(sp); + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); } diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c index e10071529a8..035ef41b898 100644 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -92,7 +92,6 @@ cell_add_buffer_to_list(struct cell_context *cell, struct cell_buffer_list *list, struct pipe_buffer *buffer) { - struct pipe_screen *ps = cell->pipe.screen; struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); /* create new list node which references the buffer, insert at head */ if (node) { @@ -157,8 +156,13 @@ cell_add_fenced_textures(struct cell_context *cell) printf("Adding texture %p buffer %p to list\n", ct, ct->tiled_buffer[level]); #endif - if (ct->buffer) +#if 00 + /* XXX this needs to be fixed/restored! + * Maybe keep pointers to textures, not buffers. + */ + if (ct->base.buffer) cell_add_buffer_to_list(cell, list, ct->buffer); +#endif } } } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 70683bb3676..c54576b3c32 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -304,7 +304,7 @@ unpack_colors(struct spe_function *f, spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); switch (color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: /* fbB = fbRGBA & mask */ spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); @@ -327,7 +327,7 @@ unpack_colors(struct spe_function *f, spe_roti(f, fbA_reg, fbA_reg, -24); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: /* fbA = fbRGBA & mask */ spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); @@ -1043,12 +1043,12 @@ gen_pack_colors(struct spe_function *f, spe_rotmi(f, a_reg, a_reg, -24); /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ } - else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ @@ -1096,14 +1096,14 @@ gen_colormask(struct spe_function *f, * end up, so we can mask them correctly. */ switch(color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: /* ARGB */ a_mask = 0xff000000; r_mask = 0x00ff0000; g_mask = 0x0000ff00; b_mask = 0x000000ff; break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: /* BGRA */ b_mask = 0xff000000; g_mask = 0x00ff0000; @@ -1352,7 +1352,7 @@ gen_stencil_values(struct spe_function *f, */ ASSERT(fbS_reg != newS_reg); - /* The code also assumes the the stencil_max_value is of the form + /* The code also assumes that the stencil_max_value is of the form * 2^n-1 and can therefore be used as a mask for the valid bits in * addition to a maximum. Make sure this is the case as well. * The clever math below exploits the fact that incrementing a @@ -1859,8 +1859,8 @@ gen_depth_stencil(struct cell_context *cell, spe_comment(f, 0, "Fetch Z/stencil quad from tile"); switch(zs_format) { - case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ - case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: /* prepare mask to extract Z vals from ZS vals */ spe_load_uint(f, zmask_reg, 0x00ffffff); @@ -1880,8 +1880,8 @@ gen_depth_stencil(struct cell_context *cell, spe_rotmi(f, fbS_reg, fbZS_reg, -24); break; - case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: /* convert fragment Z from [0,1] to 32-bit ints */ spe_cfltu(f, fragZ_reg, fragZ_reg, 32); @@ -1969,13 +1969,13 @@ gen_depth_stencil(struct cell_context *cell, * fbS_reg has four 8-bit Z values in bits [7..0]. */ spe_comment(f, 0, "Store quad's depth/stencil values in tile"); - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { + if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { + else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } @@ -2015,7 +2015,7 @@ gen_depth_stencil(struct cell_context *cell, * code before the fragment shader to cull fragments/quads that are * totally occluded/discarded. * - * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * XXX we only support PIPE_FORMAT_S8Z24_UNORM z/stencil buffer right now. * * See the spu_default_fragment_ops() function to see how the per-fragment * operations would be done with ordinary C code. diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 3d8b4409c75..dce10ae7f86 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -292,17 +292,23 @@ cell_set_sampler_textures(struct pipe_context *pipe, static void cell_map_surfaces(struct cell_context *cell) { +#if 0 struct pipe_screen *screen = cell->pipe.screen; +#endif uint i; for (i = 0; i < 1; i++) { struct pipe_surface *ps = cell->framebuffer.cbufs[i]; if (ps) { struct cell_texture *ct = cell_texture(ps->texture); +#if 0 cell->cbuf_map[i] = screen->buffer_map(screen, ct->buffer, (PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE)); +#else + cell->cbuf_map[i] = ct->data; +#endif } } @@ -310,10 +316,14 @@ cell_map_surfaces(struct cell_context *cell) struct pipe_surface *ps = cell->framebuffer.zsbuf; if (ps) { struct cell_texture *ct = cell_texture(ps->texture); +#if 0 cell->zsbuf_map = screen->buffer_map(screen, ct->buffer, (PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE)); +#else + cell->zsbuf_map = ct->data; +#endif } } } @@ -325,17 +335,17 @@ cell_map_surfaces(struct cell_context *cell) static void cell_unmap_surfaces(struct cell_context *cell) { - struct pipe_screen *screen = cell->pipe.screen; + /*struct pipe_screen *screen = cell->pipe.screen;*/ uint i; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { struct pipe_surface *ps = cell->framebuffer.cbufs[i]; if (ps && cell->cbuf_map[i]) { - struct cell_texture *ct = cell_texture(ps->texture); + /*struct cell_texture *ct = cell_texture(ps->texture);*/ assert(ps->texture); - assert(ct->buffer); + /*assert(ct->buffer);*/ - screen->buffer_unmap(screen, ct->buffer); + /*screen->buffer_unmap(screen, ct->buffer);*/ cell->cbuf_map[i] = NULL; } } @@ -343,8 +353,8 @@ cell_unmap_surfaces(struct cell_context *cell) { struct pipe_surface *ps = cell->framebuffer.zsbuf; if (ps && cell->zsbuf_map) { - struct cell_texture *ct = cell_texture(ps->texture); - screen->buffer_unmap(screen, ct->buffer); + /*struct cell_texture *ct = cell_texture(ps->texture);*/ + /*screen->buffer_unmap(screen, ct->buffer);*/ cell->zsbuf_map = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h new file mode 100644 index 00000000000..7e2e093565d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_public.h @@ -0,0 +1,10 @@ +#ifndef CELL_PUBLIC_H +#define CELL_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +cell_create_screen(struct sw_winsys *winsys); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 449855f539d..31fd963d19a 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -28,7 +28,6 @@ #include "util/u_memory.h" #include "util/u_simple_screen.h" -#include "util/u_simple_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -36,7 +35,10 @@ #include "cell_context.h" #include "cell_screen.h" #include "cell_texture.h" -#include "cell_winsys.h" +#include "cell_buffer.h" +#include "cell_public.h" + +#include "state_tracker/sw_winsys.h" static const char * @@ -134,19 +136,28 @@ cell_is_format_supported( struct pipe_screen *screen, unsigned tex_usage, unsigned geom_flags ) { - /* cell supports most formats, XXX for now anyway */ + struct sw_winsys *winsys = cell_screen(screen)->winsys; + if (format == PIPE_FORMAT_DXT5_RGBA || - format == PIPE_FORMAT_R8G8B8A8_SRGB) + format == PIPE_FORMAT_A8B8G8R8_SRGB) return FALSE; - else - return TRUE; + + if (tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + /* This is often a lie. Pull in logic from llvmpipe to fix. + */ + return TRUE; } static void cell_destroy_screen( struct pipe_screen *screen ) { - struct pipe_winsys *winsys = screen->winsys; + struct cell_screen *sp_screen = cell_screen(screen); + struct sw_winsys *winsys = sp_screen->winsys; if(winsys->destroy) winsys->destroy(winsys); @@ -155,32 +166,34 @@ cell_destroy_screen( struct pipe_screen *screen ) } + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no cell_screen) but * that would be the place to put SPU thread/context info... */ struct pipe_screen * -cell_create_screen(struct pipe_winsys *winsys) +cell_create_screen(struct sw_winsys *winsys) { - struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + struct cell_screen *screen = CALLOC_STRUCT(cell_screen); if (!screen) return NULL; screen->winsys = winsys; + screen->base.winsys = NULL; - screen->destroy = cell_destroy_screen; + screen->base.destroy = cell_destroy_screen; - screen->get_name = cell_get_name; - screen->get_vendor = cell_get_vendor; - screen->get_param = cell_get_param; - screen->get_paramf = cell_get_paramf; - screen->is_format_supported = cell_is_format_supported; - screen->context_create = cell_create_context; + screen->base.get_name = cell_get_name; + screen->base.get_vendor = cell_get_vendor; + screen->base.get_param = cell_get_param; + screen->base.get_paramf = cell_get_paramf; + screen->base.is_format_supported = cell_is_format_supported; + screen->base.context_create = cell_create_context; - cell_init_screen_texture_funcs(screen); - u_simple_screen_init(screen); + cell_init_screen_texture_funcs(&screen->base); + cell_init_screen_buffer_funcs(&screen->base); - return screen; + return &screen->base; } diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h index c7e15889d66..baff9d3b7d4 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.h +++ b/src/gallium/drivers/cell/ppu/cell_screen.h @@ -30,12 +30,26 @@ #define CELL_SCREEN_H -struct pipe_screen; -struct pipe_winsys; +#include "pipe/p_screen.h" +struct sw_winsys; -extern struct pipe_screen * -cell_create_screen(struct pipe_winsys *winsys); +struct cell_screen { + struct pipe_screen base; + + struct sw_winsys *winsys; + + /* Increments whenever textures are modified. Contexts can track + * this. + */ + unsigned timestamp; +}; + +static INLINE struct cell_screen * +cell_screen( struct pipe_screen *pipe ) +{ + return (struct cell_screen *)pipe; +} #endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 28e5e6d706d..39284f3a5d1 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -135,7 +135,7 @@ cell_thread_function(void *arg) /** * Create the SPU threads. This is done once during driver initialization. - * This involves setting the the "init" message which is sent to each SPU. + * This involves setting the "init" message which is sent to each SPU. * The init message specifies an SPU id, total number of SPUs, location * and number of batch buffers, etc. */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index a59c7828ac3..424e2628a95 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -245,16 +245,13 @@ cell_emit_state(struct cell_context *cell) uint i, j; float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); uint32_t *ibuf = (uint32_t *) buf; - const float *constants = pipe_buffer_map(cell->pipe.screen, - cell->constants[shader], - PIPE_BUFFER_USAGE_CPU_READ); + const float *constants = cell->mapped_constants[shader]; ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; ibuf[4] = num_const; j = 8; for (i = 0; i < num_const; i++) { buf[j++] = constants[i]; } - pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader]); } if (cell->dirty & (CELL_NEW_FRAMEBUFFER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 07be5e92ea7..dc33e7ccc2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -1251,7 +1251,7 @@ cell_generate_logic_op(struct spe_function *f, /* Convert fragment colors to framebuffer format in AoS layout. */ switch (surf->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: data[0] = 0x00010203; data[1] = 0x10111213; data[2] = 0x04050607; @@ -1261,7 +1261,7 @@ cell_generate_logic_op(struct spe_function *f, data[6] = 0x80808080; data[7] = 0x80808080; break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: data[0] = 0x03020100; data[1] = 0x13121110; data[2] = 0x07060504; diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 9b2f86fdfba..9e29ddc2d45 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -28,13 +28,13 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_simple_screen.h" #include "draw/draw_context.h" #include "tgsi/tgsi_parse.h" #include "cell_context.h" #include "cell_state.h" #include "cell_gen_fp.h" +#include "cell_buffer.h" /** cast wrapper */ @@ -183,17 +183,29 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf) + struct pipe_buffer *constants) { struct cell_context *cell = cell_context(pipe); + unsigned size = constants ? constants->size : 0; + const void *data = constants ? cell_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); + if (cell->constants[shader] == constants) + return; + draw_flush(cell->draw); /* note: reference counting */ - pipe_buffer_reference(&cell->constants[shader], buf); + pipe_buffer_reference(&cell->constants[shader], constants); + + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0, + data, size); + } + + cell->mapped_constants[shader] = data; if (shader == PIPE_SHADER_VERTEX) cell->dirty |= CELL_NEW_VS_CONSTANTS; diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index fbe55c84721..9510ea9ac2b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -32,24 +32,44 @@ #include "cell_context.h" #include "cell_state.h" +#include "util/u_memory.h" #include "draw/draw_context.h" -static void -cell_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) +void * +cell_create_vertex_elements_state(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) { - struct cell_context *cell = cell_context(pipe); - + struct cell_velems_state *velems; assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} + +void +cell_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems; - memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); - cell->num_vertex_elements = count; + cell->velems = cell_velems; cell->dirty |= CELL_NEW_VERTEX; - draw_set_vertex_elements(cell->draw, count, elements); + if (cell_velems) + draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); +} + +void +cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); } @@ -75,5 +95,7 @@ void cell_init_vertex_functions(struct cell_context *cell) { cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_vertex_elements = cell_set_vertex_elements; + cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; + cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; + cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index fad290dfa0e..c65c3b4f885 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -34,20 +34,23 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" -#include "util/u_simple_screen.h" #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" #include "cell_context.h" +#include "cell_screen.h" #include "cell_state.h" #include "cell_texture.h" +#include "state_tracker/sw_winsys.h" -static void -cell_texture_layout(struct cell_texture *ct) + +static boolean +cell_texture_layout(struct pipe_screen *screen, + struct cell_texture *ct) { struct pipe_texture *pt = &ct->base; unsigned level; @@ -83,9 +86,34 @@ cell_texture_layout(struct cell_texture *ct) height = u_minify(height, 1); depth = u_minify(depth, 1); } + + ct->data = align_malloc(ct->buffer_size, 16); + + return ct->data != NULL; } +/** + * Texture layout for simple color buffers. + */ +static boolean +cell_displaytarget_layout(struct pipe_screen *screen, + struct cell_texture * ct) +{ + struct sw_winsys *winsys = cell_screen(screen)->winsys; + + /* Round up the surface size to a multiple of the tile size? + */ + ct->dt = winsys->displaytarget_create(winsys, + ct->base.format, + ct->base.width0, + ct->base.height0, + 16, + &ct->dt_stride ); + + return ct->dt != NULL; +} + static struct pipe_texture * cell_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -98,31 +126,46 @@ cell_texture_create(struct pipe_screen *screen, pipe_reference_init(&ct->base.reference, 1); ct->base.screen = screen; - cell_texture_layout(ct); + /* Create both a displaytarget (linear) and regular texture + * (twiddled). Convert twiddled->linear at flush_frontbuffer time. + */ + if (ct->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { + if (!cell_displaytarget_layout(screen, ct)) + goto fail; + } - ct->buffer = screen->buffer_create(screen, 32, PIPE_BUFFER_USAGE_PIXEL, - ct->buffer_size); + if (!cell_texture_layout(screen, ct)) + goto fail; - if (!ct->buffer) { - FREE(ct); - return NULL; + return &ct->base; + +fail: + if (ct->dt) { + struct sw_winsys *winsys = cell_screen(screen)->winsys; + winsys->displaytarget_destroy(winsys, ct->dt); } - return &ct->base; + FREE(ct); + + return NULL; } static void cell_texture_destroy(struct pipe_texture *pt) { + struct cell_screen *screen = cell_screen(pt->screen); + struct sw_winsys *winsys = screen->winsys; struct cell_texture *ct = cell_texture(pt); - if (ct->mapped) { - pipe_buffer_unmap(ct->buffer->screen, ct->buffer); - ct->mapped = NULL; + if (ct->dt) { + /* display target */ + winsys->displaytarget_destroy(winsys, ct->dt); } - pipe_buffer_reference(&ct->buffer, NULL); + align_free(ct->data); FREE(ct); } @@ -312,7 +355,7 @@ cell_tex_surface_destroy(struct pipe_surface *surf) * back out for glGetTexImage). */ static struct pipe_transfer * -cell_get_tex_transfer(struct pipe_screen *screen, +cell_get_tex_transfer(struct pipe_context *ctx, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -359,7 +402,7 @@ cell_get_tex_transfer(struct pipe_screen *screen, static void -cell_tex_transfer_destroy(struct pipe_transfer *t) +cell_tex_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) { struct cell_transfer *transfer = cell_transfer(t); /* Effectively do the texture_update work here - if texture images @@ -376,7 +419,7 @@ cell_tex_transfer_destroy(struct pipe_transfer *t) * Return pointer to texture image data in linear layout. */ static void * -cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) +cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct cell_transfer *ctrans = cell_transfer(transfer); struct pipe_texture *pt = transfer->texture; @@ -389,10 +432,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) assert(transfer->texture); - if (!ct->mapped) { - /* map now */ - ct->mapped = pipe_buffer_map(screen, ct->buffer, - pipe_transfer_buffer_flags(transfer)); + if (ct->mapped == NULL) { + ct->mapped = ct->data; } /* @@ -430,7 +471,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer) * to tiled data. */ static void -cell_transfer_unmap(struct pipe_screen *screen, +cell_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer) { struct cell_transfer *ctrans = cell_transfer(transfer); @@ -442,9 +483,8 @@ cell_transfer_unmap(struct pipe_screen *screen, const uint stride = ct->stride[level]; if (!ct->mapped) { - /* map now */ - ct->mapped = pipe_buffer_map(screen, ct->buffer, - PIPE_BUFFER_USAGE_CPU_READ); + assert(0); + return; } if (transfer->usage & PIPE_TRANSFER_WRITE) { @@ -467,6 +507,50 @@ cell_transfer_unmap(struct pipe_screen *screen, } + +/* This used to be overriden by the co-state tracker, but really needs + * to be active with sw_winsys. + * + * Contrasting with llvmpipe and softpipe, this is the only place + * where we use the ct->dt display target in any real sense. + * + * Basically just untwiddle our local data into the linear + * displaytarget. + */ +static void +cell_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_surface *surface, + void *context_private) +{ + struct cell_screen *screen = cell_screen(_screen); + struct sw_winsys *winsys = screen->winsys; + struct cell_texture *ct = cell_texture(surface->texture); + + if (!ct->dt) + return; + + /* Need to untwiddle from our internal representation here: + */ + { + unsigned *map = winsys->displaytarget_map(winsys, ct->dt, + (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE)); + unsigned *src = (unsigned *)(ct->data + ct->level_offset[surface->level]); + + untwiddle_image_uint(surface->width, + surface->height, + TILE_SIZE, + map, + ct->dt_stride, + src); + + winsys->displaytarget_unmap(winsys, ct->dt); + } + + winsys->displaytarget_display(winsys, ct->dt, context_private); +} + + void cell_init_screen_texture_funcs(struct pipe_screen *screen) { @@ -476,9 +560,14 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->get_tex_surface = cell_get_tex_surface; screen->tex_surface_destroy = cell_tex_surface_destroy; - screen->get_tex_transfer = cell_get_tex_transfer; - screen->tex_transfer_destroy = cell_tex_transfer_destroy; + screen->flush_frontbuffer = cell_flush_frontbuffer; +} - screen->transfer_map = cell_transfer_map; - screen->transfer_unmap = cell_transfer_unmap; +void +cell_init_texture_transfer_funcs(struct cell_context *cell) +{ + cell->pipe.get_tex_transfer = cell_get_tex_transfer; + cell->pipe.tex_transfer_destroy = cell_tex_transfer_destroy; + cell->pipe.transfer_map = cell_transfer_map; + cell->pipe.transfer_unmap = cell_transfer_unmap; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 3ffc0bfdb51..ac0b9167750 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -28,6 +28,7 @@ #ifndef CELL_TEXTURE_H #define CELL_TEXTURE_H +#include "cell/common.h" struct cell_context; struct pipe_texture; @@ -43,8 +44,20 @@ struct cell_texture unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; - /** The tiled texture data is held in this buffer */ - struct pipe_buffer *buffer; + /** + * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET + * usage. + */ + struct sw_displaytarget *dt; + unsigned dt_stride; + + /** + * Malloc'ed data for regular textures, or a mapping to dt above. + */ + void *data; + + /* Size of the linear buffer?? + */ unsigned long buffer_size; /** The buffer above, mapped. This is the memory from which the @@ -82,5 +95,7 @@ cell_transfer(struct pipe_transfer *pt) extern void cell_init_screen_texture_funcs(struct pipe_screen *screen); +extern void +cell_init_texture_transfer_funcs(struct cell_context *cell); #endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index cf8cd411598..3d389d6ea36 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -31,7 +31,6 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "util/u_simple_screen.h" #include "util/u_math.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index 55bd85bde2b..79f1fb7fb28 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -337,10 +337,10 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.zsize = 4; spu.fb.zscale = (float) 0xffffffffu; break; - case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: spu.fb.zsize = 4; spu.fb.zscale = (float) 0x00ffffffu; break; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 53283740805..2c9e7458afe 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -138,14 +138,14 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu.depth_stencil_alpha.stencil[0].enabled) { /* do stencil test */ - ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24S8_UNORM); } else if (spu.depth_stencil_alpha.depth.enabled) { /* do depth test */ - ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || - spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24S8_UNORM || + spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM); vector unsigned int ifragZ; vector unsigned int zmask; @@ -240,13 +240,13 @@ spu_fallback_fragment_ops(uint x, uint y, { vector float temp[4]; /* float colors in AOS form */ switch (spu.fb.color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: temp[0] = spu_unpack_B8G8R8A8(fbc0); temp[1] = spu_unpack_B8G8R8A8(fbc1); temp[2] = spu_unpack_B8G8R8A8(fbc2); temp[3] = spu_unpack_B8G8R8A8(fbc3); break; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: temp[0] = spu_unpack_A8R8G8B8(fbc0); temp[1] = spu_unpack_A8R8G8B8(fbc1); temp[2] = spu_unpack_A8R8G8B8(fbc2); @@ -506,13 +506,13 @@ spu_fallback_fragment_ops(uint x, uint y, * Pack fragment float colors into 32-bit RGBA words. */ switch (spu.fb.color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); @@ -532,7 +532,7 @@ spu_fallback_fragment_ops(uint x, uint y, /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ if (spu.blend.rt[0].colormask & PIPE_MASK_G) @@ -542,7 +542,7 @@ spu_fallback_fragment_ops(uint x, uint y, if (spu.blend.rt[0].colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; - case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: if (spu.blend.rt[0].colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ if (spu.blend.rt[0].colormask & PIPE_MASK_G) diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 2ccc5d3e605..659e40cbf03 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -27,7 +27,6 @@ #include "pipe/p_defines.h" -#include "util/u_simple_screen.h" #include "util/u_memory.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index bb1a168ea7a..4a754465bbe 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -78,6 +78,7 @@ struct failover_context { const struct fo_state *rasterizer; const struct fo_state *fragment_shader; const struct fo_state *vertex_shader; + const struct fo_state *vertex_elements; struct pipe_blend_color blend_color; struct pipe_stencil_ref stencil_ref; @@ -89,10 +90,8 @@ struct failover_context { struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; uint num_vertex_buffers; - uint num_vertex_elements; void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 970606a3f50..0247fb803b2 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -255,9 +255,52 @@ failover_delete_vs_state(struct pipe_context *pipe, free(state); } + + +static void * +failover_create_vertex_elements_state( struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *velems ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_vertex_elements_state(failover->sw, count, velems); + state->hw_state = failover->hw->create_vertex_elements_state(failover->hw, count, velems); + + return state; +} + +static void +failover_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems ) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)velems; + + failover->vertex_elements = state; + failover->dirty |= FO_NEW_VERTEX_ELEMENT; + failover->sw->bind_vertex_elements_state( failover->sw, velems ); + failover->hw->bind_vertex_elements_state( failover->hw, velems ); +} + +static void +failover_delete_vertex_elements_state( struct pipe_context *pipe, + void *velems ) +{ + struct fo_state *state = (struct fo_state*)velems; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_vertex_elements_state(failover->sw, state->sw_state); + failover->hw->delete_vertex_elements_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + static void failover_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) + const struct pipe_poly_stipple *stipple ) { struct failover_context *failover = failover_context(pipe); @@ -490,22 +533,6 @@ failover_set_vertex_buffers(struct pipe_context *pipe, } -static void -failover_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *vertex_elements) -{ - struct failover_context *failover = failover_context(pipe); - - memcpy(failover->vertex_elements, vertex_elements, - count * sizeof(vertex_elements[0])); - - failover->dirty |= FO_NEW_VERTEX_ELEMENT; - failover->num_vertex_elements = count; - failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); - failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); -} - void failover_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -543,6 +570,9 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.create_vs_state = failover_create_vs_state; failover->pipe.bind_vs_state = failover_bind_vs_state; failover->pipe.delete_vs_state = failover_delete_vs_state; + failover->pipe.create_vertex_elements_state = failover_create_vertex_elements_state; + failover->pipe.bind_vertex_elements_state = failover_bind_vertex_elements_state; + failover->pipe.delete_vertex_elements_state = failover_delete_vertex_elements_state; failover->pipe.set_blend_color = failover_set_blend_color; failover->pipe.set_stencil_ref = failover_set_stencil_ref; @@ -554,6 +584,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; - failover->pipe.set_vertex_elements = failover_set_vertex_elements; failover->pipe.set_constant_buffer = failover_set_constant_buffer; } diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 5c000808425..09ca1944971 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -81,6 +81,10 @@ failover_state_emit( struct failover_context *failover ) failover->sw->bind_vs_state( failover->sw, failover->vertex_shader->sw_state ); + if (failover->dirty & FO_NEW_VERTEX_ELEMENT) + failover->sw->bind_vertex_elements_state( failover->sw, + failover->vertex_elements->sw_state ); + if (failover->dirty & FO_NEW_STIPPLE) failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); @@ -116,11 +120,5 @@ failover_state_emit( struct failover_context *failover ) failover->vertex_buffers ); } - if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { - failover->sw->set_vertex_elements( failover->sw, - failover->num_vertex_elements, - failover->vertex_elements ); - } - failover->dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c index 3d45a22b7e7..130519ffa50 100644 --- a/src/gallium/drivers/i915/i915_context.c +++ b/src/gallium/drivers/i915/i915_context.c @@ -221,6 +221,7 @@ i915_create_context(struct pipe_screen *screen, void *priv) i915_init_surface_functions(i915); i915_init_state_functions(i915); i915_init_flush_functions(i915); + i915_init_texture_functions(i915); draw_install_aaline_stage(i915->draw, &i915->base); draw_install_aapoint_stage(i915->draw, &i915->base); diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index da769e7b290..039165b63f5 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -148,7 +148,7 @@ struct i915_state /** Describes the current hardware vertex layout */ struct vertex_info vertex_info; - + unsigned id; /* track lost context events */ }; @@ -187,6 +187,14 @@ struct i915_sampler_state { unsigned maxlod; }; +struct i915_velems_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; + +#define I915_MAX_TEXTURE_2D_LEVELS 11 /* max 1024x1024 */ +#define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */ + struct i915_texture { struct pipe_texture base; @@ -199,7 +207,7 @@ struct i915_texture { unsigned sw_tiled; /**< tiled with software flags */ unsigned hw_tiled; /**< tiled with hardware fences */ - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nr_images[I915_MAX_TEXTURE_2D_LEVELS]; /* Explicitly store the offset of each image for each cube face or * depth value. Pretty much have to accept that hardware formats @@ -207,7 +215,7 @@ struct i915_texture { * compute the offsets of depth/cube images within a mipmap level, * so have to store them as a lookup table: */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + unsigned *image_offset[I915_MAX_TEXTURE_2D_LEVELS]; /**< array [depth] of offsets */ /* The data is held here: */ @@ -247,7 +255,6 @@ struct i915_context unsigned num_samplers; unsigned num_textures; - unsigned num_vertex_elements; unsigned num_vertex_buffers; struct intel_batchbuffer *batch; @@ -343,6 +350,12 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen, /*********************************************************************** + * i915_texture.c + */ +void i915_init_texture_functions(struct i915_context *i915 ); + + +/*********************************************************************** * Inline conversion functions. These are better-typed than the * macros used previously: */ diff --git a/src/gallium/drivers/i915/i915_debug_fp.c b/src/gallium/drivers/i915/i915_debug_fp.c index 066e7392d18..f41c51f2991 100644 --- a/src/gallium/drivers/i915/i915_debug_fp.c +++ b/src/gallium/drivers/i915/i915_debug_fp.c @@ -28,7 +28,6 @@ #include "i915_reg.h" #include "i915_debug.h" -#include "util/u_simple_screen.h" #include "util/u_debug.h" diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index b1d754dc4eb..e5bf4a20bd0 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -116,11 +116,11 @@ i915_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ + return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return I915_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ + return I915_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; @@ -165,22 +165,22 @@ i915_is_format_supported(struct pipe_screen *screen, unsigned geom_flags) { static const enum pipe_format tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_A8B8G8R8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B5G6R5_UNORM, PIPE_FORMAT_L8_UNORM, PIPE_FORMAT_A8_UNORM, PIPE_FORMAT_I8_UNORM, - PIPE_FORMAT_A8L8_UNORM, - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, - PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_UYVY, + PIPE_FORMAT_YUYV, + PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format surface_supported[] = { - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; const enum pipe_format *list; diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c index 62169918e2b..377d8425a5c 100644 --- a/src/gallium/drivers/i915/i915_state.c +++ b/src/gallium/drivers/i915/i915_state.c @@ -742,21 +742,45 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(i915->draw, count, buffers); } -static void i915_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) +static void * +i915_create_vertex_elements_state(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct i915_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct i915_velems_state *) MALLOC(sizeof(struct i915_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} + +static void +i915_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) { struct i915_context *i915 = i915_context(pipe); + struct i915_velems_state *i915_velems = (struct i915_velems_state *) velems; + /* Because we change state before the draw_set_vertex_buffers call * we need a flush here, just to be sure. */ draw_flush(i915->draw); - i915->num_vertex_elements = count; /* pass-through to draw module */ - draw_set_vertex_elements(i915->draw, count, elements); + if (i915_velems) { + draw_set_vertex_elements(i915->draw, + i915_velems->count, i915_velems->velem); + } } +static void +i915_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); +} void i915_init_state_functions( struct i915_context *i915 ) @@ -782,6 +806,9 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.create_vs_state = i915_create_vs_state; i915->base.bind_vs_state = i915_bind_vs_state; i915->base.delete_vs_state = i915_delete_vs_state; + i915->base.create_vertex_elements_state = i915_create_vertex_elements_state; + i915->base.bind_vertex_elements_state = i915_bind_vertex_elements_state; + i915->base.delete_vertex_elements_state = i915_delete_vertex_elements_state; i915->base.set_blend_color = i915_set_blend_color; i915->base.set_stencil_ref = i915_set_stencil_ref; @@ -794,5 +821,4 @@ i915_init_state_functions( struct i915_context *i915 ) i915->base.set_fragment_sampler_textures = i915_set_sampler_textures; i915->base.set_viewport_state = i915_set_viewport_state; i915->base.set_vertex_buffers = i915_set_vertex_buffers; - i915->base.set_vertex_elements = i915_set_vertex_elements; } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index a3d4e3b04e5..51f0ef12baf 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -37,9 +37,9 @@ static unsigned translate_format( enum pipe_format format ) { switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return COLOR_BUF_RGB565; default: assert(0); @@ -50,7 +50,7 @@ static unsigned translate_format( enum pipe_format format ) static unsigned translate_depth_format( enum pipe_format zformat ) { switch (zformat) { - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: return DEPTH_FRMT_24_FIXED_8_OTHER; case PIPE_FORMAT_Z16_UNORM: return DEPTH_FRMT_16_FIXED; @@ -244,6 +244,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + assert(tex); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ ztile); @@ -259,7 +260,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (cbuf_surface) cformat = cbuf_surface->format; else - cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ cformat = translate_format(cformat); if (depth_surface) diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c index e5c6d87215b..9813290b51b 100644 --- a/src/gallium/drivers/i915/i915_state_sampler.c +++ b/src/gallium/drivers/i915/i915_state_sampler.c @@ -87,8 +87,8 @@ static void update_sampler(struct i915_context *i915, state[1] = sampler->state[1]; state[2] = sampler->state[2]; - if (pt->format == PIPE_FORMAT_YCBCR || - pt->format == PIPE_FORMAT_YCBCR_REV) + if (pt->format == PIPE_FORMAT_UYVY || + pt->format == PIPE_FORMAT_YUYV) state[0] |= SS2_COLORSPACE_CONVERSION; /* 3D textures don't seem to respect the border color. @@ -180,19 +180,19 @@ translate_texture_format(enum pipe_format pipeFormat) return MAPSURF_8BIT | MT_8BIT_I8; case PIPE_FORMAT_A8_UNORM: return MAPSURF_8BIT | MT_8BIT_A8; - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: return MAPSURF_16BIT | MT_16BIT_AY88; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return MAPSURF_16BIT | MT_16BIT_RGB565; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return MAPSURF_16BIT | MT_16BIT_ARGB1555; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return MAPSURF_16BIT | MT_16BIT_ARGB4444; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return MAPSURF_32BIT | MT_32BIT_ARGB8888; - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return (MAPSURF_422 | MT_422_YCRCB_SWAPY); #if 0 case PIPE_FORMAT_RGB_FXT1: @@ -210,7 +210,7 @@ translate_texture_format(enum pipe_format pipeFormat) case PIPE_FORMAT_RGBA_DXT5: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); #endif - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c index e101c8683ec..b252fb5330c 100644 --- a/src/gallium/drivers/i915/i915_texture.c +++ b/src/gallium/drivers/i915/i915_texture.c @@ -96,7 +96,7 @@ i915_miptree_set_level_info(struct i915_texture *tex, unsigned nr_images, unsigned w, unsigned h, unsigned d) { - assert(level < PIPE_MAX_TEXTURE_LEVELS); + assert(level < Elements(tex->nr_images)); tex->nr_images[level] = nr_images; @@ -219,12 +219,12 @@ i915_miptree_layout_2d(struct i915_texture *tex) unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0); /* used for scanouts that need special layouts */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (pt->tex_usage & PIPE_TEXTURE_USAGE_SCANOUT) if (i915_scanout_layout(tex)) return; - /* for shared buffers we use some very like scanout */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + /* shared buffers needs to be compatible with X servers */ + if (pt->tex_usage & PIPE_TEXTURE_USAGE_SHARED) if (i915_display_target_layout(tex)) return; @@ -369,12 +369,12 @@ i945_miptree_layout_2d(struct i915_texture *tex) unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0); /* used for scanouts that need special layouts */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_SCANOUT) if (i915_scanout_layout(tex)) return; - /* for shared buffers we use some very like scanout */ - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + /* shared buffers needs to be compatible with X servers */ + if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_SHARED) if (i915_display_target_layout(tex)) return; @@ -642,7 +642,7 @@ i915_texture_create(struct pipe_screen *screen, /* for scanouts and cursors, cursors arn't scanouts */ - if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64) + if (templat->tex_usage & PIPE_TEXTURE_USAGE_SCANOUT && templat->width0 != 64) buf_usage = INTEL_NEW_SCANOUT; else buf_usage = INTEL_NEW_TEXTURE; @@ -673,19 +673,24 @@ fail: } static struct pipe_texture * -i915_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) +i915_texture_from_handle(struct pipe_screen * screen, + const struct pipe_texture *templat, + struct winsys_handle *whandle) { -#if 0 + struct i915_screen *is = i915_screen(screen); struct i915_texture *tex; + struct intel_winsys *iws = is->iws; + struct intel_buffer *buffer; + unsigned stride; + assert(screen); + buffer = iws->buffer_from_handle(iws, whandle, &stride); + /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { + if (templat->target != PIPE_TEXTURE_2D || + templat->last_level != 0 || + templat->depth0 != 1) { return NULL; } @@ -693,23 +698,33 @@ i915_texture_blanket(struct pipe_screen * screen, if (!tex) return NULL; - tex->base = *base; + tex->base = *templat; pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; - tex->stride = stride[0]; + tex->stride = stride; - i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); + i915_miptree_set_level_info(tex, 0, 1, templat->width0, templat->height0, 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - pipe_buffer_reference(&tex->buffer, buffer); + tex->buffer = buffer; return &tex->base; -#else - return NULL; -#endif } +static boolean +i915_texture_get_handle(struct pipe_screen * screen, + struct pipe_texture *texture, + struct winsys_handle *whandle) +{ + struct i915_screen *is = i915_screen(screen); + struct i915_texture *tex = (struct i915_texture *)texture; + struct intel_winsys *iws = is->iws; + + return iws->buffer_get_handle(iws, tex->buffer, whandle, tex->stride); +} + + static void i915_texture_destroy(struct pipe_texture *pt) { @@ -723,7 +738,7 @@ i915_texture_destroy(struct pipe_texture *pt) iws->buffer_destroy(iws, tex->buffer); - for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + for (i = 0; i < Elements(tex->image_offset); i++) if (tex->image_offset[i]) FREE(tex->image_offset[i]); @@ -780,12 +795,12 @@ i915_tex_surface_destroy(struct pipe_surface *surf) /* - * Screen transfer functions + * Texture transfer functions */ -static struct pipe_transfer* -i915_get_tex_transfer(struct pipe_screen *screen, +static struct pipe_transfer * +i915_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, @@ -822,7 +837,7 @@ i915_get_tex_transfer(struct pipe_screen *screen, } static void * -i915_transfer_map(struct pipe_screen *screen, +i915_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_texture *tex = (struct i915_texture *)transfer->texture; @@ -844,7 +859,7 @@ i915_transfer_map(struct pipe_screen *screen, } static void -i915_transfer_unmap(struct pipe_screen *screen, +i915_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct i915_texture *tex = (struct i915_texture *)transfer->texture; @@ -853,7 +868,8 @@ i915_transfer_unmap(struct pipe_screen *screen, } static void -i915_tex_transfer_destroy(struct pipe_transfer *trans) +i915_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *trans) { pipe_texture_reference(&trans->texture, NULL); FREE(trans); @@ -864,67 +880,22 @@ i915_tex_transfer_destroy(struct pipe_transfer *trans) * Other texture functions */ +void +i915_init_texture_functions(struct i915_context *i915 ) +{ + i915->base.get_tex_transfer = i915_get_tex_transfer; + i915->base.transfer_map = i915_transfer_map; + i915->base.transfer_unmap = i915_transfer_unmap; + i915->base.tex_transfer_destroy = i915_tex_transfer_destroy; +} void i915_init_screen_texture_functions(struct i915_screen *is) { is->base.texture_create = i915_texture_create; - is->base.texture_blanket = i915_texture_blanket; + is->base.texture_from_handle = i915_texture_from_handle; + is->base.texture_get_handle = i915_texture_get_handle; is->base.texture_destroy = i915_texture_destroy; is->base.get_tex_surface = i915_get_tex_surface; is->base.tex_surface_destroy = i915_tex_surface_destroy; - is->base.get_tex_transfer = i915_get_tex_transfer; - is->base.transfer_map = i915_transfer_map; - is->base.transfer_unmap = i915_transfer_unmap; - is->base.tex_transfer_destroy = i915_tex_transfer_destroy; -} - -struct pipe_texture * -i915_texture_blanket_intel(struct pipe_screen *screen, - struct pipe_texture *base, - unsigned stride, - struct intel_buffer *buffer) -{ - struct i915_texture *tex; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; - } - - tex = CALLOC_STRUCT(i915_texture); - if (!tex) - return NULL; - - tex->base = *base; - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - tex->stride = stride; - - i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1); - i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - - tex->buffer = buffer; - - return &tex->base; -} - -boolean -i915_get_texture_buffer_intel(struct pipe_texture *texture, - struct intel_buffer **buffer, - unsigned *stride) -{ - struct i915_texture *tex = (struct i915_texture *)texture; - - if (!texture) - return FALSE; - - *stride = tex->stride; - *buffer = tex->buffer; - - return TRUE; } diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h index b3a802b0e29..00fd0c1efea 100644 --- a/src/gallium/drivers/i915/intel_winsys.h +++ b/src/gallium/drivers/i915/intel_winsys.h @@ -33,6 +33,7 @@ struct intel_buffer; struct intel_batchbuffer; struct pipe_texture; struct pipe_fence_handle; +struct winsys_handle; enum intel_buffer_usage { @@ -129,6 +130,25 @@ struct intel_winsys { enum intel_buffer_type type); /** + * Creates a buffer from a handle. + * Used to implement pipe_screen::texture_from_handle. + * Also provides the stride information needed for the + * texture via the stride argument. + */ + struct intel_buffer *(*buffer_from_handle)(struct intel_winsys *iws, + struct winsys_handle *whandle, + unsigned *stride); + + /** + * Used to implement pipe_screen::texture_get_handle. + * The winsys might need the stride information. + */ + boolean (*buffer_get_handle)(struct intel_winsys *iws, + struct intel_buffer *buffer, + struct winsys_handle *whandle, + unsigned stride); + + /** * Fence a buffer with a fence reg. * Not to be confused with pipe_fence_handle. */ @@ -204,23 +224,4 @@ struct intel_winsys { struct pipe_screen *i915_create_screen(struct intel_winsys *iws, unsigned pci_id); -/** - * Get the intel_winsys buffer backing the texture. - * - * TODO UGLY - */ -boolean i915_get_texture_buffer_intel(struct pipe_texture *texture, - struct intel_buffer **buffer, - unsigned *stride); - -/** - * Wrap a intel_winsys buffer with a texture blanket. - * - * TODO UGLY - */ -struct pipe_texture * i915_texture_blanket_intel(struct pipe_screen *screen, - struct pipe_texture *tmplt, - unsigned pitch, - struct intel_buffer *buffer); - #endif diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 12cfa7b049c..f5b1a06576b 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -351,7 +351,7 @@ struct brw_vs_prog_data { /* Size == 0 if output either not written, or always [0,0,0,1] */ -struct brw_vs_ouput_sizes { +struct brw_vs_output_sizes { GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS]; }; @@ -546,14 +546,13 @@ struct brw_context const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; + const struct brw_vertex_element_packet *velems; const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS]; unsigned num_samplers; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned num_vertex_elements; unsigned num_textures; unsigned num_vertex_buffers; diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index d59261557b5..0820ba20a08 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -42,141 +42,6 @@ -static unsigned brw_translate_surface_format( unsigned id ) -{ - switch (id) { - case PIPE_FORMAT_R64_FLOAT: - return BRW_SURFACEFORMAT_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return BRW_SURFACEFORMAT_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return BRW_SURFACEFORMAT_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return BRW_SURFACEFORMAT_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return BRW_SURFACEFORMAT_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return BRW_SURFACEFORMAT_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return BRW_SURFACEFORMAT_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return BRW_SURFACEFORMAT_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return BRW_SURFACEFORMAT_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return BRW_SURFACEFORMAT_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return BRW_SURFACEFORMAT_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return BRW_SURFACEFORMAT_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return BRW_SURFACEFORMAT_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return BRW_SURFACEFORMAT_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return BRW_SURFACEFORMAT_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return BRW_SURFACEFORMAT_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return BRW_SURFACEFORMAT_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return BRW_SURFACEFORMAT_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return BRW_SURFACEFORMAT_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return BRW_SURFACEFORMAT_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return BRW_SURFACEFORMAT_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return BRW_SURFACEFORMAT_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return BRW_SURFACEFORMAT_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return BRW_SURFACEFORMAT_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return BRW_SURFACEFORMAT_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return BRW_SURFACEFORMAT_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return BRW_SURFACEFORMAT_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return BRW_SURFACEFORMAT_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; - - default: - assert(0); - return 0; - } -} - static unsigned get_index_type(int type) { switch (type) { @@ -315,75 +180,16 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) - static int brw_emit_vertex_elements(struct brw_context *brw) { - GLuint nr = brw->curr.num_vertex_elements; - GLuint i; + const struct brw_vertex_element_packet *brw_velems = brw->curr.velems; + unsigned size = brw_velems->header.length + 2; + /* why is this here */ brw_emit_query_begin(brw); - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. - */ - if (nr == 0) { - BEGIN_BATCH(3, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); - OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return 0; - } - - /* Now emit vertex element (VEP) state packets. - * - */ - BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2)); - for (i = 0; i < nr; i++) { - const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; - uint32_t format = brw_translate_surface_format( input->src_format ); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - - switch (input->nr_components) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; - break; - } - - OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) | - BRW_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT)); + brw_batchbuffer_data(brw->batch, brw_velems, size * 4, IGNORE_CLIPRECTS); - if (BRW_IS_IGDNG(brw)) - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - else - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); - } - ADVANCE_BATCH(); return 0; } @@ -396,10 +202,11 @@ static int brw_emit_vertices( struct brw_context *brw ) if (ret) return ret; + /* XXX should separate this? */ ret = brw_emit_vertex_elements( brw ); if (ret) return ret; - + return 0; } @@ -407,7 +214,8 @@ static int brw_emit_vertices( struct brw_context *brw ) const struct brw_tracked_state brw_vertices = { .dirty = { .mesa = (PIPE_NEW_INDEX_RANGE | - PIPE_NEW_VERTEX_BUFFER), + PIPE_NEW_VERTEX_BUFFER | + PIPE_NEW_VERTEX_ELEMENT), .brw = BRW_NEW_BATCH, .cache = 0, }, diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index e4b24229db3..e3f25bdf622 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -265,8 +265,8 @@ static int emit_depthbuffer(struct brw_context *brw) format = BRW_DEPTHFORMAT_D16_UNORM; cpp = 2; break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; cpp = 4; break; diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 452e1e89f93..d7048f769b2 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -139,8 +139,8 @@ static void zstencil_clear(struct brw_context *brw, unsigned value; switch (bsurface->base.format) { - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: value = ((unsigned)(depth * MASK24) & MASK24); break; case PIPE_FORMAT_Z16_UNORM: @@ -152,8 +152,8 @@ static void zstencil_clear(struct brw_context *brw, } switch (bsurface->base.format) { - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: value = value | (stencil << 24); break; diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index e3c48e31493..d6a840857ec 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -1,22 +1,251 @@ #include "brw_context.h" +#include "brw_defines.h" +#include "brw_structs.h" +#include "util/u_memory.h" +#include "util/u_format.h" -static void brw_set_vertex_elements( struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements ) + +static unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static void brw_translate_vertex_elements(struct brw_context *brw, + struct brw_vertex_element_packet *brw_velems, + const struct pipe_vertex_element *attribs, + unsigned count) +{ + unsigned i; + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + brw_velems->header.opcode = CMD_VERTEX_ELEMENT; + + if (count == 0) { + brw_velems->header.length = 1; + brw_velems->ve[0].ve0.src_offset = 0; + brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + brw_velems->ve[0].ve0.valid = 1; + brw_velems->ve[0].ve0.vertex_buffer_index = 0; + brw_velems->ve[0].ve1.dst_offset = 0; + brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0; + brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0; + brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0; + brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT; + return; + } + + + /* Now emit vertex element (VEP) state packets. + * + */ + brw_velems->header.length = (1 + count * 2) - 2; + for (i = 0; i < count; i++) { + const struct pipe_vertex_element *input = &attribs[i]; + unsigned nr_components = util_format_get_nr_components(input->src_format); + + uint32_t format = brw_translate_surface_format( input->src_format ); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + + switch (nr_components) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } + + brw_velems->ve[i].ve0.src_offset = input->src_offset; + brw_velems->ve[i].ve0.src_format = format; + brw_velems->ve[i].ve0.valid = 1; + brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index; + brw_velems->ve[i].ve1.vfcomponent0 = comp0; + brw_velems->ve[i].ve1.vfcomponent1 = comp1; + brw_velems->ve[i].ve1.vfcomponent2 = comp2; + brw_velems->ve[i].ve1.vfcomponent3 = comp3; + + if (BRW_IS_IGDNG(brw)) + brw_velems->ve[i].ve1.dst_offset = 0; + else + brw_velems->ve[i].ve1.dst_offset = i * 4; + } +} + +static void* brw_create_vertex_elements_state( struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs ) { + /* note: for the brw_swtnl.c code (if ever we need draw fallback) we'd also need + to store the original data */ struct brw_context *brw = brw_context(pipe); + struct brw_vertex_element_packet *velems; + assert(count <= BRW_VEP_MAX); + velems = (struct brw_vertex_element_packet *) MALLOC(sizeof(struct brw_vertex_element_packet)); + if (velems) { + brw_translate_vertex_elements(brw, velems, attribs, count); + } + return velems; +} - memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0])); - brw->curr.num_vertex_elements = count; +static void brw_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_vertex_element_packet *brw_velems = (struct brw_vertex_element_packet *) velems; + + brw->curr.velems = brw_velems; brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT; } +static void brw_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); +} + static void brw_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); unsigned i; @@ -49,7 +278,9 @@ void brw_pipe_vertex_init( struct brw_context *brw ) { brw->base.set_vertex_buffers = brw_set_vertex_buffers; - brw->base.set_vertex_elements = brw_set_vertex_elements; + brw->base.create_vertex_elements_state = brw_create_vertex_elements_state; + brw->base.bind_vertex_elements_state = brw_bind_vertex_elements_state; + brw->base.delete_vertex_elements_state = brw_delete_vertex_elements_state; } diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index b03a782a2f6..cef83ffea80 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -174,11 +174,11 @@ brw_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 11; /* max 1024x1024 */ + return BRW_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ + return BRW_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 11; /* max 1024x1024 */ + return BRW_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: return 1; @@ -229,15 +229,15 @@ brw_is_format_supported(struct pipe_screen *screen, PIPE_FORMAT_L16_UNORM, /*PIPE_FORMAT_I16_UNORM,*/ /*PIPE_FORMAT_A16_UNORM,*/ - PIPE_FORMAT_A8L8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_A1R5G5B5_UNORM, - PIPE_FORMAT_A4R4G4B4_UNORM, - PIPE_FORMAT_X8R8G8B8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_B5G6R5_UNORM, + PIPE_FORMAT_B5G5R5A1_UNORM, + PIPE_FORMAT_B4G4R4A4_UNORM, + PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, /* video */ - PIPE_FORMAT_YCBCR, - PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_UYVY, + PIPE_FORMAT_YUYV, /* compressed */ /*PIPE_FORMAT_FXT1_RGBA,*/ PIPE_FORMAT_DXT1_RGB, @@ -245,14 +245,14 @@ brw_is_format_supported(struct pipe_screen *screen, PIPE_FORMAT_DXT3_RGBA, PIPE_FORMAT_DXT5_RGBA, /* sRGB */ - PIPE_FORMAT_R8G8B8A8_SRGB, - PIPE_FORMAT_A8L8_SRGB, + PIPE_FORMAT_A8B8G8R8_SRGB, + PIPE_FORMAT_L8A8_SRGB, PIPE_FORMAT_L8_SRGB, PIPE_FORMAT_DXT1_SRGB, /* depth */ PIPE_FORMAT_Z32_FLOAT, - PIPE_FORMAT_X8Z24_UNORM, - PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z24X8_UNORM, + PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z16_UNORM, /* signed */ PIPE_FORMAT_R8G8_SNORM, @@ -260,15 +260,15 @@ brw_is_format_supported(struct pipe_screen *screen, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format render_supported[] = { - PIPE_FORMAT_X8R8G8B8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B5G6R5_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format depth_supported[] = { PIPE_FORMAT_Z32_FLOAT, - PIPE_FORMAT_X8Z24_UNORM, - PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z24X8_UNORM, + PIPE_FORMAT_Z24S8_UNORM, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 7226d9228b7..e3a7c64d489 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -100,6 +100,9 @@ struct brw_surface }; +#define BRW_MAX_TEXTURE_2D_LEVELS 11 /* max 1024x1024 */ +#define BRW_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */ + struct brw_texture { @@ -107,9 +110,9 @@ struct brw_texture struct brw_winsys_buffer *bo; struct brw_surface_state ss; - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned *image_offset[BRW_MAX_TEXTURE_2D_LEVELS]; + unsigned nr_images[BRW_MAX_TEXTURE_2D_LEVELS]; + unsigned level_offset[BRW_MAX_TEXTURE_2D_LEVELS]; boolean compressed; unsigned brw_target; @@ -178,6 +181,10 @@ void brw_update_texture( struct brw_screen *brw_screen, struct brw_texture *tex ); +/* brw_screen_texture.h + */ +struct brw_context; +void brw_tex_init( struct brw_context *brw ); void brw_screen_tex_init( struct brw_screen *brw_screen ); void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 8bdd43cf140..cadcb7cee2a 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -37,6 +37,8 @@ #include "brw_defines.h" #include "brw_structs.h" #include "brw_winsys.h" +#include "brw_context.h" + @@ -85,32 +87,32 @@ static GLuint translate_tex_format( enum pipe_format pf ) return BRW_SURFACEFORMAT_A16_UNORM; */ - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: return BRW_SURFACEFORMAT_L8A8_UNORM; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return BRW_SURFACEFORMAT_B5G6R5_UNORM; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; /* * Video formats */ - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return BRW_SURFACEFORMAT_YCRCB_NORMAL; - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; /* @@ -137,10 +139,10 @@ static GLuint translate_tex_format( enum pipe_format pf ) * sRGB formats */ - case PIPE_FORMAT_R8G8B8A8_SRGB: + case PIPE_FORMAT_A8B8G8R8_SRGB: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - case PIPE_FORMAT_A8L8_SRGB: + case PIPE_FORMAT_L8A8_SRGB: return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; case PIPE_FORMAT_L8_SRGB: @@ -156,8 +158,8 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_Z16_UNORM: return BRW_SURFACEFORMAT_I16_UNORM; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return BRW_SURFACEFORMAT_I24X8_UNORM; case PIPE_FORMAT_Z32_FLOAT: @@ -231,8 +233,8 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, goto fail; - if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { + if (templ->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { buffer_type = BRW_BUFFER_TYPE_SCANOUT; } else { @@ -250,7 +252,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); - format = translate_tex_target(tex->base.format); + format = translate_tex_format(tex->base.format); assert(format != BRW_SURFACEFORMAT_INVALID); tex->ss.ss0.surface_format = format; @@ -303,14 +305,121 @@ fail: return NULL; } -static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, - const struct pipe_texture *templ, - const unsigned *stride, - struct pipe_buffer *buffer) +static struct pipe_texture * +brw_texture_from_handle(struct pipe_screen *screen, + const struct pipe_texture *templ, + struct winsys_handle *whandle) { + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + struct brw_winsys_buffer *buffer; + unsigned tiling; + unsigned pitch; + + if (templ->target != PIPE_TEXTURE_2D || + templ->last_level != 0 || + templ->depth0 != 1) + return NULL; + + if (util_format_is_compressed(templ->format)) + return NULL; + + tex = CALLOC_STRUCT(brw_texture); + if (!tex) + return NULL; + + if (bscreen->sws->bo_from_handle(bscreen->sws, whandle, &pitch, &tiling, &buffer) != PIPE_OK) + goto fail; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + /* XXX: cpp vs. blocksize + */ + tex->cpp = util_format_get_blocksize(tex->base.format); + tex->tiling = tiling; + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + if (!brw_texture_layout(bscreen, tex)) + goto fail; + + /* XXX Maybe some more checks? */ + if ((pitch / tex->cpp) < tex->pitch) + goto fail; + + tex->pitch = pitch / tex->cpp; + + tex->bo = buffer; + + /* fix this warning */ +#if 0 + if (tex->size > buffer->size) + goto fail; +#endif + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; + + tex->ss.ss4.min_lod = 0; + + return &tex->base; + +fail: + FREE(tex); return NULL; } +static boolean +brw_texture_get_handle(struct pipe_screen *screen, + struct pipe_texture *texture, + struct winsys_handle *whandle) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex = brw_texture(texture); + unsigned stride; + + stride = tex->pitch * tex->cpp; + + return bscreen->sws->bo_get_handle(tex->bo, whandle, stride); +} + + + static void brw_texture_destroy(struct pipe_texture *pt) { struct brw_texture *tex = brw_texture(pt); @@ -372,7 +481,7 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, */ static struct pipe_transfer* -brw_get_tex_transfer(struct pipe_screen *screen, +brw_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, @@ -407,11 +516,11 @@ brw_get_tex_transfer(struct pipe_screen *screen, } static void * -brw_transfer_map(struct pipe_screen *screen, +brw_transfer_map(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct brw_texture *tex = brw_texture(transfer->texture); - struct brw_winsys_screen *sws = brw_screen(screen)->sws; + struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws; char *map; unsigned usage = transfer->usage; @@ -434,146 +543,37 @@ brw_transfer_map(struct pipe_screen *screen, } static void -brw_transfer_unmap(struct pipe_screen *screen, +brw_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct brw_texture *tex = brw_texture(transfer->texture); - struct brw_winsys_screen *sws = brw_screen(screen)->sws; + struct brw_winsys_screen *sws = brw_screen(pipe->screen)->sws; sws->bo_unmap(tex->bo); } static void -brw_tex_transfer_destroy(struct pipe_transfer *trans) +brw_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *trans) { pipe_texture_reference(&trans->texture, NULL); FREE(trans); } -/* - * Functions exported to the winsys - */ - -boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, - struct brw_winsys_buffer **buffer, - unsigned *stride) +void brw_tex_init( struct brw_context *brw ) { - struct brw_texture *tex = brw_texture(texture); - - *buffer = tex->bo; - if (stride) - *stride = tex->pitch * tex->cpp; - - return TRUE; -} - -struct pipe_texture * -brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, - const struct pipe_texture *templ, - unsigned pitch, - unsigned tiling, - struct brw_winsys_buffer *buffer) -{ - struct brw_screen *bscreen = brw_screen(screen); - struct brw_texture *tex; - GLuint format; - - if (templ->target != PIPE_TEXTURE_2D || - templ->last_level != 0 || - templ->depth0 != 1) - return NULL; - - if (util_format_is_compressed(templ->format)) - return NULL; - - tex = CALLOC_STRUCT(brw_texture); - if (!tex) - return NULL; - - memcpy(&tex->base, templ, sizeof *templ); - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - /* XXX: cpp vs. blocksize - */ - tex->cpp = util_format_get_blocksize(tex->base.format); - tex->tiling = tiling; - - make_empty_list(&tex->views[0]); - make_empty_list(&tex->views[1]); - - if (!brw_texture_layout(bscreen, tex)) - goto fail; - - /* XXX Maybe some more checks? */ - if ((pitch / tex->cpp) < tex->pitch) - goto fail; - - tex->pitch = pitch / tex->cpp; - - tex->bo = buffer; - - /* fix this warning */ -#if 0 - if (tex->size > buffer->size) - goto fail; -#endif - - tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); - - format = translate_tex_format(tex->base.format); - assert(format != BRW_SURFACEFORMAT_INVALID); - tex->ss.ss0.surface_format = format; - - /* This is ok for all textures with channel width 8bit or less: - */ -/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - - - /* XXX: what happens when tex->bo->offset changes??? - */ - tex->ss.ss1.base_addr = 0; /* reloc */ - tex->ss.ss2.mip_count = tex->base.last_level; - tex->ss.ss2.width = tex->base.width0 - 1; - tex->ss.ss2.height = tex->base.height0 - 1; - - switch (tex->tiling) { - case BRW_TILING_NONE: - tex->ss.ss3.tiled_surface = 0; - tex->ss.ss3.tile_walk = 0; - break; - case BRW_TILING_X: - tex->ss.ss3.tiled_surface = 1; - tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - break; - case BRW_TILING_Y: - tex->ss.ss3.tiled_surface = 1; - tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; - break; - } - - tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; - tex->ss.ss3.depth = tex->base.depth0 - 1; - - tex->ss.ss4.min_lod = 0; - - return &tex->base; - -fail: - FREE(tex); - return NULL; + brw->base.get_tex_transfer = brw_get_tex_transfer; + brw->base.transfer_map = brw_transfer_map; + brw->base.transfer_unmap = brw_transfer_unmap; + brw->base.tex_transfer_destroy = brw_tex_transfer_destroy; } void brw_screen_tex_init( struct brw_screen *brw_screen ) { brw_screen->base.is_format_supported = brw_is_format_supported; brw_screen->base.texture_create = brw_texture_create; + brw_screen->base.texture_from_handle = brw_texture_from_handle; + brw_screen->base.texture_get_handle = brw_texture_get_handle; brw_screen->base.texture_destroy = brw_texture_destroy; - brw_screen->base.texture_blanket = brw_texture_blanket; - brw_screen->base.get_tex_transfer = brw_get_tex_transfer; - brw_screen->base.transfer_map = brw_transfer_map; - brw_screen->base.transfer_unmap = brw_transfer_unmap; - brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy; } diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index bf10bc04de7..e97ddeb5e1c 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -28,7 +28,7 @@ * Authors: * Keith Whitwell <[email protected]> */ - + #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H @@ -1149,7 +1149,7 @@ struct brw_vertex_element_state GLuint valid:1; GLuint vertex_buffer_index:5; } ve0; - + struct { GLuint dst_offset:8; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index c82d00f4a47..f30c7f18132 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -162,6 +162,16 @@ struct brw_winsys_screen { unsigned alignment, struct brw_winsys_buffer **bo_out); + enum pipe_error (*bo_from_handle)(struct brw_winsys_screen *sws, + struct winsys_handle *whandle, + unsigned *stride, + unsigned *tiling, + struct brw_winsys_buffer **bo_out); + + enum pipe_error (*bo_get_handle)(struct brw_winsys_buffer *buffer, + struct winsys_handle *whandle, + unsigned stride); + /* Destroy a buffer when our refcount goes to zero: */ void (*bo_destroy)(struct brw_winsys_buffer *buffer); @@ -257,28 +267,6 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id); -/** - * Get the brw_winsys buffer backing the texture. - * - * TODO UGLY - */ -struct pipe_texture; -boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, - struct brw_winsys_buffer **buffer, - unsigned *stride); - -/** - * Wrap a brw_winsys buffer with a texture blanket. - * - * TODO UGLY - */ -struct pipe_texture * -brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, - const struct pipe_texture *template, - unsigned pitch, - unsigned tiling, - struct brw_winsys_buffer *buffer); - /************************************************************************* * Cooperative dumping between winsys and driver. TODO: make this diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 5164c90ed60..dfb718e64fe 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -254,10 +254,10 @@ static void brw_wm_populate_key( struct brw_context *brw, for (i = 0; i < brw->curr.num_textures; i++) { const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); - if (tex->base.format == PIPE_FORMAT_YCBCR) + if (tex->base.format == PIPE_FORMAT_UYVY) key->yuvtex_mask |= 1 << i; - if (tex->base.format == PIPE_FORMAT_YCBCR_REV) + if (tex->base.format == PIPE_FORMAT_YUYV) key->yuvtex_swap_mask |= 1 << i; /* XXX: shadow texture diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c index 8248b2a4132..26770d6b1e9 100644 --- a/src/gallium/drivers/identity/id_context.c +++ b/src/gallium/drivers/identity/id_context.c @@ -377,6 +377,42 @@ identity_delete_vs_state(struct pipe_context *_pipe, vs); } + +static void * +identity_create_vertex_elements_state(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *vertex_elements) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + return pipe->create_vertex_elements_state(pipe, + num_elements, + vertex_elements); +} + +static void +identity_bind_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->bind_vertex_elements_state(pipe, + velems); +} + +static void +identity_delete_vertex_elements_state(struct pipe_context *_pipe, + void *velems) +{ + struct identity_context *id_pipe = identity_context(_pipe); + struct pipe_context *pipe = id_pipe->pipe; + + pipe->delete_vertex_elements_state(pipe, + velems); +} + static void identity_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *blend_color) @@ -563,20 +599,6 @@ identity_set_vertex_buffers(struct pipe_context *_pipe, num_buffers, buffers); } - -static void -identity_set_vertex_elements(struct pipe_context *_pipe, - unsigned num_elements, - const struct pipe_vertex_element *vertex_elements) -{ - struct identity_context *id_pipe = identity_context(_pipe); - struct pipe_context *pipe = id_pipe->pipe; - - pipe->set_vertex_elements(pipe, - num_elements, - vertex_elements); -} - static void identity_surface_copy(struct pipe_context *_pipe, struct pipe_surface *_dst, @@ -689,6 +711,76 @@ identity_is_buffer_referenced(struct pipe_context *_pipe, buffer); } + + +static struct pipe_transfer * +identity_context_get_tex_transfer(struct pipe_context *_context, + struct pipe_texture *_texture, + unsigned face, + unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, + unsigned y, + unsigned w, + unsigned h) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_texture *id_texture = identity_texture(_texture); + struct pipe_context *context = id_context->pipe; + struct pipe_texture *texture = id_texture->texture; + struct pipe_transfer *result; + + result = context->get_tex_transfer(context, + texture, + face, + level, + zslice, + usage, + x, + y, + w, + h); + + if (result) + return identity_transfer_create(id_context, id_texture, result); + return NULL; +} + +static void +identity_context_tex_transfer_destroy(struct pipe_context *_pipe, + struct pipe_transfer *_transfer) +{ + identity_transfer_destroy(identity_context(_pipe), + identity_transfer(_transfer)); +} + +static void * +identity_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_transfer *id_transfer = identity_transfer(_transfer); + struct pipe_context *context = id_context->pipe; + struct pipe_transfer *transfer = id_transfer->transfer; + + return context->transfer_map(context, + transfer); +} + +static void +identity_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct identity_context *id_context = identity_context(_context); + struct identity_transfer *id_transfer = identity_transfer(_transfer); + struct pipe_context *context = id_context->pipe; + struct pipe_transfer *transfer = id_transfer->transfer; + + context->transfer_unmap(context, + transfer); +} + struct pipe_context * identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) { @@ -733,6 +825,9 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.create_vs_state = identity_create_vs_state; id_pipe->base.bind_vs_state = identity_bind_vs_state; id_pipe->base.delete_vs_state = identity_delete_vs_state; + id_pipe->base.create_vertex_elements_state = identity_create_vertex_elements_state; + id_pipe->base.bind_vertex_elements_state = identity_bind_vertex_elements_state; + id_pipe->base.delete_vertex_elements_state = identity_delete_vertex_elements_state; id_pipe->base.set_blend_color = identity_set_blend_color; id_pipe->base.set_stencil_ref = identity_set_stencil_ref; id_pipe->base.set_clip_state = identity_set_clip_state; @@ -744,13 +839,16 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe) id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures; id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures; id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers; - id_pipe->base.set_vertex_elements = identity_set_vertex_elements; id_pipe->base.surface_copy = identity_surface_copy; id_pipe->base.surface_fill = identity_surface_fill; id_pipe->base.clear = identity_clear; id_pipe->base.flush = identity_flush; id_pipe->base.is_texture_referenced = identity_is_texture_referenced; id_pipe->base.is_buffer_referenced = identity_is_buffer_referenced; + id_pipe->base.get_tex_transfer = identity_context_get_tex_transfer; + id_pipe->base.tex_transfer_destroy = identity_context_tex_transfer_destroy; + id_pipe->base.transfer_map = identity_context_transfer_map; + id_pipe->base.transfer_unmap = identity_context_transfer_unmap; id_pipe->pipe = pipe; diff --git a/src/gallium/drivers/identity/id_drm.c b/src/gallium/drivers/identity/id_drm.c index b89724e4f32..936ccc444a8 100644 --- a/src/gallium/drivers/identity/id_drm.c +++ b/src/gallium/drivers/identity/id_drm.c @@ -28,11 +28,11 @@ #include "state_tracker/drm_api.h" #include "util/u_memory.h" -#include "identity/id_drm.h" -#include "identity/id_screen.h" -#include "identity/id_public.h" -#include "identity/id_screen.h" -#include "identity/id_objects.h" +#include "id_drm.h" +#include "id_screen.h" +#include "id_public.h" +#include "id_screen.h" +#include "id_objects.h" struct identity_drm_api { @@ -63,62 +63,6 @@ identity_drm_create_screen(struct drm_api *_api, int fd, return identity_screen_create(screen); } - -static struct pipe_texture * -identity_drm_texture_from_shared_handle(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *templ, - const char *name, - unsigned stride, - unsigned handle) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_drm_api *id_api = identity_drm_api(_api); - struct pipe_screen *screen = id_screen->screen; - struct drm_api *api = id_api->api; - struct pipe_texture *result; - - result = api->texture_from_shared_handle(api, screen, templ, name, stride, handle); - - result = identity_texture_create(identity_screen(_screen), result); - - return result; -} - -static boolean -identity_drm_shared_handle_from_texture(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned *stride, - unsigned *handle) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_texture *id_texture = identity_texture(_texture); - struct identity_drm_api *id_api = identity_drm_api(_api); - struct pipe_screen *screen = id_screen->screen; - struct pipe_texture *texture = id_texture->texture; - struct drm_api *api = id_api->api; - - return api->shared_handle_from_texture(api, screen, texture, stride, handle); -} - -static boolean -identity_drm_local_handle_from_texture(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned *stride, - unsigned *handle) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_texture *id_texture = identity_texture(_texture); - struct identity_drm_api *id_api = identity_drm_api(_api); - struct pipe_screen *screen = id_screen->screen; - struct pipe_texture *texture = id_texture->texture; - struct drm_api *api = id_api->api; - - return api->local_handle_from_texture(api, screen, texture, stride, handle); -} - static void identity_drm_destroy(struct drm_api *_api) { @@ -145,9 +89,6 @@ identity_drm_create(struct drm_api *api) id_api->base.name = api->name; id_api->base.driver_name = api->driver_name; id_api->base.create_screen = identity_drm_create_screen; - id_api->base.texture_from_shared_handle = identity_drm_texture_from_shared_handle; - id_api->base.shared_handle_from_texture = identity_drm_shared_handle_from_texture; - id_api->base.local_handle_from_texture = identity_drm_local_handle_from_texture; id_api->base.destroy = identity_drm_destroy; id_api->api = api; diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c index 2b1a60c1bf1..d37fb0042e5 100644 --- a/src/gallium/drivers/identity/id_objects.c +++ b/src/gallium/drivers/identity/id_objects.c @@ -30,6 +30,7 @@ #include "id_screen.h" #include "id_objects.h" +#include "id_context.h" struct pipe_buffer * identity_buffer_create(struct identity_screen *id_screen, @@ -142,7 +143,8 @@ identity_surface_destroy(struct identity_surface *id_surface) struct pipe_transfer * -identity_transfer_create(struct identity_texture *id_texture, +identity_transfer_create(struct identity_context *id_context, + struct identity_texture *id_texture, struct pipe_transfer *transfer) { struct identity_transfer *id_transfer; @@ -159,25 +161,25 @@ identity_transfer_create(struct identity_texture *id_texture, memcpy(&id_transfer->base, transfer, sizeof(struct pipe_transfer)); id_transfer->base.texture = NULL; - pipe_texture_reference(&id_transfer->base.texture, &id_texture->base); id_transfer->transfer = transfer; + + pipe_texture_reference(&id_transfer->base.texture, &id_texture->base); assert(id_transfer->base.texture == &id_texture->base); return &id_transfer->base; error: - transfer->texture->screen->tex_transfer_destroy(transfer); + id_context->pipe->tex_transfer_destroy(id_context->pipe, transfer); return NULL; } void -identity_transfer_destroy(struct identity_transfer *id_transfer) +identity_transfer_destroy(struct identity_context *id_context, + struct identity_transfer *id_transfer) { - struct identity_screen *id_screen = identity_screen(id_transfer->base.texture->screen); - struct pipe_screen *screen = id_screen->screen; - pipe_texture_reference(&id_transfer->base.texture, NULL); - screen->tex_transfer_destroy(id_transfer->transfer); + id_context->pipe->tex_transfer_destroy(id_context->pipe, + id_transfer->transfer); FREE(id_transfer); } diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h index 77cc7190798..7333ecfb7fb 100644 --- a/src/gallium/drivers/identity/id_objects.h +++ b/src/gallium/drivers/identity/id_objects.h @@ -35,6 +35,7 @@ #include "id_screen.h" +struct identity_context; struct identity_buffer { @@ -64,6 +65,7 @@ struct identity_transfer { struct pipe_transfer base; + struct pipe_context *pipe; struct pipe_transfer *transfer; }; @@ -177,11 +179,13 @@ void identity_surface_destroy(struct identity_surface *id_surface); struct pipe_transfer * -identity_transfer_create(struct identity_texture *id_texture, +identity_transfer_create(struct identity_context *id_context, + struct identity_texture *id_texture, struct pipe_transfer *transfer); void -identity_transfer_destroy(struct identity_transfer *id_transfer); +identity_transfer_destroy(struct identity_context *id_context, + struct identity_transfer *id_transfer); struct pipe_video_surface * identity_video_surface_create(struct identity_screen *id_screen, diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c index b85492114a3..419b1465787 100644 --- a/src/gallium/drivers/identity/id_screen.c +++ b/src/gallium/drivers/identity/id_screen.c @@ -135,27 +135,40 @@ identity_screen_texture_create(struct pipe_screen *_screen, } static struct pipe_texture * -identity_screen_texture_blanket(struct pipe_screen *_screen, - const struct pipe_texture *templat, - const unsigned *stride, - struct pipe_buffer *_buffer) +identity_screen_texture_from_handle(struct pipe_screen *_screen, + const struct pipe_texture *templ, + struct winsys_handle *handle) { struct identity_screen *id_screen = identity_screen(_screen); - struct identity_buffer *id_buffer = identity_buffer(_buffer); struct pipe_screen *screen = id_screen->screen; - struct pipe_buffer *buffer = id_buffer->buffer; struct pipe_texture *result; - result = screen->texture_blanket(screen, - templat, - stride, - buffer); + /* TODO trace call */ - if (result) - return identity_texture_create(id_screen, result); - return NULL; + result = screen->texture_from_handle(screen, templ, handle); + + result = identity_texture_create(identity_screen(_screen), result); + + return result; } +static boolean +identity_screen_texture_get_handle(struct pipe_screen *_screen, + struct pipe_texture *_texture, + struct winsys_handle *handle) +{ + struct identity_screen *id_screen = identity_screen(_screen); + struct identity_texture *id_texture = identity_texture(_texture); + struct pipe_screen *screen = id_screen->screen; + struct pipe_texture *texture = id_texture->texture; + + /* TODO trace call */ + + return screen->texture_get_handle(screen, texture, handle); +} + + + static void identity_screen_texture_destroy(struct pipe_texture *_texture) { @@ -194,71 +207,6 @@ identity_screen_tex_surface_destroy(struct pipe_surface *_surface) identity_surface_destroy(identity_surface(_surface)); } -static struct pipe_transfer * -identity_screen_get_tex_transfer(struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned face, - unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, - unsigned y, - unsigned w, - unsigned h) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_texture *id_texture = identity_texture(_texture); - struct pipe_screen *screen = id_screen->screen; - struct pipe_texture *texture = id_texture->texture; - struct pipe_transfer *result; - - result = screen->get_tex_transfer(screen, - texture, - face, - level, - zslice, - usage, - x, - y, - w, - h); - - if (result) - return identity_transfer_create(id_texture, result); - return NULL; -} - -static void -identity_screen_tex_transfer_destroy(struct pipe_transfer *_transfer) -{ - identity_transfer_destroy(identity_transfer(_transfer)); -} - -static void * -identity_screen_transfer_map(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_transfer *id_transfer = identity_transfer(_transfer); - struct pipe_screen *screen = id_screen->screen; - struct pipe_transfer *transfer = id_transfer->transfer; - - return screen->transfer_map(screen, - transfer); -} - -static void -identity_screen_transfer_unmap(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct identity_transfer *id_transfer = identity_transfer(_transfer); - struct pipe_screen *screen = id_screen->screen; - struct pipe_transfer *transfer = id_transfer->transfer; - - screen->transfer_unmap(screen, - transfer); -} static struct pipe_buffer * identity_screen_buffer_create(struct pipe_screen *_screen, @@ -298,31 +246,6 @@ identity_screen_user_buffer_create(struct pipe_screen *_screen, return NULL; } -static struct pipe_buffer * -identity_screen_surface_buffer_create(struct pipe_screen *_screen, - unsigned width, - unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - struct identity_screen *id_screen = identity_screen(_screen); - struct pipe_screen *screen = id_screen->screen; - struct pipe_buffer *result; - - result = screen->surface_buffer_create(screen, - width, - height, - format, - usage, - tex_usage, - stride); - - if (result) - return identity_buffer_create(id_screen, result); - return NULL; -} static void * identity_screen_buffer_map(struct pipe_screen *_screen, @@ -495,17 +418,13 @@ identity_screen_create(struct pipe_screen *screen) id_screen->base.is_format_supported = identity_screen_is_format_supported; id_screen->base.context_create = identity_screen_context_create; id_screen->base.texture_create = identity_screen_texture_create; - id_screen->base.texture_blanket = identity_screen_texture_blanket; + id_screen->base.texture_from_handle = identity_screen_texture_from_handle; + id_screen->base.texture_get_handle = identity_screen_texture_get_handle; id_screen->base.texture_destroy = identity_screen_texture_destroy; id_screen->base.get_tex_surface = identity_screen_get_tex_surface; id_screen->base.tex_surface_destroy = identity_screen_tex_surface_destroy; - id_screen->base.get_tex_transfer = identity_screen_get_tex_transfer; - id_screen->base.tex_transfer_destroy = identity_screen_tex_transfer_destroy; - id_screen->base.transfer_map = identity_screen_transfer_map; - id_screen->base.transfer_unmap = identity_screen_transfer_unmap; id_screen->base.buffer_create = identity_screen_buffer_create; id_screen->base.user_buffer_create = identity_screen_user_buffer_create; - id_screen->base.surface_buffer_create = identity_screen_surface_buffer_create; if (screen->buffer_map) id_screen->base.buffer_map = identity_screen_buffer_map; if (screen->buffer_map_range) diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 31732514379..89c06ea3ad7 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ lp_surface.c \ lp_tex_sample_llvm.c \ lp_texture.c \ + lp_tile_image.c \ lp_tile_soa.c CPP_SOURCES = \ @@ -54,7 +55,7 @@ testprogs := lp_test_format \ LIBS += $(GL_LIB_DEPS) -L. -lllvmpipe -L../../auxiliary/ -lgallium -$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a - $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group +#$(testprogs): lp_test_% : lp_test_%.o lp_test_main.o libllvmpipe.a +# $(LD) $(filter %.o,$^) -o $@ -Wl,--start-group $(LIBS) -Wl,--end-group -default: $(testprogs) +#default: $(testprogs) diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README index 72d9f39658f..3c3fd386b52 100644 --- a/src/gallium/drivers/llvmpipe/README +++ b/src/gallium/drivers/llvmpipe/README @@ -12,7 +12,11 @@ Done so far is: - depth testing - - texture sampling (not all state/formats are supported) + - texture sampling + - 1D/2D/3D/cube maps supported + - all texture wrap modes supported + - all texture filtering modes supported + - perhaps not all texture formats yet supported - fragment shader TGSI translation - same level of support as the TGSI SSE2 exec machine, with the exception @@ -37,8 +41,6 @@ To do (probably by this order): - code generate stipple and stencil testing - - translate the remaining bits of texture sampling state - - translate TGSI control flow instructions, and all other remaining opcodes - integrate with the draw module for VS code generation @@ -49,8 +51,6 @@ To do (probably by this order): Requirements ============ - - Linux - - A x86 or amd64 processor. 64bit mode is preferred. Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will @@ -59,7 +59,7 @@ Requirements See /proc/cpuinfo to know what your CPU supports. - - LLVM 2.6. + - LLVM 2.6 (or later) For Linux, on a recent Debian based distribution do: @@ -69,6 +69,9 @@ Requirements http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment variable to the extracted path. + The version of LLVM from SVN ("2.7svn") from mid-March 2010 seems pretty + stable and has some features not in version 2.6. + - scons (optional) - udis86, http://udis86.sourceforge.net/ (optional): @@ -86,7 +89,7 @@ Building To build everything on Linux invoke scons as: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=xlib dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false Alternatively, you can build it with GNU make, if you prefer, by invoking it as @@ -96,7 +99,7 @@ but the rest of these instructions assume that scons is used. For windows is everything the except except the winsys: - scons debug=yes statetrackers=mesa drivers=trace,llvmpipe winsys=gdi dri=false + scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=gdi dri=false Using ===== @@ -142,11 +145,13 @@ Development Notes then skim through the lp_bld_* functions called in there, and the comments at the top of the lp_bld_*.c functions. -- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be - put in a stand-alone Gallium state -> LLVM IR translation module. +- The driver-independent parts of the LLVM / Gallium code are found in + src/gallium/auxiliary/gallivm/. The filenames and function prefixes + need to be renamed from "lp_bld_" to something else though. - We use LLVM-C bindings for now. They are not documented, but follow the C++ interfaces very closely, and appear to be complete enough for code generation. See http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html for a stand-alone example. + See the llvm-c/Core.h file for reference. diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index a39283e5e86..13c1a13e87a 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -18,6 +18,13 @@ env.CodeGenerate( command = 'python $SCRIPT $SOURCE > $TARGET' ) +# XXX: Our dependency scanner only finds depended modules in relative dirs. +env.Depends('lp_tile_soa.c', [ + '#src/gallium/auxiliary/util/u_format_parse.py', + '#src/gallium/auxiliary/util/u_format_pack.py', + '#src/gallium/auxiliary/util/u_format_access.py', +]) + llvmpipe = env.ConvenienceLibrary( target = 'llvmpipe', source = [ @@ -52,6 +59,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_surface.c', 'lp_tex_sample_llvm.c', 'lp_texture.c', + 'lp_tile_image.c', 'lp_tile_soa.c', ]) diff --git a/src/gallium/drivers/llvmpipe/lp_buffer.c b/src/gallium/drivers/llvmpipe/lp_buffer.c index 9eda9720818..6e0f37393e9 100644 --- a/src/gallium/drivers/llvmpipe/lp_buffer.c +++ b/src/gallium/drivers/llvmpipe/lp_buffer.c @@ -30,7 +30,6 @@ #include "util/u_memory.h" #include "util/u_math.h" -#include "lp_winsys.h" #include "lp_screen.h" #include "lp_buffer.h" diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c index 3edc62d0c69..e0676c80a25 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.c +++ b/src/gallium/drivers/llvmpipe/lp_context.c @@ -147,6 +147,10 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state; llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state; + llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state; + llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state; + llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state; + llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color; llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref; llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state; @@ -159,7 +163,6 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state; llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers; - llvmpipe->pipe.set_vertex_elements = llvmpipe_set_vertex_elements; llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays; llvmpipe->pipe.draw_elements = llvmpipe_draw_elements; @@ -172,6 +175,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) llvmpipe->pipe.is_buffer_referenced = llvmpipe_is_buffer_referenced; llvmpipe_init_query_funcs( llvmpipe ); + llvmpipe_init_context_texture_funcs( &llvmpipe->pipe ); /* * Create drawing context and plug our rendering stage into it. @@ -185,7 +189,7 @@ llvmpipe_create_context( struct pipe_screen *screen, void *priv ) if (debug_get_bool_option( "LP_NO_RAST", FALSE )) llvmpipe->no_rast = TRUE; - llvmpipe->setup = lp_setup_create( screen, + llvmpipe->setup = lp_setup_create( &llvmpipe->pipe, llvmpipe->draw ); if (!llvmpipe->setup) goto fail; diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h index 955c7eb8e0e..f391871b0ee 100644 --- a/src/gallium/drivers/llvmpipe/lp_context.h +++ b/src/gallium/drivers/llvmpipe/lp_context.h @@ -45,7 +45,8 @@ struct draw_stage; struct lp_fragment_shader; struct lp_vertex_shader; struct lp_blend_state; -struct setup_context; +struct lp_setup_context; +struct lp_velems_state; struct llvmpipe_context { struct pipe_context pipe; /**< base class */ @@ -58,6 +59,7 @@ struct llvmpipe_context { const struct pipe_rasterizer_state *rasterizer; struct lp_fragment_shader *fs; const struct lp_vertex_shader *vs; + const struct lp_velems_state *velems; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -71,13 +73,11 @@ struct llvmpipe_context { struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; unsigned num_vertex_samplers; unsigned num_vertex_textures; - unsigned num_vertex_elements; unsigned num_vertex_buffers; unsigned dirty; /**< Mask of LP_NEW_x flags */ @@ -98,7 +98,7 @@ struct llvmpipe_context { int psize_slot; /** The tiling engine */ - struct setup_context *setup; + struct lp_setup_context *setup; /** The primitive drawing context */ struct draw_context *draw; diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c index bf832433be1..636d72a9bb8 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.c +++ b/src/gallium/drivers/llvmpipe/lp_flush.c @@ -79,12 +79,12 @@ llvmpipe_flush( struct pipe_context *pipe, for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) { util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.cbufs[i]); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]); } if (0) { util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no); - debug_dump_surface(filename, llvmpipe->framebuffer.zsbuf); + debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf); } ++frame_no; @@ -92,3 +92,68 @@ llvmpipe_flush( struct pipe_context *pipe, #endif } + +/** + * Flush context if necessary. + * + * TODO: move this logic to an auxiliary library? + * + * FIXME: We must implement DISCARD/DONTBLOCK/UNSYNCHRONIZED/etc for + * textures to avoid blocking. + */ +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush) +{ + struct pipe_fence_handle *last_fence = NULL; + unsigned referenced; + + referenced = pipe->is_texture_referenced(pipe, texture, face, level); + + if ((referenced & PIPE_REFERENCED_FOR_WRITE) || + ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) { + + if (do_not_flush) + return FALSE; + + /* + * TODO: The semantics of these flush flags are too obtuse. They should + * disappear and the pipe driver should just ensure that all visible + * side-effects happen when they need to happen. + */ + if (referenced & PIPE_REFERENCED_FOR_WRITE) + flush_flags |= PIPE_FLUSH_RENDER_CACHE; + + if (referenced & PIPE_REFERENCED_FOR_READ) + flush_flags |= PIPE_FLUSH_TEXTURE_CACHE; + + if (cpu_access) { + /* + * Flush and wait. + */ + + struct pipe_fence_handle *fence = NULL; + + pipe->flush(pipe, flush_flags, &fence); + + if (last_fence) { + pipe->screen->fence_finish(pipe->screen, fence, 0); + pipe->screen->fence_reference(pipe->screen, &fence, NULL); + } + } else { + /* + * Just flush. + */ + + pipe->flush(pipe, flush_flags, NULL); + } + } + + return TRUE; +} diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h index 10b2b525836..e13f57ccec5 100644 --- a/src/gallium/drivers/llvmpipe/lp_flush.h +++ b/src/gallium/drivers/llvmpipe/lp_flush.h @@ -28,10 +28,22 @@ #ifndef LP_FLUSH_H #define LP_FLUSH_H +#include "pipe/p_compiler.h" + struct pipe_context; struct pipe_fence_handle; void llvmpipe_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); +boolean +llvmpipe_flush_texture(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned flush_flags, + boolean read_only, + boolean cpu_access, + boolean do_not_flush); + #endif diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index d3d7e26882d..5887613120d 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -37,6 +37,7 @@ #include "util/u_memory.h" #include "util/u_cpu_detect.h" +#include "gallivm/lp_bld_init.h" #include "lp_debug.h" #include "lp_screen.h" #include "gallivm/lp_bld_intr.h" @@ -50,12 +51,17 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) /* struct lp_jit_texture */ { - LLVMTypeRef elem_types[4]; + LLVMTypeRef elem_types[6]; elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type(); - elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0); + elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); + elem_types[LP_JIT_TEXTURE_ROW_STRIDE] = + LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_2D_LEVELS); + elem_types[LP_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), + LP_MAX_TEXTURE_2D_LEVELS); texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); @@ -65,9 +71,15 @@ lp_jit_init_globals(struct llvmpipe_screen *screen) LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height, screen->target, texture_type, LP_JIT_TEXTURE_HEIGHT); - LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride, + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth, screen->target, texture_type, - LP_JIT_TEXTURE_STRIDE); + LP_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level, + screen->target, texture_type, + LP_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride, + screen->target, texture_type, + LP_JIT_TEXTURE_ROW_STRIDE); LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data, screen->target, texture_type, LP_JIT_TEXTURE_DATA); @@ -148,8 +160,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen) util_cpu_caps.has_sse4_1 = 0; #endif - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); + lp_build_init(); screen->module = LLVMModuleCreateWithName("llvmpipe"); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 8df3015d4b4..13167ae3bf4 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -39,6 +39,7 @@ #include "gallivm/lp_bld_struct.h" #include "pipe/p_state.h" +#include "lp_texture.h" struct llvmpipe_screen; @@ -48,15 +49,19 @@ struct lp_jit_texture { uint32_t width; uint32_t height; - uint32_t stride; - const void *data; + uint32_t depth; + uint32_t last_level; + uint32_t row_stride[LP_MAX_TEXTURE_2D_LEVELS]; + const void *data[LP_MAX_TEXTURE_2D_LEVELS]; }; enum { LP_JIT_TEXTURE_WIDTH = 0, LP_JIT_TEXTURE_HEIGHT, - LP_JIT_TEXTURE_STRIDE, + LP_JIT_TEXTURE_DEPTH, + LP_JIT_TEXTURE_LAST_LEVEL, + LP_JIT_TEXTURE_ROW_STRIDE, LP_JIT_TEXTURE_DATA }; diff --git a/src/gallium/drivers/llvmpipe/lp_public.h b/src/gallium/drivers/llvmpipe/lp_public.h new file mode 100644 index 00000000000..ec6b660b48e --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_public.h @@ -0,0 +1,10 @@ +#ifndef LP_PUBLIC_H +#define LP_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +llvmpipe_create_screen(struct sw_winsys *winsys); + +#endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 6dbcb3c9b31..81ea11a16b6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -42,128 +42,75 @@ #include "lp_scene.h" -/** - * Begin the rasterization phase. - * Map the framebuffer surfaces. Initialize the 'rast' state. +/* Begin rasterizing a scene: */ static boolean lp_rast_begin( struct lp_rasterizer *rast, - const struct pipe_framebuffer_state *fb, - boolean write_color, - boolean write_zstencil ) + struct lp_scene *scene ) { - struct pipe_screen *screen = rast->screen; - struct pipe_surface *cbuf, *zsbuf; + const struct pipe_framebuffer_state *fb = &scene->fb; + boolean write_color = fb->nr_cbufs != 0; + boolean write_zstencil = fb->zsbuf != NULL; int i; - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->curr_scene = scene; - util_copy_framebuffer_state(&rast->state.fb, fb); + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + rast->state.nr_cbufs = scene->fb.nr_cbufs; rast->state.write_zstencil = write_zstencil; rast->state.write_color = write_color; - - rast->check_for_clipped_tiles = (fb->width % TILE_SIZE != 0 || - fb->height % TILE_SIZE != 0); - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - cbuf = rast->state.fb.cbufs[i]; - if (cbuf) { - rast->cbuf_transfer[i] = screen->get_tex_transfer(rast->screen, - cbuf->texture, - cbuf->face, - cbuf->level, - cbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - cbuf->width, - cbuf->height); - if (!rast->cbuf_transfer[i]) - goto fail; - - rast->cbuf_map[i] = screen->transfer_map(rast->screen, - rast->cbuf_transfer[i]); - if (!rast->cbuf_map[i]) - goto fail; - } + for (i = 0; i < rast->state.nr_cbufs; i++) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + rast->cbuf[i].map = scene->cbuf_map[i]; + rast->cbuf[i].format = cbuf->texture->format; + rast->cbuf[i].width = cbuf->width; + rast->cbuf[i].height = cbuf->height; + rast->cbuf[i].stride = llvmpipe_texture_stride(cbuf->texture, cbuf->level); } - zsbuf = rast->state.fb.zsbuf; - if (zsbuf) { - rast->zsbuf_transfer = screen->get_tex_transfer(rast->screen, - zsbuf->texture, - zsbuf->face, - zsbuf->level, - zsbuf->zslice, - PIPE_TRANSFER_READ_WRITE, - 0, 0, - zsbuf->width, - zsbuf->height); - if (!rast->zsbuf_transfer) - goto fail; - - rast->zsbuf_map = screen->transfer_map(rast->screen, - rast->zsbuf_transfer); - if (!rast->zsbuf_map) - goto fail; + if (write_zstencil) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + rast->zsbuf.map = scene->zsbuf_map; + rast->zsbuf.stride = llvmpipe_texture_stride(zsbuf->texture, zsbuf->level); + rast->zsbuf.blocksize = + util_format_get_blocksize(zsbuf->texture->format); } + lp_scene_bin_iter_begin( scene ); + return TRUE; - -fail: - /* Unmap and release transfers? - */ - return FALSE; } -/** - * Finish the rasterization phase. - * Unmap framebuffer surfaces. - */ static void lp_rast_end( struct lp_rasterizer *rast ) { - struct pipe_screen *screen = rast->screen; - unsigned i; - - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - if (rast->cbuf_map[i]) - screen->transfer_unmap(screen, rast->cbuf_transfer[i]); - - if (rast->cbuf_transfer[i]) - screen->tex_transfer_destroy(rast->cbuf_transfer[i]); - - rast->cbuf_transfer[i] = NULL; - rast->cbuf_map[i] = NULL; - } + int i; - if (rast->zsbuf_map) - screen->transfer_unmap(screen, rast->zsbuf_transfer); + lp_scene_reset( rast->curr_scene ); - if (rast->zsbuf_transfer) - screen->tex_transfer_destroy(rast->zsbuf_transfer); + for (i = 0; i < rast->state.nr_cbufs; i++) + rast->cbuf[i].map = NULL; - rast->zsbuf_transfer = NULL; - rast->zsbuf_map = NULL; + rast->zsbuf.map = NULL; + rast->curr_scene = NULL; } - /** * Begining rasterization of a tile. * \param x window X position of the tile, in pixels * \param y window Y position of the tile, in pixels */ static void -lp_rast_start_tile( struct lp_rasterizer *rast, - unsigned thread_index, - unsigned x, unsigned y ) +lp_rast_start_tile(struct lp_rasterizer_task *task, + unsigned x, unsigned y) { LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); - rast->tasks[thread_index].x = x; - rast->tasks[thread_index].y = y; + task->x = x; + task->y = y; } @@ -171,12 +118,13 @@ lp_rast_start_tile( struct lp_rasterizer *rast, * Clear the rasterizer's current color tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_clear_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { + struct lp_rasterizer *rast = task->rast; const uint8_t *clear_color = arg.clear_color; - uint8_t **color_tile = rast->tasks[thread_index].tile.color; + uint8_t **color_tile = task->tile.color; unsigned i; LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, @@ -189,7 +137,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, clear_color[1] == clear_color[2] && clear_color[2] == clear_color[3]) { /* clear to grayscale value {x, x, x, x} */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { memset(color_tile[i], clear_color[0], TILE_SIZE * TILE_SIZE * 4); } } @@ -200,7 +148,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * works. */ const unsigned chunk = TILE_SIZE / 4; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { + for (i = 0; i < rast->state.nr_cbufs; i++) { uint8_t *c = color_tile[i]; unsigned j; for (j = 0; j < 4 * TILE_SIZE; j++) { @@ -225,24 +173,24 @@ void lp_rast_clear_color( struct lp_rasterizer *rast, * Clear the rasterizer's current z/stencil tile. * This is a bin command called during bin processing. */ -void lp_rast_clear_zstencil( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_clear_zstencil(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const unsigned tile_x = task->x; const unsigned tile_y = task->y; - const unsigned height = TILE_SIZE/TILE_VECTOR_HEIGHT; - const unsigned width = TILE_SIZE*TILE_VECTOR_HEIGHT; - unsigned block_size = util_format_get_blocksize(rast->zsbuf_transfer->texture->format); + const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT; + const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT; + unsigned block_size = rast->zsbuf.blocksize; uint8_t *dst; - unsigned dst_stride = rast->zsbuf_transfer->stride*TILE_VECTOR_HEIGHT; + unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT; unsigned i, j; LP_DBG(DEBUG_RAST, "%s 0x%x\n", __FUNCTION__, arg.clear_zstencil); - assert(rast->zsbuf_map); - if (!rast->zsbuf_map) + assert(rast->zsbuf.map); + if (!rast->zsbuf.map) return; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -278,8 +226,8 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, } break; default: - assert(0); - break; + assert(0); + break; } } @@ -288,55 +236,42 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast, * Load tile color from the framebuffer surface. * This is a bin command called during bin processing. */ -void lp_rast_load_color( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg) +void +lp_rast_load_color(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y); - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) - continue; - - if (y >= transfer->height) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width || y >= rast->cbuf[i].height) continue; - assert(w >= 0); - assert(h >= 0); - assert(w <= TILE_SIZE); - assert(h <= TILE_SIZE); - - lp_tile_read_4ub(transfer->texture->format, + lp_tile_read_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_load); } } -void lp_rast_set_state( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_set_state(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { const struct lp_rast_state *state = arg.set_state; LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state); /* just set the current state pointer for this rasterizer */ - rast->tasks[thread_index].current_state = state; + task->current_state = state; } @@ -346,16 +281,15 @@ void lp_rast_set_state( struct lp_rasterizer *rast, * completely contained inside a triangle. * This is a bin command called during bin processing. */ -void lp_rast_shade_tile( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_shade_tile(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; + struct lp_rasterizer *rast = task->rast; const struct lp_rast_state *state = task->current_state; struct lp_rast_tile *tile = &task->tile; const struct lp_rast_shader_inputs *inputs = arg.shade_tile; - const unsigned tile_x = task->x; - const unsigned tile_y = task->y; + const unsigned tile_x = task->x, tile_y = task->y; unsigned x, y; LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); @@ -371,7 +305,7 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, block_offset = ((y / 4) * (16 * 16) + (x / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ @@ -396,14 +330,13 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast, * Compute shading for a 4x4 block of pixels. * This is a bin command called during bin processing. */ -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; const struct lp_rast_state *state = task->current_state; + struct lp_rasterizer *rast = task->rast; struct lp_rast_tile *tile = &task->tile; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -411,7 +344,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, unsigned ix, iy; int block_offset; -#ifdef DEBUG assert(state); /* Sanity checks */ @@ -420,7 +352,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, assert((x % 4) == 0); assert((y % 4) == 0); -#endif ix = x % TILE_SIZE; iy = y % TILE_SIZE; @@ -429,22 +360,19 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast, block_offset = ((iy / 4) * (16 * 16) + (ix / 4) * 16); /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; /* depth buffer */ depth = lp_rast_depth_pointer(rast, x, y); - -#ifdef DEBUG assert(lp_check_alignment(tile->color[0], 16)); assert(lp_check_alignment(state->jit_context.blend_color, 16)); assert(lp_check_alignment(inputs->step[0], 16)); assert(lp_check_alignment(inputs->step[1], 16)); assert(lp_check_alignment(inputs->step[2], 16)); -#endif /* run shader */ state->jit_function[1]( &state->jit_context, @@ -515,66 +443,48 @@ outline_subtiles(uint8_t *tile) /** * Write the rasterizer's color tile to the framebuffer. */ -static void lp_rast_store_color( struct lp_rasterizer *rast, - unsigned thread_index) +static void +lp_rast_store_color(struct lp_rasterizer_task *task) { - struct lp_rasterizer_task *task = &rast->tasks[thread_index]; - const unsigned x = task->x; - const unsigned y = task->y; + struct lp_rasterizer *rast = task->rast; + const unsigned x = task->x, y = task->y; unsigned i; - for (i = 0; i < rast->state.fb.nr_cbufs; i++) { - struct pipe_transfer *transfer = rast->cbuf_transfer[i]; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (x >= transfer->width) + for (i = 0; i < rast->state.nr_cbufs; i++) { + if (x >= rast->cbuf[i].width) continue; - if (y >= transfer->height) + if (y >= rast->cbuf[i].height) continue; - LP_DBG(DEBUG_RAST, "%s [%u] %d,%d %dx%d\n", __FUNCTION__, - thread_index, x, y, w, h); + LP_DBG(DEBUG_RAST, "%s [%u] %d,%d\n", __FUNCTION__, + task->thread_index, x, y); if (LP_DEBUG & DEBUG_SHOW_SUBTILES) outline_subtiles(task->tile.color[i]); else if (LP_DEBUG & DEBUG_SHOW_TILES) outline_tile(task->tile.color[i]); - lp_tile_write_4ub(transfer->texture->format, + lp_tile_write_4ub(rast->cbuf[i].format, task->tile.color[i], - rast->cbuf_map[i], - transfer->stride, + rast->cbuf[i].map, + rast->cbuf[i].stride, x, y, - w, h); + TILE_SIZE, TILE_SIZE); LP_COUNT(nr_color_tile_store); } } -/** - * Write the rasterizer's tiles to the framebuffer. - */ -static void -lp_rast_end_tile( struct lp_rasterizer *rast, - unsigned thread_index ) -{ - LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); - - if (rast->state.write_color) - lp_rast_store_color(rast, thread_index); -} - /** * Signal on a fence. This is called during bin execution/rasterization. * Called per thread. */ -void lp_rast_fence( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +void +lp_rast_fence(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { struct lp_fence *fence = arg.fence; @@ -592,20 +502,6 @@ void lp_rast_fence( struct lp_rasterizer *rast, } -/** - * When all the threads are done rasterizing a scene, one thread will - * call this function to reset the scene and put it onto the empty queue. - */ -static void -release_scene( struct lp_rasterizer *rast, - struct lp_scene *scene ) -{ - util_unreference_framebuffer_state( &scene->fb ); - - lp_scene_reset( scene ); - lp_scene_enqueue( rast->empty_scenes, scene ); - rast->curr_scene = NULL; -} /** @@ -615,25 +511,31 @@ release_scene( struct lp_rasterizer *rast, * Called per thread. */ static void -rasterize_bin( struct lp_rasterizer *rast, - unsigned thread_index, - const struct cmd_bin *bin, - int x, int y) +rasterize_bin(struct lp_rasterizer_task *task, + const struct cmd_bin *bin, + int x, int y) { const struct cmd_block_list *commands = &bin->commands; struct cmd_block *block; unsigned k; - lp_rast_start_tile( rast, thread_index, x, y ); + lp_rast_start_tile( task, x * TILE_SIZE, y * TILE_SIZE ); /* simply execute each of the commands in the block list */ for (block = commands->head; block; block = block->next) { for (k = 0; k < block->count; k++) { - block->cmd[k]( rast, thread_index, block->arg[k] ); + block->cmd[k]( task, block->arg[k] ); } } - lp_rast_end_tile( rast, thread_index ); + /* Write the rasterizer's tiles to the framebuffer. + */ + if (task->rast->state.write_color) + lp_rast_store_color(task); + + /* Free data for this bin. + */ + lp_scene_bin_reset( task->rast->curr_scene, x, y); } @@ -717,10 +619,8 @@ is_empty_bin( const struct cmd_bin *bin ) * Called per thread. */ static void -rasterize_scene( struct lp_rasterizer *rast, - unsigned thread_index, - struct lp_scene *scene, - bool write_depth ) +rasterize_scene(struct lp_rasterizer_task *task, + struct lp_scene *scene) { /* loop over scene bins, rasterize each */ #if 0 @@ -728,9 +628,8 @@ rasterize_scene( struct lp_rasterizer *rast, unsigned i, j; for (i = 0; i < scene->tiles_x; i++) { for (j = 0; j < scene->tiles_y; j++) { - struct cmd_bin *bin = lp_get_bin(scene, i, j); - rasterize_bin( rast, thread_index, - bin, i * TILE_SIZE, j * TILE_SIZE ); + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + rasterize_bin(task, bin, i, j); } } } @@ -742,7 +641,7 @@ rasterize_scene( struct lp_rasterizer *rast, assert(scene); while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) { if (!is_empty_bin( bin )) - rasterize_bin( rast, thread_index, bin, x * TILE_SIZE, y * TILE_SIZE); + rasterize_bin(task, bin, x, y); } } #endif @@ -753,44 +652,20 @@ rasterize_scene( struct lp_rasterizer *rast, * Called by setup module when it has something for us to render. */ void -lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ) +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene) { - boolean debug = false; - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); - if (debug) { - unsigned x, y; - debug_printf("rasterize scene:\n"); - debug_printf(" data size: %u\n", lp_scene_data_size(scene)); - for (y = 0; y < scene->tiles_y; y++) { - for (x = 0; x < scene->tiles_x; x++) { - debug_printf(" bin %u, %u size: %u\n", x, y, - lp_scene_bin_size(scene, x, y)); - } - } - } - - /* save framebuffer state in the bin */ - util_copy_framebuffer_state(&scene->fb, fb); - scene->write_depth = write_depth; - if (rast->num_threads == 0) { /* no threading */ - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, /* always write color if cbufs present */ - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, scene ); - lp_scene_bin_iter_begin( scene ); - rasterize_scene( rast, 0, scene, write_depth ); + rasterize_scene( &rast->tasks[0], scene ); - release_scene( rast, scene ); - - lp_rast_end( rast ); + lp_scene_reset( scene ); + rast->curr_scene = NULL; } else { /* threaded rendering! */ @@ -802,14 +677,26 @@ lp_rasterize_scene( struct lp_rasterizer *rast, for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_signal(&rast->tasks[i].work_ready); } + } + + LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); +} + + +void +lp_rast_finish( struct lp_rasterizer *rast ) +{ + if (rast->num_threads == 0) { + /* nothing to do */ + } + else { + int i; /* wait for work to complete */ for (i = 0; i < rast->num_threads; i++) { pipe_semaphore_wait(&rast->tasks[i].work_done); } } - - LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); } @@ -832,24 +719,16 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) debug_printf("thread %d waiting for work\n", task->thread_index); pipe_semaphore_wait(&task->work_ready); + if (rast->exit_flag) + break; + if (task->thread_index == 0) { /* thread[0]: * - get next scene to rasterize * - map the framebuffer surfaces */ - const struct pipe_framebuffer_state *fb; - boolean write_depth; - - rast->curr_scene = lp_scene_dequeue( rast->full_scenes, TRUE ); - - lp_scene_bin_iter_begin( rast->curr_scene ); - - fb = &rast->curr_scene->fb; - write_depth = rast->curr_scene->write_depth; - - lp_rast_begin( rast, fb, - fb->nr_cbufs != 0, - fb->zsbuf != NULL && write_depth ); + lp_rast_begin( rast, + lp_scene_dequeue( rast->full_scenes, TRUE ) ); } /* Wait for all threads to get here so that threads[1+] don't @@ -860,26 +739,23 @@ static PIPE_THREAD_ROUTINE( thread_func, init_data ) /* do work */ if (debug) debug_printf("thread %d doing work\n", task->thread_index); - rasterize_scene(rast, - task->thread_index, - rast->curr_scene, - rast->curr_scene->write_depth); + + rasterize_scene(task, + rast->curr_scene); /* wait for all threads to finish with this scene */ pipe_barrier_wait( &rast->barrier ); + /* XXX: shouldn't be necessary: + */ if (task->thread_index == 0) { - /* thread[0]: - * - release the scene object - * - unmap the framebuffer surfaces - */ - release_scene( rast, rast->curr_scene ); lp_rast_end( rast ); } /* signal done with work */ if (debug) debug_printf("thread %d done working\n", task->thread_index); + pipe_semaphore_signal(&task->work_done); } @@ -922,7 +798,7 @@ create_rast_threads(struct lp_rasterizer *rast) * processing them. */ struct lp_rasterizer * -lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) +lp_rast_create( void ) { struct lp_rasterizer *rast; unsigned i, cbuf; @@ -931,9 +807,6 @@ lp_rast_create( struct pipe_screen *screen, struct lp_scene_queue *empty ) if(!rast) return NULL; - rast->screen = screen; - - rast->empty_scenes = empty; rast->full_scenes = lp_scene_queue_create(); for (i = 0; i < Elements(rast->tasks); i++) { @@ -961,13 +834,31 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) { unsigned i, cbuf; - util_unreference_framebuffer_state(&rast->state.fb); - for (i = 0; i < Elements(rast->tasks); i++) { for (cbuf = 0; cbuf < PIPE_MAX_COLOR_BUFS; cbuf++ ) align_free(rast->tasks[i].tile.color[cbuf]); } + /* Set exit_flag and signal each thread's work_ready semaphore. + * Each thread will be woken up, notice that the exit_flag is set and + * break out of its main loop. The thread will then exit. + */ + rast->exit_flag = TRUE; + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_signal(&rast->tasks[i].work_ready); + } + + /* Wait for threads to terminate before cleaning up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_thread_wait(rast->threads[i]); + } + + /* Clean up per-thread data */ + for (i = 0; i < rast->num_threads; i++) { + pipe_semaphore_destroy(&rast->tasks[i].work_ready); + pipe_semaphore_destroy(&rast->tasks[i].work_done); + } + /* for synchronizing rasterization threads */ pipe_barrier_destroy( &rast->barrier ); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 875f18e0c0c..303f6e3f7e4 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -43,16 +43,17 @@ struct lp_rasterizer; struct lp_scene; -struct lp_scene_queue; struct lp_fence; struct cmd_bin; -struct pipe_screen; /** For sub-pixel positioning */ #define FIXED_ORDER 4 #define FIXED_ONE (1<<FIXED_ORDER) +struct lp_rasterizer_task; + + /** * Rasterization state. * Objects of this type are put into the shared data bin and pointed @@ -94,9 +95,13 @@ struct lp_rast_shader_inputs { * Rasterization information for a triangle known to be in this bin, * plus inputs to run the shader: * These fields are tile- and bin-independent. - * Objects of this type are put into the setup_context::data buffer. + * Objects of this type are put into the lp_setup_context::data buffer. */ struct lp_rast_triangle { +#ifdef DEBUG + float v[3][2]; +#endif + /* one-pixel sized trivial accept offsets for each plane */ int ei1; int ei2; @@ -126,18 +131,21 @@ struct lp_rast_triangle { -struct lp_rasterizer *lp_rast_create( struct pipe_screen *screen, - struct lp_scene_queue *empty ); +struct lp_rasterizer * +lp_rast_create( void ); -void lp_rast_destroy( struct lp_rasterizer * ); +void +lp_rast_destroy( struct lp_rasterizer * ); -unsigned lp_rast_get_num_threads( struct lp_rasterizer * ); +unsigned +lp_rast_get_num_threads( struct lp_rasterizer * ); -void lp_rasterize_scene( struct lp_rasterizer *rast, - struct lp_scene *scene, - const struct pipe_framebuffer_state *fb, - bool write_depth ); +void +lp_rast_queue_scene( struct lp_rasterizer *rast, + struct lp_scene *scene ); +void +lp_rast_finish( struct lp_rasterizer *rast ); union lp_rast_cmd_arg { @@ -201,32 +209,25 @@ lp_rast_arg_null( void ) * the bins are executed. */ -void lp_rast_clear_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_clear_zstencil( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_clear_zstencil( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_load_color( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_load_color( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_set_state( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_set_state( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_triangle( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_triangle( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_shade_tile( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_shade_tile( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); -void lp_rast_fence( struct lp_rasterizer *, - unsigned thread_index, +void lp_rast_fence( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 5c5497e0929..39bf2c25879 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -38,8 +38,6 @@ #define MAX_THREADS 8 /* XXX probably temporary here */ -struct pipe_transfer; -struct pipe_screen; struct lp_rasterizer; @@ -82,19 +80,26 @@ struct lp_rasterizer_task */ struct lp_rasterizer { - boolean clipped_tile; - boolean check_for_clipped_tiles; + boolean exit_flag; /* Framebuffer stuff */ - struct pipe_screen *screen; - struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; - struct pipe_transfer *zsbuf_transfer; - void *cbuf_map[PIPE_MAX_COLOR_BUFS]; - uint8_t *zsbuf_map; + struct { + void *map; + unsigned stride; + unsigned width; + unsigned height; + enum pipe_format format; + } cbuf[PIPE_MAX_COLOR_BUFS]; + + struct { + uint8_t *map; + unsigned stride; + unsigned blocksize; + } zsbuf; struct { - struct pipe_framebuffer_state fb; + unsigned nr_cbufs; boolean write_color; boolean write_zstencil; unsigned clear_color; @@ -104,7 +109,14 @@ struct lp_rasterizer /** The incoming queue of scenes ready to rasterize */ struct lp_scene_queue *full_scenes; - /** The outgoing queue of processed scenes to return to setup modulee */ + + /** + * The outgoing queue of processed scenes to return to setup module + * + * XXX: while scenes are per-context but the rasterizer is + * (potentially) shared, these empty scenes should be returned to + * the context which created them rather than retained here. + */ struct lp_scene_queue *empty_scenes; /** The scene currently being rasterized by the threads */ @@ -121,8 +133,7 @@ struct lp_rasterizer }; -void lp_rast_shade_quads( struct lp_rasterizer *rast, - unsigned thread_index, +void lp_rast_shade_quads( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y, int32_t c1, int32_t c2, int32_t c3); @@ -137,17 +148,18 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, unsigned x, unsigned y ) { void * depth; + assert((x % TILE_VECTOR_WIDTH) == 0); assert((y % TILE_VECTOR_HEIGHT) == 0); - if(!rast->zsbuf_map) + + if (!rast->zsbuf.map) return NULL; - assert(rast->zsbuf_transfer); - depth = rast->zsbuf_map + - y*rast->zsbuf_transfer->stride + - TILE_VECTOR_HEIGHT*x*util_format_get_blocksize(rast->zsbuf_transfer->texture->format); -#ifdef DEBUG + + depth = (rast->zsbuf.map + + rast->zsbuf.stride * y + + rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT); + assert(lp_check_alignment(depth, 16)); -#endif return depth; } @@ -159,13 +171,13 @@ lp_rast_depth_pointer( struct lp_rasterizer *rast, * \param x, y location of 4x4 block in window coords */ static INLINE void -lp_rast_shade_quads_all( struct lp_rasterizer *rast, - unsigned thread_index, +lp_rast_shade_quads_all( struct lp_rasterizer_task *task, const struct lp_rast_shader_inputs *inputs, unsigned x, unsigned y ) { - const struct lp_rast_state *state = rast->tasks[thread_index].current_state; - struct lp_rast_tile *tile = &rast->tasks[thread_index].tile; + struct lp_rasterizer *rast = task->rast; + const struct lp_rast_state *state = task->current_state; + struct lp_rast_tile *tile = &task->tile; const unsigned ix = x % TILE_SIZE, iy = y % TILE_SIZE; uint8_t *color[PIPE_MAX_COLOR_BUFS]; void *depth; @@ -175,7 +187,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer *rast, block_offset = (iy / 4) * (16 * 16) + (ix / 4) * 16; /* color buffer */ - for (i = 0; i < rast->state.fb.nr_cbufs; i++) + for (i = 0; i < rast->state.nr_cbufs; i++) color[i] = tile->color[i] + 4 * block_offset; depth = lp_rast_depth_pointer(rast, x, y); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 0334705ef79..a5f0d14c95d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -89,14 +89,11 @@ static const int pos_table16[16][2] = { * Shade all pixels in a 4x4 block. */ static void -block_full_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { - lp_rast_shade_quads_all(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y); + lp_rast_shade_quads_all(task, &tri->inputs, x, y); } @@ -104,16 +101,16 @@ block_full_4( struct lp_rasterizer_task *rast_task, * Shade all pixels in a 16x16 block. */ static void -block_full_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y ) +block_full_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y) { unsigned ix, iy; assert(x % 16 == 0); assert(y % 16 == 0); for (iy = 0; iy < 16; iy += 4) for (ix = 0; ix < 16; ix += 4) - block_full_4(rast_task, tri, x + ix, y + iy); + block_full_4(task, tri, x + ix, y + iy); } @@ -123,18 +120,15 @@ block_full_16( struct lp_rasterizer_task *rast_task, * will be done as part of the fragment shader. */ static void -do_block_4( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c1, - int c2, - int c3 ) +do_block_4(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c1, int c2, int c3) { - lp_rast_shade_quads(rast_task->rast, - rast_task->thread_index, - &tri->inputs, - x, y, - -c1, -c2, -c3); + assert(x >= 0); + assert(y >= 0); + + lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3); } @@ -143,18 +137,18 @@ do_block_4( struct lp_rasterizer_task *rast_task, * of the triangle's bounds. */ static void -do_block_16( struct lp_rasterizer_task *rast_task, - const struct lp_rast_triangle *tri, - int x, int y, - int c0, - int c1, - int c2 ) +do_block_16(struct lp_rasterizer_task *task, + const struct lp_rast_triangle *tri, + int x, int y, + int c0, int c1, int c2) { unsigned mask = 0; int eo[3]; int c[3]; int i, j; + assert(x >= 0); + assert(y >= 0); assert(x % 16 == 0); assert(y % 16 == 0); @@ -193,7 +187,7 @@ do_block_16( struct lp_rasterizer_task *rast_task, * the triangle. It's a little faster to do it in the jit code. */ LP_COUNT(nr_non_empty_4); - do_block_4(rast_task, tri, px, py, cx1, cx2, cx3); + do_block_4(task, tri, px, py, cx1, cx2, cx3); } } @@ -203,15 +197,11 @@ do_block_16( struct lp_rasterizer_task *rast_task, * for this triangle. */ void -lp_rast_triangle( struct lp_rasterizer *rast, - unsigned thread_index, - const union lp_rast_cmd_arg arg ) +lp_rast_triangle(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) { - struct lp_rasterizer_task *rast_task = &rast->tasks[thread_index]; const struct lp_rast_triangle *tri = arg.triangle; - - int x = rast_task->x; - int y = rast_task->y; + const int x = task->x, y = task->y; int ei[3], eo[3], c[3]; unsigned outmask, inmask, partial_mask; unsigned i, j; @@ -272,7 +262,7 @@ lp_rast_triangle( struct lp_rasterizer *rast, partial_mask &= ~(1 << i); LP_COUNT(nr_partially_covered_16); - do_block_16(rast_task, tri, px, py, cx1, cx2, cx3); + do_block_16(task, tri, px, py, cx1, cx2, cx3); } /* Iterate over fulls: @@ -285,6 +275,6 @@ lp_rast_triangle( struct lp_rasterizer *rast, inmask &= ~(1 << i); LP_COUNT(nr_fully_covered_16); - block_full_16(rast_task, tri, px, py); + block_full_16(task, tri, px, py); } } diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c index b7116297ece..681ce674d49 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.c +++ b/src/gallium/drivers/llvmpipe/lp_scene.c @@ -29,44 +29,67 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_simple_list.h" +#include "util/u_surface.h" #include "lp_scene.h" +#include "lp_scene_queue.h" +#include "lp_debug.h" struct lp_scene * -lp_scene_create(void) +lp_scene_create( struct pipe_context *pipe, + struct lp_scene_queue *queue ) { + unsigned i, j; struct lp_scene *scene = CALLOC_STRUCT(lp_scene); - if (scene) - lp_scene_init(scene); + if (!scene) + return NULL; + + scene->pipe = pipe; + scene->empty_queue = queue; + + for (i = 0; i < TILES_X; i++) { + for (j = 0; j < TILES_Y; j++) { + struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); + bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + } + } + + scene->data.head = + scene->data.tail = CALLOC_STRUCT(data_block); + + make_empty_list(&scene->textures); + + pipe_mutex_init(scene->mutex); + return scene; } +/** + * Free all data associated with the given scene, and free(scene). + */ void lp_scene_destroy(struct lp_scene *scene) { - lp_scene_reset(scene); - lp_scene_free_bin_data(scene); - FREE(scene); -} + unsigned i, j; + lp_scene_reset(scene); -void -lp_scene_init(struct lp_scene *scene) -{ - unsigned i, j; for (i = 0; i < TILES_X; i++) for (j = 0; j < TILES_Y; j++) { struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block); + assert(bin->commands.head == bin->commands.tail); + FREE(bin->commands.head); + bin->commands.head = NULL; + bin->commands.tail = NULL; } - scene->data.head = - scene->data.tail = CALLOC_STRUCT(data_block); + FREE(scene->data.head); + scene->data.head = NULL; - make_empty_list(&scene->textures); + pipe_mutex_destroy(scene->mutex); - pipe_mutex_init(scene->mutex); + FREE(scene); } @@ -92,6 +115,9 @@ lp_scene_is_empty(struct lp_scene *scene ) } +/* Free data for one particular bin. May be called from the + * rasterizer thread(s). + */ void lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) { @@ -100,6 +126,9 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) struct cmd_block *block; struct cmd_block *tmp; + assert(x < TILES_X); + assert(y < TILES_Y); + for (block = list->head; block != list->tail; block = tmp) { tmp = block->next; FREE(block); @@ -112,7 +141,8 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y) /** - * Set scene to empty state. + * Free all the temporary data in a scene. May be called from the + * rasterizer thread(s). */ void lp_scene_reset(struct lp_scene *scene ) @@ -159,40 +189,8 @@ lp_scene_reset(struct lp_scene *scene ) } -/** - * Free all data associated with the given bin, but don't free(scene). - */ -void -lp_scene_free_bin_data(struct lp_scene *scene) -{ - unsigned i, j; - - for (i = 0; i < TILES_X; i++) - for (j = 0; j < TILES_Y; j++) { - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); - /* lp_reset_scene() should have been already called */ - assert(bin->commands.head == bin->commands.tail); - FREE(bin->commands.head); - bin->commands.head = NULL; - bin->commands.tail = NULL; - } - - FREE(scene->data.head); - scene->data.head = NULL; - - pipe_mutex_destroy(scene->mutex); -} - -void -lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ) -{ - assert(lp_scene_is_empty(scene)); - scene->tiles_x = align(width, TILE_SIZE) / TILE_SIZE; - scene->tiles_y = align(height, TILE_SIZE) / TILE_SIZE; -} void @@ -390,3 +388,136 @@ end: pipe_mutex_unlock(scene->mutex); return bin; } + + +/** + * Prepare this scene for the rasterizer. + * Map the framebuffer surfaces. Initialize the 'rast' state. + */ +static boolean +lp_scene_map_buffers( struct lp_scene *scene ) +{ + struct pipe_surface *cbuf, *zsbuf; + int i; + + LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); + + + /* Map all color buffers + */ + for (i = 0; i < scene->fb.nr_cbufs; i++) { + cbuf = scene->fb.cbufs[i]; + if (cbuf) { + scene->cbuf_map[i] = llvmpipe_texture_map(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + if (!scene->cbuf_map[i]) + goto fail; + } + } + + /* Map the zsbuffer + */ + zsbuf = scene->fb.zsbuf; + if (zsbuf) { + scene->zsbuf_map = llvmpipe_texture_map(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + if (!scene->zsbuf_map) + goto fail; + } + + return TRUE; + +fail: + /* Unmap and release transfers? + */ + return FALSE; +} + + + +/** + * Called after rasterizer as finished rasterizing a scene. + * + * We want to call this from the pipe_context's current thread to + * avoid having to have mutexes on the transfer functions. + */ +static void +lp_scene_unmap_buffers( struct lp_scene *scene ) +{ + unsigned i; + + for (i = 0; i < scene->fb.nr_cbufs; i++) { + if (scene->cbuf_map[i]) { + struct pipe_surface *cbuf = scene->fb.cbufs[i]; + llvmpipe_texture_unmap(cbuf->texture, + cbuf->face, + cbuf->level, + cbuf->zslice); + scene->cbuf_map[i] = NULL; + } + } + + if (scene->zsbuf_map) { + struct pipe_surface *zsbuf = scene->fb.zsbuf; + llvmpipe_texture_unmap(zsbuf->texture, + zsbuf->face, + zsbuf->level, + zsbuf->zslice); + scene->zsbuf_map = NULL; + } + + util_unreference_framebuffer_state( &scene->fb ); +} + + +void lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ) +{ + assert(lp_scene_is_empty(scene)); + + util_copy_framebuffer_state(&scene->fb, fb); + + scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE; + scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE; +} + + +void lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ) +{ + if (0) { + unsigned x, y; + debug_printf("rasterize scene:\n"); + debug_printf(" data size: %u\n", lp_scene_data_size(scene)); + for (y = 0; y < scene->tiles_y; y++) { + for (x = 0; x < scene->tiles_x; x++) { + debug_printf(" bin %u, %u size: %u\n", x, y, + lp_scene_bin_size(scene, x, y)); + } + } + } + + + scene->write_depth = (scene->fb.zsbuf != NULL && + write_depth); + + lp_scene_map_buffers( scene ); + + /* Enqueue the scene for rasterization, then immediately wait for + * it to finish. + */ + lp_rast_queue_scene( rast, scene ); + + /* Currently just wait for the rasterizer to finish. Some + * threading interactions need to be worked out, particularly once + * transfers become per-context: + */ + lp_rast_finish( rast ); + lp_scene_unmap_buffers( scene ); + lp_scene_enqueue( scene->empty_queue, scene ); +} diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h index fb478cc2eb5..b602b1e8a05 100644 --- a/src/gallium/drivers/llvmpipe/lp_scene.h +++ b/src/gallium/drivers/llvmpipe/lp_scene.h @@ -39,6 +39,7 @@ #include "lp_tile_soa.h" #include "lp_rast.h" +struct lp_scene_queue; /* We're limited to 2K by 2K for 32bit fixed point rasterization. * Will need a 64-bit version for larger framebuffers. @@ -56,8 +57,7 @@ /* switch to a non-pointer value for this: */ -typedef void (*lp_rast_cmd)( struct lp_rasterizer *, - unsigned thread_index, +typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *, const union lp_rast_cmd_arg ); struct cmd_block { @@ -113,8 +113,12 @@ struct texture_ref { * scenes: */ struct lp_scene { - struct cmd_bin tile[TILES_X][TILES_Y]; - struct data_block_list data; + struct pipe_context *pipe; + + /* Scene's buffers are mapped at the time the scene is enqueued: + */ + void *cbuf_map[PIPE_MAX_COLOR_BUFS]; + uint8_t *zsbuf_map; /** the framebuffer to render the scene into */ struct pipe_framebuffer_state fb; @@ -132,25 +136,28 @@ struct lp_scene { int curr_x, curr_y; /**< for iterating over bins */ pipe_mutex mutex; + + /* Where to place this scene once it has been rasterized: + */ + struct lp_scene_queue *empty_queue; + + struct cmd_bin tile[TILES_X][TILES_Y]; + struct data_block_list data; }; -struct lp_scene *lp_scene_create(void); +struct lp_scene *lp_scene_create(struct pipe_context *pipe, + struct lp_scene_queue *empty_queue); void lp_scene_destroy(struct lp_scene *scene); -void lp_scene_init(struct lp_scene *scene); boolean lp_scene_is_empty(struct lp_scene *scene ); void lp_scene_reset(struct lp_scene *scene ); -void lp_scene_free_bin_data(struct lp_scene *scene); - -void lp_scene_set_framebuffer_size( struct lp_scene *scene, - unsigned width, unsigned height ); void lp_bin_new_data_block( struct data_block_list *list ); @@ -297,5 +304,13 @@ lp_scene_bin_iter_begin( struct lp_scene *scene ); struct cmd_bin * lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y ); +void +lp_scene_rasterize( struct lp_scene *scene, + struct lp_rasterizer *rast, + boolean write_depth ); + +void +lp_scene_begin_binning( struct lp_scene *scene, + struct pipe_framebuffer_state *fb ); #endif /* LP_BIN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 1cd3ea9a840..5093f58bb19 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -34,11 +34,13 @@ #include "lp_texture.h" #include "lp_buffer.h" #include "lp_fence.h" -#include "lp_winsys.h" #include "lp_jit.h" #include "lp_screen.h" #include "lp_context.h" #include "lp_debug.h" +#include "lp_public.h" + +#include "state_tracker/sw_winsys.h" #ifdef DEBUG int LP_DEBUG = 0; @@ -83,7 +85,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return PIPE_MAX_SAMPLERS; case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return PIPE_MAX_VERTEX_SAMPLERS; + return 0; case PIPE_CAP_MAX_COMBINED_SAMPLERS: return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: @@ -107,11 +109,11 @@ llvmpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 9; /* max 256x256x256 */ + return LP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; /* max 4Kx4K */ + return LP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -167,7 +169,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, unsigned geom_flags ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; const struct util_format_description *format_desc; format_desc = util_format_description(format); @@ -194,9 +196,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && @@ -224,33 +224,22 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, format_desc->block.height != 1) return FALSE; - if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) + if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return FALSE; if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB && - format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB && format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) return FALSE; + + /* not supported yet */ + if (format == PIPE_FORMAT_Z16_UNORM) + return FALSE; } return TRUE; } -static struct pipe_buffer * -llvmpipe_surface_buffer_create(struct pipe_screen *screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned tex_usage, - unsigned usage, - unsigned *stride) -{ - /* This function should never be used */ - assert(0); - return NULL; -} static void @@ -259,7 +248,7 @@ llvmpipe_flush_frontbuffer(struct pipe_screen *_screen, void *context_private) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; struct llvmpipe_texture *texture = llvmpipe_texture(surface->texture); assert(texture->dt); @@ -272,7 +261,7 @@ static void llvmpipe_destroy_screen( struct pipe_screen *_screen ) { struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; lp_jit_screen_cleanup(screen); @@ -289,7 +278,7 @@ llvmpipe_destroy_screen( struct pipe_screen *_screen ) * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen). */ struct pipe_screen * -llvmpipe_create_screen(struct llvmpipe_winsys *winsys) +llvmpipe_create_screen(struct sw_winsys *winsys) { struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen); @@ -310,7 +299,6 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys) screen->base.get_paramf = llvmpipe_get_paramf; screen->base.is_format_supported = llvmpipe_is_format_supported; - screen->base.surface_buffer_create = llvmpipe_surface_buffer_create; screen->base.context_create = llvmpipe_create_context; screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer; diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h index 4a1b4d6f3e2..d977f98cfaa 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.h +++ b/src/gallium/drivers/llvmpipe/lp_screen.h @@ -34,23 +34,21 @@ #ifndef LP_SCREEN_H #define LP_SCREEN_H -#include <llvm-c/Core.h> -#include <llvm-c/Analysis.h> -#include <llvm-c/Target.h> +#include "os/os_llvm.h" #include <llvm-c/ExecutionEngine.h> #include "pipe/p_screen.h" #include "pipe/p_defines.h" -struct llvmpipe_winsys; +struct sw_winsys; struct llvmpipe_screen { struct pipe_screen base; - struct llvmpipe_winsys *winsys; + struct sw_winsys *winsys; LLVMModuleRef module; LLVMExecutionEngineRef engine; @@ -76,4 +74,5 @@ llvmpipe_screen( struct pipe_screen *pipe ) } + #endif /* LP_SCREEN_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index cb873667a20..16128c34c80 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -46,17 +46,17 @@ #include "lp_rast.h" #include "lp_setup_context.h" #include "lp_screen.h" -#include "lp_winsys.h" +#include "state_tracker/sw_winsys.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -static void set_scene_state( struct setup_context *, unsigned ); +static void set_scene_state( struct lp_setup_context *, unsigned ); struct lp_scene * -lp_setup_get_current_scene(struct setup_context *setup) +lp_setup_get_current_scene(struct lp_setup_context *setup) { if (!setup->scene) { @@ -64,18 +64,17 @@ lp_setup_get_current_scene(struct setup_context *setup) */ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE); - if(0)lp_scene_reset( setup->scene ); /* XXX temporary? */ + assert(lp_scene_is_empty(setup->scene)); - lp_scene_set_framebuffer_size(setup->scene, - setup->fb.width, - setup->fb.height); + lp_scene_begin_binning(setup->scene, + &setup->fb ); } return setup->scene; } static void -first_triangle( struct setup_context *setup, +first_triangle( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]) @@ -86,7 +85,7 @@ first_triangle( struct setup_context *setup, } static void -first_line( struct setup_context *setup, +first_line( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4]) { @@ -96,7 +95,7 @@ first_line( struct setup_context *setup, } static void -first_point( struct setup_context *setup, +first_point( struct lp_setup_context *setup, const float (*v0)[4]) { set_scene_state( setup, SETUP_ACTIVE ); @@ -104,7 +103,7 @@ first_point( struct setup_context *setup, setup->point( setup, v0 ); } -static void reset_context( struct setup_context *setup ) +static void reset_context( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -132,14 +131,13 @@ static void reset_context( struct setup_context *setup ) /** Rasterize all scene's bins */ static void -lp_setup_rasterize_scene( struct setup_context *setup, - boolean write_depth ) +lp_setup_rasterize_scene( struct lp_setup_context *setup, + boolean write_depth ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); - lp_rasterize_scene(setup->rast, - scene, - &setup->fb, + lp_scene_rasterize(scene, + setup->rast, write_depth); reset_context( setup ); @@ -150,7 +148,7 @@ lp_setup_rasterize_scene( struct setup_context *setup, static void -begin_binning( struct setup_context *setup ) +begin_binning( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -186,7 +184,7 @@ begin_binning( struct setup_context *setup ) * TODO: fast path for fullscreen clears and no triangles. */ static void -execute_clears( struct setup_context *setup ) +execute_clears( struct lp_setup_context *setup ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -196,7 +194,7 @@ execute_clears( struct setup_context *setup ) static void -set_scene_state( struct setup_context *setup, +set_scene_state( struct lp_setup_context *setup, unsigned new_state ) { unsigned old_state = setup->state; @@ -231,7 +229,7 @@ set_scene_state( struct setup_context *setup, void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -241,27 +239,24 @@ lp_setup_flush( struct setup_context *setup, void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ) { - struct lp_scene *scene = lp_setup_get_current_scene(setup); - LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); + /* Flush any old scene. + */ set_scene_state( setup, SETUP_FLUSHED ); - /* re-get scene pointer, may have a new scene after flushing */ - (void) scene; - scene = lp_setup_get_current_scene(setup); - + /* Set new state. This will be picked up later when we next need a + * scene. + */ util_copy_framebuffer_state(&setup->fb, fb); - - lp_scene_set_framebuffer_size(scene, setup->fb.width, setup->fb.height); } void -lp_setup_clear( struct setup_context *setup, +lp_setup_clear( struct lp_setup_context *setup, const float *color, double depth, unsigned stencil, @@ -319,7 +314,7 @@ lp_setup_clear( struct setup_context *setup, * Emit a fence. */ struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ) +lp_setup_fence( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */ @@ -339,10 +334,11 @@ lp_setup_fence( struct setup_context *setup ) void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cull_mode, boolean ccw_is_frontface, - boolean scissor ) + boolean scissor, + boolean gl_rasterization_rules) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -350,12 +346,13 @@ lp_setup_set_triangle_state( struct setup_context *setup, setup->cullmode = cull_mode; setup->triangle = first_triangle; setup->scissor_test = scissor; + setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f; } void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *input, unsigned nr ) { @@ -366,7 +363,7 @@ lp_setup_set_fs_inputs( struct setup_context *setup, } void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ) @@ -381,7 +378,7 @@ lp_setup_set_fs_functions( struct setup_context *setup, } void -lp_setup_set_fs_constants(struct setup_context *setup, +lp_setup_set_fs_constants(struct lp_setup_context *setup, struct pipe_buffer *buffer) { LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer); @@ -393,7 +390,7 @@ lp_setup_set_fs_constants(struct setup_context *setup, void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ) { LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value); @@ -405,7 +402,7 @@ lp_setup_set_alpha_ref_value( struct setup_context *setup, } void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -420,7 +417,7 @@ lp_setup_set_blend_color( struct setup_context *setup, void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ) { LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); @@ -435,7 +432,7 @@ lp_setup_set_scissor( struct setup_context *setup, void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ) { setup->flatshade_first = flatshade_first; @@ -443,7 +440,7 @@ lp_setup_set_flatshade_first( struct setup_context *setup, void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *vertex_info ) { /* XXX: just silently holding onto the pointer: @@ -456,7 +453,7 @@ lp_setup_set_vertex_info( struct setup_context *setup, * Called during state validation when LP_NEW_TEXTURE is set. */ void -lp_setup_set_sampler_textures( struct setup_context *setup, +lp_setup_set_sampler_textures( struct lp_setup_context *setup, unsigned num, struct pipe_texture **texture) { unsigned i; @@ -474,20 +471,29 @@ lp_setup_set_sampler_textures( struct setup_context *setup, jit_tex = &setup->fs.current.jit_context.textures[i]; jit_tex->width = tex->width0; jit_tex->height = tex->height0; - jit_tex->stride = lp_tex->stride[0]; - if(!lp_tex->dt) { - jit_tex->data = lp_tex->data; + jit_tex->depth = tex->depth0; + jit_tex->last_level = tex->last_level; + if (!lp_tex->dt) { + /* regular texture - setup array of mipmap level pointers */ + int j; + for (j = 0; j <= tex->last_level; j++) { + jit_tex->data[j] = + (ubyte *) lp_tex->data + lp_tex->level_offset[j]; + jit_tex->row_stride[j] = lp_tex->stride[j]; + } } else { + /* display target texture/surface */ /* * XXX: Where should this be unmapped? */ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen); - struct llvmpipe_winsys *winsys = screen->winsys; - jit_tex->data = winsys->displaytarget_map(winsys, lp_tex->dt, + struct sw_winsys *winsys = screen->winsys; + jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt, PIPE_BUFFER_USAGE_CPU_READ); - assert(jit_tex->data); + jit_tex->row_stride[0] = lp_tex->stride[0]; + assert(jit_tex->data[0]); } /* the scene references this texture */ @@ -508,7 +514,7 @@ lp_setup_set_sampler_textures( struct setup_context *setup, * being rendered and the current scene being built. */ unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, +lp_setup_is_texture_referenced( const struct lp_setup_context *setup, const struct pipe_texture *texture ) { unsigned i; @@ -537,7 +543,7 @@ lp_setup_is_texture_referenced( const struct setup_context *setup, * Called by vbuf code when we're about to draw something. */ void -lp_setup_update_state( struct setup_context *setup ) +lp_setup_update_state( struct lp_setup_context *setup ) { struct lp_scene *scene = lp_setup_get_current_scene(setup); @@ -655,7 +661,7 @@ lp_setup_update_state( struct setup_context *setup ) /* Only caller is lp_setup_vbuf_destroy() */ void -lp_setup_destroy( struct setup_context *setup ) +lp_setup_destroy( struct lp_setup_context *setup ) { reset_context( setup ); @@ -680,12 +686,12 @@ lp_setup_destroy( struct setup_context *setup ) * the draw module. Currently also creates a rasterizer to use with * it. */ -struct setup_context * -lp_setup_create( struct pipe_screen *screen, +struct lp_setup_context * +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ) { unsigned i; - struct setup_context *setup = CALLOC_STRUCT(setup_context); + struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context); if (!setup) return NULL; @@ -696,7 +702,9 @@ lp_setup_create( struct pipe_screen *screen, if (!setup->empty_scenes) goto fail; - setup->rast = lp_rast_create( screen, setup->empty_scenes ); + /* XXX: move this to the screen and share between contexts: + */ + setup->rast = lp_rast_create(); if (!setup->rast) goto fail; @@ -709,7 +717,8 @@ lp_setup_create( struct pipe_screen *screen, /* create some empty scenes */ for (i = 0; i < MAX_SCENES; i++) { - setup->scenes[i] = lp_scene_create(); + setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes ); + lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 0e155a7dc31..be1bf96f12d 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -61,78 +61,79 @@ struct pipe_framebuffer_state; struct lp_fragment_shader; struct lp_jit_context; -struct setup_context * -lp_setup_create( struct pipe_screen *screen, +struct lp_setup_context * +lp_setup_create( struct pipe_context *pipe, struct draw_context *draw ); void -lp_setup_clear(struct setup_context *setup, +lp_setup_clear(struct lp_setup_context *setup, const float *clear_color, double clear_depth, unsigned clear_stencil, unsigned flags); struct pipe_fence_handle * -lp_setup_fence( struct setup_context *setup ); +lp_setup_fence( struct lp_setup_context *setup ); void -lp_setup_flush( struct setup_context *setup, +lp_setup_flush( struct lp_setup_context *setup, unsigned flags ); void -lp_setup_bind_framebuffer( struct setup_context *setup, +lp_setup_bind_framebuffer( struct lp_setup_context *setup, const struct pipe_framebuffer_state *fb ); void -lp_setup_set_triangle_state( struct setup_context *setup, +lp_setup_set_triangle_state( struct lp_setup_context *setup, unsigned cullmode, boolean front_is_ccw, - boolean scissor ); + boolean scissor, + boolean gl_rasterization_rules ); void -lp_setup_set_fs_inputs( struct setup_context *setup, +lp_setup_set_fs_inputs( struct lp_setup_context *setup, const struct lp_shader_input *interp, unsigned nr ); void -lp_setup_set_fs_functions( struct setup_context *setup, +lp_setup_set_fs_functions( struct lp_setup_context *setup, lp_jit_frag_func jit_function0, lp_jit_frag_func jit_function1, boolean opaque ); void -lp_setup_set_fs_constants(struct setup_context *setup, +lp_setup_set_fs_constants(struct lp_setup_context *setup, struct pipe_buffer *buffer); void -lp_setup_set_alpha_ref_value( struct setup_context *setup, +lp_setup_set_alpha_ref_value( struct lp_setup_context *setup, float alpha_ref_value ); void -lp_setup_set_blend_color( struct setup_context *setup, +lp_setup_set_blend_color( struct lp_setup_context *setup, const struct pipe_blend_color *blend_color ); void -lp_setup_set_scissor( struct setup_context *setup, +lp_setup_set_scissor( struct lp_setup_context *setup, const struct pipe_scissor_state *scissor ); void -lp_setup_set_sampler_textures( struct setup_context *setup, +lp_setup_set_sampler_textures( struct lp_setup_context *setup, unsigned num, struct pipe_texture **texture); unsigned -lp_setup_is_texture_referenced( const struct setup_context *setup, +lp_setup_is_texture_referenced( const struct lp_setup_context *setup, const struct pipe_texture *texture ); void -lp_setup_set_flatshade_first( struct setup_context *setup, +lp_setup_set_flatshade_first( struct lp_setup_context *setup, boolean flatshade_first ); void -lp_setup_set_vertex_info( struct setup_context *setup, +lp_setup_set_vertex_info( struct lp_setup_context *setup, struct vertex_info *info ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index a5fc34e54a2..464fb369840 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -65,7 +65,7 @@ struct lp_scene_queue; * Subclass of vbuf_render, plugged directly into the draw module as * the rendering backend. */ -struct setup_context +struct lp_setup_context { struct vbuf_render base; @@ -89,6 +89,7 @@ struct setup_context boolean ccw_is_frontface; boolean scissor_test; unsigned cullmode; + float pixel_offset; struct pipe_framebuffer_state fb; @@ -131,29 +132,29 @@ struct setup_context unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */ - void (*point)( struct setup_context *, + void (*point)( struct lp_setup_context *, const float (*v0)[4]); - void (*line)( struct setup_context *, + void (*line)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4]); - void (*triangle)( struct setup_context *, + void (*triangle)( struct lp_setup_context *, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4]); }; -void lp_setup_choose_triangle( struct setup_context *setup ); -void lp_setup_choose_line( struct setup_context *setup ); -void lp_setup_choose_point( struct setup_context *setup ); +void lp_setup_choose_triangle( struct lp_setup_context *setup ); +void lp_setup_choose_line( struct lp_setup_context *setup ); +void lp_setup_choose_point( struct lp_setup_context *setup ); -struct lp_scene *lp_setup_get_current_scene(struct setup_context *setup); +struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup); -void lp_setup_init_vbuf(struct setup_context *setup); +void lp_setup_init_vbuf(struct lp_setup_context *setup); -void lp_setup_update_state( struct setup_context *setup ); +void lp_setup_update_state( struct lp_setup_context *setup ); -void lp_setup_destroy( struct setup_context *setup ); +void lp_setup_destroy( struct lp_setup_context *setup ); #endif diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index feea79d3943..be41c44e6f5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -31,7 +31,7 @@ #include "lp_setup_context.h" -static void line_nop( struct setup_context *setup, +static void line_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4] ) { @@ -39,7 +39,7 @@ static void line_nop( struct setup_context *setup, void -lp_setup_choose_line( struct setup_context *setup ) +lp_setup_choose_line( struct lp_setup_context *setup ) { setup->line = line_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index f03ca729b24..9f69e6c5ce2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -31,14 +31,14 @@ #include "lp_setup_context.h" -static void point_nop( struct setup_context *setup, +static void point_nop( struct lp_setup_context *setup, const float (*v0)[4] ) { } void -lp_setup_choose_point( struct setup_context *setup ) +lp_setup_choose_point( struct lp_setup_context *setup ) { setup->point = point_nop; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index a8bf540803b..ac6264dc73e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -41,7 +41,8 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct lp_rast_triangle *tri, +static void constant_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, const float value, unsigned i ) @@ -56,7 +57,8 @@ static void constant_coef( struct lp_rast_triangle *tri, * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. */ -static void linear_coef( struct lp_rast_triangle *tri, +static void linear_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -90,8 +92,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * instead - i'll switch to this later. */ tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -103,7 +105,8 @@ static void linear_coef( struct lp_rast_triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct lp_rast_triangle *tri, +static void perspective_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -125,8 +128,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, tri->inputs.dadx[slot][i] = dadx; tri->inputs.dady[slot][i] = dady; tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + (dadx * (v1[0][0] - setup->pixel_offset) + + dady * (v1[0][1] - setup->pixel_offset))); } @@ -137,7 +140,8 @@ static void perspective_coef( struct lp_rast_triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct lp_rast_triangle *tri, +setup_fragcoord_coef(struct lp_setup_context *setup, + struct lp_rast_triangle *tri, float oneoverarea, unsigned slot, const float (*v1)[4], @@ -153,27 +157,28 @@ setup_fragcoord_coef(struct lp_rast_triangle *tri, tri->inputs.dadx[slot][1] = 0.0; tri->inputs.dady[slot][1] = 1.0; /*Z*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 2); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); /*W*/ - linear_coef(tri, oneoverarea, slot, v1, v2, v3, 0, 3); + linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); } -static void setup_facing_coef( struct lp_rast_triangle *tri, +static void setup_facing_coef( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, unsigned slot, boolean frontface ) { - constant_coef( tri, slot, 1.0f - frontface, 0 ); - constant_coef( tri, slot, 0.0f, 1 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 2 ); /* wasted */ - constant_coef( tri, slot, 0.0f, 3 ); /* wasted */ + constant_coef( setup, tri, slot, 1.0f - frontface, 0 ); + constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ + constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ } /** * Compute the tri->coef[] array dadx, dady, a0 values. */ -static void setup_tri_coefficients( struct setup_context *setup, +static void setup_tri_coefficients( struct lp_setup_context *setup, struct lp_rast_triangle *tri, float oneoverarea, const float (*v1)[4], @@ -185,7 +190,7 @@ static void setup_tri_coefficients( struct setup_context *setup, /* The internal position input is in slot zero: */ - setup_fragcoord_coef(tri, oneoverarea, 0, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3); /* setup interpolation for all the remaining attributes: */ @@ -196,27 +201,27 @@ static void setup_tri_coefficients( struct setup_context *setup, switch (setup->fs.input[slot].interp) { case LP_INTERP_CONSTANT: for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri, slot+1, v3[vert_attr][i], i); + constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); break; case LP_INTERP_LINEAR: for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_PERSPECTIVE: for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); + perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); break; case LP_INTERP_POSITION: /* XXX: fix me - duplicates the values in slot zero. */ - setup_fragcoord_coef(tri, oneoverarea, slot+1, v1, v2, v3); + setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3); break; case LP_INTERP_FACING: - setup_facing_coef(tri, slot+1, frontface); + setup_facing_coef(setup, tri, slot+1, frontface); break; default: @@ -274,19 +279,19 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) * bins for the tiles which we overlap. */ static void -do_triangle_ccw(struct setup_context *setup, +do_triangle_ccw(struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontfacing ) { /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0]); - const int x2 = subpixel_snap(v2[0][0]); - const int x3 = subpixel_snap(v3[0][0]); - const int y1 = subpixel_snap(v1[0][1]); - const int y2 = subpixel_snap(v2[0][1]); - const int y3 = subpixel_snap(v3[0][1]); + const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); + const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); + const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); + const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); + const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); + const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); struct lp_scene *scene = lp_setup_get_current_scene(setup); struct lp_rast_triangle *tri; @@ -297,6 +302,15 @@ do_triangle_ccw(struct setup_context *setup, tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); +#ifdef DEBUG + tri->v[0][0] = v1[0][0]; + tri->v[1][0] = v2[0][0]; + tri->v[2][0] = v3[0][0]; + tri->v[0][1] = v1[0][1]; + tri->v[1][1] = v2[0][1]; + tri->v[2][1] = v3[0][1]; +#endif + tri->dx12 = x1 - x2; tri->dx23 = x2 - x3; tri->dx31 = x3 - x1; @@ -556,7 +570,7 @@ do_triangle_ccw(struct setup_context *setup, } -static void triangle_cw( struct setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -565,7 +579,7 @@ static void triangle_cw( struct setup_context *setup, } -static void triangle_ccw( struct setup_context *setup, +static void triangle_ccw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -574,7 +588,7 @@ static void triangle_ccw( struct setup_context *setup, } -static void triangle_both( struct setup_context *setup, +static void triangle_both( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -593,7 +607,7 @@ static void triangle_both( struct setup_context *setup, } -static void triangle_nop( struct setup_context *setup, +static void triangle_nop( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) @@ -602,7 +616,7 @@ static void triangle_nop( struct setup_context *setup, void -lp_setup_choose_triangle( struct setup_context *setup ) +lp_setup_choose_triangle( struct lp_setup_context *setup ) { switch (setup->cullmode) { case PIPE_WINDING_NONE: diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c index 24291da91e4..d7336d82b21 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c @@ -48,10 +48,10 @@ /** cast wrapper */ -static struct setup_context * -setup_context(struct vbuf_render *vbr) +static struct lp_setup_context * +lp_setup_context(struct vbuf_render *vbr) { - return (struct setup_context *) vbr; + return (struct lp_setup_context *) vbr; } @@ -59,7 +59,7 @@ setup_context(struct vbuf_render *vbr) static const struct vertex_info * lp_setup_get_vertex_info(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); return setup->vertex_info; } @@ -68,7 +68,7 @@ static boolean lp_setup_allocate_vertices(struct vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); unsigned size = vertex_size * nr_vertices; if (setup->vertex_buffer_size < size) { @@ -92,7 +92,7 @@ lp_setup_release_vertices(struct vbuf_render *vbr) static void * lp_setup_map_vertices(struct vbuf_render *vbr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); return setup->vertex_buffer; } @@ -101,7 +101,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, ushort min_index, ushort max_index ) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size ); /* do nothing */ } @@ -110,7 +110,7 @@ lp_setup_unmap_vertices(struct vbuf_render *vbr, static boolean lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim) { - setup_context(vbr)->prim = prim; + lp_setup_context(vbr)->prim = prim; return TRUE; } @@ -129,7 +129,7 @@ static INLINE const_float4_ptr get_vert( const void *vertex_buffer, static void lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = setup->vertex_buffer; unsigned i; @@ -231,57 +231,29 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride), - get_vert(vertex_buffer, indices[i-0], stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; @@ -312,7 +284,7 @@ lp_setup_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) static void lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { - struct setup_context *setup = setup_context(vbr); + struct lp_setup_context *setup = lp_setup_context(vbr); const unsigned stride = setup->vertex_info->size * sizeof(float); const void *vertex_buffer = (void *) get_vert(setup->vertex_buffer, start, stride); @@ -415,57 +387,28 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 4) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; case PIPE_PRIM_QUAD_STRIP: - if (setup->flatshade_first) { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - setup->triangle( setup, - - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - setup->triangle( setup, - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride) ); - setup->triangle( setup, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride), - get_vert(vertex_buffer, i-0, stride) ); - } + for (i = 3; i < nr; i += 2) { + setup->triangle( setup, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride) ); + setup->triangle( setup, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; @@ -493,7 +436,7 @@ lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) static void lp_setup_vbuf_destroy(struct vbuf_render *vbr) { - lp_setup_destroy(setup_context(vbr)); + lp_setup_destroy(lp_setup_context(vbr)); } @@ -501,7 +444,7 @@ lp_setup_vbuf_destroy(struct vbuf_render *vbr) * Create the post-transform vertex handler for the given context. */ void -lp_setup_init_vbuf(struct setup_context *setup) +lp_setup_init_vbuf(struct lp_setup_context *setup) { setup->base.max_indices = LP_MAX_VBUF_INDEXES; setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE; diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h index 9beba32271f..a5a1a720742 100644 --- a/src/gallium/drivers/llvmpipe/lp_state.h +++ b/src/gallium/drivers/llvmpipe/lp_state.h @@ -31,7 +31,7 @@ #ifndef LP_STATE_H #define LP_STATE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" @@ -119,6 +119,10 @@ struct lp_vertex_shader { struct draw_vertex_shader *draw_data; }; +struct lp_velems_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; void * @@ -176,8 +180,14 @@ void *llvmpipe_create_vs_state(struct pipe_context *, void llvmpipe_bind_vs_state(struct pipe_context *, void *); void llvmpipe_delete_vs_state(struct pipe_context *, void *); +void *llvmpipe_create_vertex_elements_state(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); +void llvmpipe_bind_vertex_elements_state(struct pipe_context *, void *); +void llvmpipe_delete_vertex_elements_state(struct pipe_context *, void *); + void llvmpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); + const struct pipe_poly_stipple * ); void llvmpipe_set_scissor_state( struct pipe_context *, const struct pipe_scissor_state * ); @@ -194,10 +204,6 @@ llvmpipe_set_vertex_sampler_textures(struct pipe_context *, void llvmpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); -void llvmpipe_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); - void llvmpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 90dae3f9101..9a8de0edfda 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -40,7 +40,7 @@ * - depth/stencil test (stencil TBI) * - blending * - * This file has only the glue to assembly the fragment pipeline. The actual + * This file has only the glue to assemble the fragment pipeline. The actual * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the * lp_bld_*.[ch] files, and in a complete generic and reusable way. Here we * muster the LLVM JIT execution engine to create a function that follows an @@ -95,6 +95,9 @@ #include "lp_tex_sample.h" +#include <llvm-c/Analysis.h> + + static const unsigned char quad_offset_x[4] = {0, 1, 0, 1}; static const unsigned char quad_offset_y[4] = {0, 0, 1, 1}; @@ -974,6 +977,13 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs) assert(fs != llvmpipe->fs); (void) llvmpipe; + /* + * XXX: we need to flush the context until we have some sort of reference + * counting in fragment shaders as they may still be binned + */ + draw_flush(llvmpipe->draw); + lp_setup_flush(llvmpipe->setup, 0); + variant = shader->variants; while(variant) { struct lp_fragment_shader_variant *next = variant->next; diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c index feb012816c9..6df3ef25b0e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c +++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c @@ -62,7 +62,8 @@ void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, lp_setup_set_triangle_state( llvmpipe->setup, llvmpipe->rasterizer->cull_mode, llvmpipe->rasterizer->front_winding == PIPE_WINDING_CCW, - llvmpipe->rasterizer->scissor); + llvmpipe->rasterizer->scissor, + llvmpipe->rasterizer->gl_rasterization_rules); } llvmpipe->dirty |= LP_NEW_RASTERIZER; diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c index 57ac25ea0cb..f6427aa908e 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_vertex.c +++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c @@ -35,24 +35,41 @@ #include "draw/draw_context.h" +void * +llvmpipe_create_vertex_elements_state(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct lp_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} + void -llvmpipe_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) +llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) { struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe); + struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems; - assert(count <= PIPE_MAX_ATTRIBS); - - memcpy(llvmpipe->vertex_element, attribs, - count * sizeof(struct pipe_vertex_element)); - llvmpipe->num_vertex_elements = count; + llvmpipe->velems = lp_velems; llvmpipe->dirty |= LP_NEW_VERTEX; - draw_set_vertex_elements(llvmpipe->draw, count, attribs); + if (velems) + draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem); } +void +llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); +} void llvmpipe_set_vertex_buffers(struct pipe_context *pipe, diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c index 6110b0a193e..ca3d62c3613 100644 --- a/src/gallium/drivers/llvmpipe/lp_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_surface.c @@ -27,6 +27,7 @@ #include "util/u_rect.h" #include "lp_context.h" +#include "lp_flush.h" #include "lp_surface.h" @@ -36,6 +37,20 @@ lp_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { + llvmpipe_flush_texture(pipe, + dest->texture, dest->face, dest->level, + 0, /* flush_flags */ + FALSE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + + llvmpipe_flush_texture(pipe, + src->texture, src->face, src->level, + 0, /* flush_flags */ + TRUE, /* read_only */ + FALSE, /* cpu_access */ + FALSE); /* do_not_flush */ + util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h index a9b99945f92..1df98978988 100644 --- a/src/gallium/drivers/llvmpipe/lp_test.h +++ b/src/gallium/drivers/llvmpipe/lp_test.h @@ -41,7 +41,7 @@ #include <stdio.h> #include <float.h> -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 48828bd0a0f..2c4d7fb6e14 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -29,7 +29,7 @@ #include <stdlib.h> #include <stdio.h> -#include <llvm-c/Core.h> +#include "os/os_llvm.h" #include <llvm-c/Analysis.h> #include <llvm-c/ExecutionEngine.h> #include <llvm-c/Target.h> @@ -52,25 +52,25 @@ struct pixel_test_case struct pixel_test_case test_cases[] = { - {PIPE_FORMAT_R5G6B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0x07e0, {0.0, 1.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0xf800, {1.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_R5G6B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_A1R5G5B5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x0000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x001f, {0.0, 0.0, 1.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0x07e0, {0.0, 1.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0xf800, {1.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G6R5_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, + + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x0000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x001f, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x03e0, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x7c00, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0x8000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B5G5R5A1_UNORM, 0xffff, {1.0, 1.0, 1.0, 1.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, #if 0 {PIPE_FORMAT_R8G8B8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, @@ -81,12 +81,12 @@ struct pixel_test_case test_cases[] = {PIPE_FORMAT_R8G8B8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, #endif - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}}, - {PIPE_FORMAT_B8G8R8A8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00000000, {0.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x000000ff, {0.0, 0.0, 0.0, 1.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x0000ff00, {1.0, 0.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0x00ff0000, {0.0, 1.0, 0.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0xff000000, {0.0, 0.0, 1.0, 0.0}}, + {PIPE_FORMAT_A8R8G8B8_UNORM, 0xffffffff, {1.0, 1.0, 1.0, 1.0}}, }; diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c index 14ff00469b0..f9dce8b9c25 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_main.c +++ b/src/gallium/drivers/llvmpipe/lp_test_main.c @@ -37,7 +37,7 @@ #include "util/u_cpu_detect.h" #include "gallivm/lp_bld_const.h" -#include "gallivm/lp_bld_misc.h" +#include "gallivm/lp_bld_init.h" #include "lp_test.h" @@ -380,8 +380,7 @@ int main(int argc, char **argv) n = atoi(argv[i]); } - LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); + lp_build_init(); util_cpu_detect(); diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h index cb59a94464a..799df182b6a 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h @@ -29,7 +29,7 @@ #define LP_TEX_SAMPLE_H -#include <llvm-c/Core.h> +#include "os/os_llvm.h" struct lp_sampler_static_state; diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c index 2533275dc18..662508af61a 100644 --- a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c @@ -51,10 +51,11 @@ /** - * This provides the bridge between the sampler state store in lp_jit_context - * and lp_jit_texture and the sampler code generator. It provides the - * texture layout information required by the texture sampler code generator - * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime. + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. */ struct llvmpipe_sampler_dynamic_state { @@ -79,6 +80,9 @@ struct lp_llvm_sampler_soa /** * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. * * @sa http://llvm.org/docs/GetElementPtr.html */ @@ -87,9 +91,11 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, LLVMBuilderRef builder, unsigned unit, unsigned member_index, - const char *member_name) + const char *member_name, + boolean emit_load) { - struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base; + struct llvmpipe_sampler_dynamic_state *state = + (struct llvmpipe_sampler_dynamic_state *)base; LLVMValueRef indices[4]; LLVMValueRef ptr; LLVMValueRef res; @@ -107,7 +113,10 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), ""); - res = LLVMBuildLoad(builder, ptr, ""); + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; lp_build_name(res, "context.texture%u.%s", unit, member_name); @@ -116,26 +125,30 @@ lp_llvm_texture_member(struct lp_sampler_dynamic_state *base, /** - * Helper macro to instantiate the functions that generate the code to fetch - * the members of lp_jit_texture to fulfill the sampler code generator requests. + * Helper macro to instantiate the functions that generate the code to + * fetch the members of lp_jit_texture to fulfill the sampler code + * generator requests. * - * This complexity is the price we have to pay to keep the texture sampler code - * generator a reusable module without dependencies to llvmpipe internals. + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * llvmpipe internals. */ -#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \ +#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \ static LLVMValueRef \ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \ LLVMBuilderRef builder, \ unsigned unit) \ { \ - return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \ + return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \ } -LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH) -LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT) -LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE) -LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA) +LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE) +LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE) +LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE) +LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE) +LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE) static void @@ -145,6 +158,10 @@ lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) } +/** + * Fetch filtered values from texture. + * The 'texel' parameter returns four vectors corresponding to R, G, B, A. + */ static void lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base, LLVMBuilderRef builder, @@ -185,7 +202,9 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state, sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel; sampler->dynamic_state.base.width = lp_llvm_texture_width; sampler->dynamic_state.base.height = lp_llvm_texture_height; - sampler->dynamic_state.base.stride = lp_llvm_texture_stride; + sampler->dynamic_state.base.depth = lp_llvm_texture_depth; + sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level; + sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride; sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr; sampler->dynamic_state.static_state = static_state; sampler->dynamic_state.context_ptr = context_ptr; diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c index 022bf92cb46..9a85a42897d 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.c +++ b/src/gallium/drivers/llvmpipe/lp_texture.c @@ -40,9 +40,10 @@ #include "lp_context.h" #include "lp_screen.h" +#include "lp_flush.h" #include "lp_texture.h" #include "lp_tile_size.h" -#include "lp_winsys.h" +#include "state_tracker/sw_winsys.h" /** @@ -93,7 +94,7 @@ static boolean llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen, struct llvmpipe_texture *lpt) { - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; /* Round up the surface size to a multiple of the tile size to * avoid tile clipping. @@ -124,14 +125,9 @@ llvmpipe_texture_create(struct pipe_screen *_screen, pipe_reference_init(&lpt->base.reference, 1); lpt->base.screen = &screen->base; - /* XXX: The xlib state tracker is brain-dead and will request - * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it. - */ - if (lpt->base.format == PIPE_FORMAT_Z16_UNORM) - lpt->base.format = PIPE_FORMAT_Z32_UNORM; - if (lpt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { if (!llvmpipe_displaytarget_layout(screen, lpt)) goto fail; } @@ -148,43 +144,6 @@ llvmpipe_texture_create(struct pipe_screen *_screen, } -static struct pipe_texture * -llvmpipe_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) -{ - /* FIXME */ -#if 0 - struct llvmpipe_texture *lpt; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; - } - - lpt = CALLOC_STRUCT(llvmpipe_texture); - if (!lpt) - return NULL; - - lpt->base = *base; - pipe_reference_init(&lpt->base.reference, 1); - lpt->base.screen = screen; - lpt->stride[0] = stride[0]; - - pipe_buffer_reference(&lpt->buffer, buffer); - - return &lpt->base; -#else - debug_printf("llvmpipe_texture_blanket() not implemented!"); - return NULL; -#endif -} - - static void llvmpipe_texture_destroy(struct pipe_texture *pt) { @@ -193,7 +152,7 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) if (lpt->dt) { /* display target */ - struct llvmpipe_winsys *winsys = screen->winsys; + struct sw_winsys *winsys = screen->winsys; winsys->displaytarget_destroy(winsys, lpt->dt); } else { @@ -205,6 +164,92 @@ llvmpipe_texture_destroy(struct pipe_texture *pt) } +/** + * Map a texture. Without any synchronization. + */ +void * +llvmpipe_texture_map(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + uint8_t *map; + + if (lpt->dt) { + /* display target */ + struct llvmpipe_screen *screen = llvmpipe_screen(texture->screen); + struct sw_winsys *winsys = screen->winsys; + const unsigned usage = PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + /* FIXME: keep map count? */ + map = winsys->displaytarget_map(winsys, lpt->dt, usage); + } + else { + /* regular texture */ + unsigned offset; + unsigned stride; + + map = lpt->data; + + assert(level < LP_MAX_TEXTURE_2D_LEVELS); + + offset = lpt->level_offset[level]; + stride = lpt->stride[level]; + + /* XXX shouldn't that rather be + tex_height = align(u_minify(texture->height0, level), 2) + to account for alignment done in llvmpipe_texture_layout ? + */ + if (texture->target == PIPE_TEXTURE_CUBE) { + unsigned tex_height = u_minify(texture->height0, level); + offset += face * util_format_get_nblocksy(texture->format, tex_height) * stride; + } + else if (texture->target == PIPE_TEXTURE_3D) { + unsigned tex_height = u_minify(texture->height0, level); + offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + map += offset; + } + + return map; +} + + +/** + * Unmap a texture. Without any synchronization. + */ +void +llvmpipe_texture_unmap(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + + if (lpt->dt) { + /* display target */ + struct llvmpipe_screen *lp_screen = llvmpipe_screen(texture->screen); + struct sw_winsys *winsys = lp_screen->winsys; + + assert(face == 0); + assert(level == 0); + assert(zslice == 0); + + winsys->displaytarget_unmap(winsys, lpt->dt); + } +} + + static struct pipe_surface * llvmpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -223,7 +268,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->format = pt->format; ps->width = u_minify(pt->width0, level); ps->height = u_minify(pt->height0, level); - ps->offset = lpt->level_offset[level]; ps->usage = usage; /* Because we are llvmpipe, anything that the state tracker @@ -249,23 +293,6 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen, ps->face = face; ps->level = level; ps->zslice = zslice; - - /* XXX shouldn't that rather be - tex_height = align(ps->height, 2); - to account for alignment done in llvmpipe_texture_layout ? - */ - if (pt->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = ps->height; - ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else if (pt->target == PIPE_TEXTURE_3D) { - unsigned tex_height = ps->height; - ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level]; - } - else { - assert(face == 0); - assert(zslice == 0); - } } return ps; } @@ -285,7 +312,7 @@ llvmpipe_tex_surface_destroy(struct pipe_surface *surf) static struct pipe_transfer * -llvmpipe_get_tex_transfer(struct pipe_screen *screen, +llvmpipe_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -311,24 +338,6 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, pt->level = level; pt->zslice = zslice; - lpt->offset = lptex->level_offset[level]; - - /* XXX shouldn't that rather be - tex_height = align(u_minify(texture->height0, level), 2) - to account for alignment done in llvmpipe_texture_layout ? - */ - if (texture->target == PIPE_TEXTURE_CUBE) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += face * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else if (texture->target == PIPE_TEXTURE_3D) { - unsigned tex_height = u_minify(texture->height0, level); - lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride; - } - else { - assert(face == 0); - assert(zslice == 0); - } return pt; } return NULL; @@ -336,7 +345,8 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen, static void -llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer) +llvmpipe_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -349,11 +359,11 @@ llvmpipe_tex_transfer_destroy(struct pipe_transfer *transfer) static void * -llvmpipe_transfer_map( struct pipe_screen *_screen, +llvmpipe_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - struct llvmpipe_screen *screen = llvmpipe_screen(_screen); - ubyte *map, *xfer_map; + struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen); + ubyte *map; struct llvmpipe_texture *lpt; enum pipe_format format; @@ -361,52 +371,45 @@ llvmpipe_transfer_map( struct pipe_screen *_screen, lpt = llvmpipe_texture(transfer->texture); format = lpt->base.format; - if (lpt->dt) { - /* display target */ - struct llvmpipe_winsys *winsys = screen->winsys; + /* + * Transfers, like other pipe operations, must happen in order, so flush the + * context if necessary. + */ + llvmpipe_flush_texture(pipe, + transfer->texture, transfer->face, transfer->level, + 0, /* flush_flags */ + !(transfer->usage & PIPE_TRANSFER_WRITE), /* read_only */ + TRUE, /* cpu_access */ + FALSE); /* do_not_flush */ - map = winsys->displaytarget_map(winsys, lpt->dt, - pipe_transfer_buffer_flags(transfer)); - if (map == NULL) - return NULL; - } - else { - /* regular texture */ - map = lpt->data; - } + map = llvmpipe_texture_map(transfer->texture, + transfer->face, transfer->level, transfer->zslice); /* May want to different things here depending on read/write nature * of the map: */ - if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE)) { + if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Do something to notify sharing contexts of a texture change. */ screen->timestamp++; } - xfer_map = map + llvmpipe_transfer(transfer)->offset + + map += transfer->y / util_format_get_blockheight(format) * transfer->stride + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - /*printf("map = %p xfer map = %p\n", map, xfer_map);*/ - return xfer_map; + + return map; } static void -llvmpipe_transfer_unmap(struct pipe_screen *screen, - struct pipe_transfer *transfer) +llvmpipe_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer) { - struct llvmpipe_screen *lp_screen = llvmpipe_screen(screen); - struct llvmpipe_texture *lpt; - assert(transfer->texture); - lpt = llvmpipe_texture(transfer->texture); - if (lpt->dt) { - /* display target */ - struct llvmpipe_winsys *winsys = lp_screen->winsys; - winsys->displaytarget_unmap(winsys, lpt->dt); - } + llvmpipe_texture_unmap(transfer->texture, + transfer->face, transfer->level, transfer->zslice); } @@ -414,14 +417,18 @@ void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = llvmpipe_texture_create; - screen->texture_blanket = llvmpipe_texture_blanket; screen->texture_destroy = llvmpipe_texture_destroy; screen->get_tex_surface = llvmpipe_get_tex_surface; screen->tex_surface_destroy = llvmpipe_tex_surface_destroy; +} - screen->get_tex_transfer = llvmpipe_get_tex_transfer; - screen->tex_transfer_destroy = llvmpipe_tex_transfer_destroy; - screen->transfer_map = llvmpipe_transfer_map; - screen->transfer_unmap = llvmpipe_transfer_unmap; + +void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = llvmpipe_get_tex_transfer; + pipe->tex_transfer_destroy = llvmpipe_tex_transfer_destroy; + pipe->transfer_map = llvmpipe_transfer_map; + pipe->transfer_unmap = llvmpipe_transfer_unmap; } diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h index 87c905bc027..2350c26e4fc 100644 --- a/src/gallium/drivers/llvmpipe/lp_texture.h +++ b/src/gallium/drivers/llvmpipe/lp_texture.h @@ -32,24 +32,29 @@ #include "pipe/p_state.h" +#define LP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K for now */ +#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */ + + struct pipe_context; struct pipe_screen; struct llvmpipe_context; -struct llvmpipe_displaytarget; + +struct sw_displaytarget; struct llvmpipe_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[LP_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[LP_MAX_TEXTURE_2D_LEVELS]; /** * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET * usage. */ - struct llvmpipe_displaytarget *dt; + struct sw_displaytarget *dt; /** * Malloc'ed data for regular textures, or a mapping to dt above. @@ -90,8 +95,32 @@ llvmpipe_transfer(struct pipe_transfer *pt) } +static INLINE unsigned +llvmpipe_texture_stride(struct pipe_texture *texture, + unsigned level) +{ + struct llvmpipe_texture *lpt = llvmpipe_texture(texture); + assert(level < LP_MAX_TEXTURE_2D_LEVELS); + return lpt->stride[level]; +} + + +void * +llvmpipe_texture_map(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice); + +void +llvmpipe_texture_unmap(struct pipe_texture *texture, + unsigned face, + unsigned level, + unsigned zslice); + extern void llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen); +extern void +llvmpipe_init_context_texture_funcs(struct pipe_context *pipe); #endif /* LP_TEXTURE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c new file mode 100644 index 00000000000..c1980b316d5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "lp_tile_soa.h" +#include "lp_tile_image.h" + + +#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4) + + +/** + * Convert a tiled image into a linear image. + * \param src_stride source row stride in bytes (bytes per row of tiles) + * \param dst_stride dest row stride in bytes + */ +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + const uint8_t *src_tile = src + byte_offset; + + lp_tile_write_4ub(format, + src_tile, + dst, + dst_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * Convert a linear image into a tiled image. + * \param src_stride source row stride in bytes + * \param dst_stride dest row stride in bytes (bytes per row of tiles) + */ +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride) +{ + const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; + unsigned i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + unsigned tile_offset = + ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); + unsigned byte_offset = tile_offset * BYTES_PER_TILE; + uint8_t *dst_tile = dst + byte_offset; + + lp_tile_read_4ub(format, + dst_tile, + src, + src_stride, + i, j, TILE_SIZE, TILE_SIZE); + } + } +} + + +/** + * For testing only. + */ +void +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride) +{ + /* size in tiles */ + unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE; + unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE; + + uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); + + unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + + lp_linear_to_tiled(data, tiled, width, height, format, + stride, tiled_stride); + + lp_tiled_to_linear(tiled, data, width, height, format, + tiled_stride, stride); + + free(tiled); +} + diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/llvmpipe/lp_tile_image.h index 02ecfd7bbb7..60d472e8c5b 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h @@ -1,8 +1,6 @@ - /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. + * Copyright 2010 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -19,46 +17,41 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_tile.h" +#ifndef LP_TILE_IMAGE_H +#define LP_TILE_IMAGE_H -#include "nv40_context.h" -static void -nv40_surface_copy(struct pipe_context *pipe, - struct pipe_surface *dest, unsigned destx, unsigned desty, - struct pipe_surface *src, unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv04_surface_2d *eng2d = nv40->screen->eng2d; +void +lp_tiled_to_linear(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); - eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); -} -static void -nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, - unsigned destx, unsigned desty, unsigned width, - unsigned height, unsigned value) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv04_surface_2d *eng2d = nv40->screen->eng2d; +void +lp_linear_to_tiled(const uint8_t *src, + uint8_t *dst, + unsigned width, unsigned height, + enum pipe_format format, + unsigned src_stride, + unsigned dst_stride); - eng2d->fill(eng2d, dest, destx, desty, width, height, value); -} void -nv40_init_surface_functions(struct nv40_context *nv40) -{ - nv40->pipe.surface_copy = nv40_surface_copy; - nv40->pipe.surface_fill = nv40_surface_fill; -} +test_tiled_linear_conversion(uint8_t *data, + enum pipe_format format, + unsigned width, unsigned height, + unsigned stride); + + +#endif /* LP_TILE_IMAGE_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py index 5d53689a3db..00b8d4fc382 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py +++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py @@ -45,10 +45,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/u from u_format_access import * -def generate_format_read(format, dst_type, dst_native_type, dst_suffix): +def generate_format_read(format, dst_channel, dst_native_type, dst_suffix): '''Generate the function to read pixels from a particular format''' - name = short_name(format) + name = format.short_name() src_native_type = native_type(format) @@ -64,11 +64,11 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): names = ['']*4 if format.colorspace == 'rgb': for i in range(4): - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: names[swizzle] += 'rgba'[i] elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] + swizzle = format.swizzles[0] if swizzle < 4: names[swizzle] = 'z' else: @@ -76,48 +76,49 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): else: assert False - if format.layout == ARITH: - print ' %s pixel = *src_pixel++;' % src_native_type - shift = 0; - for i in range(4): - src_type = format.in_types[i] - width = src_type.size - if names[i]: - value = 'pixel' - mask = (1 << width) - 1 - if shift: - value = '(%s >> %u)' % (value, shift) - if shift + width < format.block_size(): - value = '(%s & 0x%x)' % (value, mask) - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) - shift += width - elif format.layout == ARRAY: - for i in range(4): - src_type = format.in_types[i] - if names[i]: - value = '(*src_pixel++)' - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' %s %s = %s;' % (dst_native_type, names[i], value) + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = *src_pixel++;' % src_native_type + shift = 0; + for i in range(4): + src_channel = format.channels[i] + width = src_channel.size + if names[i]: + value = 'pixel' + mask = (1 << width) - 1 + if shift: + value = '(%s >> %u)' % (value, shift) + if shift + width < format.block_size(): + value = '(%s & 0x%x)' % (value, mask) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) + shift += width + else: + for i in range(4): + src_channel = format.channels[i] + if names[i]: + value = '(*src_pixel++)' + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' %s %s = %s;' % (dst_native_type, names[i], value) else: assert False for i in range(4): if format.colorspace == 'rgb': - swizzle = format.out_swizzle[i] + swizzle = format.swizzles[i] if swizzle < 4: value = names[swizzle] elif swizzle == SWIZZLE_0: value = '0' elif swizzle == SWIZZLE_1: - value = '1' + value = get_one(dst_channel) else: assert False elif format.colorspace == 'zs': if i < 3: value = 'z' else: - value = '1' + value = get_one(dst_channel) else: assert False print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i]) @@ -129,31 +130,16 @@ def generate_format_read(format, dst_type, dst_native_type, dst_suffix): print -def compute_inverse_swizzle(format): - '''Return an array[4] of inverse swizzle terms''' - inv_swizzle = [None]*4 - if format.colorspace == 'rgb': - for i in range(4): - swizzle = format.out_swizzle[i] - if swizzle < 4: - inv_swizzle[swizzle] = i - elif format.colorspace == 'zs': - swizzle = format.out_swizzle[0] - if swizzle < 4: - inv_swizzle[swizzle] = 0 - return inv_swizzle - - -def pack_rgba(format, src_type, r, g, b, a): +def pack_rgba(format, src_channel, r, g, b, a): """Return an expression for packing r, g, b, a into a pixel of the given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)' """ assert format.colorspace == 'rgb' - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() shift = 0 expr = None for i in range(4): - # choose r, g, b, or a depending on the inverse swizzle term + # choose r, g, b, or a depending on the inverse swizzle term if inv_swizzle[i] == 0: value = r elif inv_swizzle[i] == 1: @@ -166,25 +152,25 @@ def pack_rgba(format, src_type, r, g, b, a): value = None if value: - dst_type = format.in_types[i] + dst_channel = format.channels[i] dst_native_type = native_type(format) - value = conversion_expr(src_type, dst_type, dst_native_type, value) + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) term = "((%s) << %d)" % (value, shift) if expr: expr = expr + " | " + term else: expr = term - width = format.in_types[i].size + width = format.channels[i].size shift = shift + width return expr -def emit_unrolled_write_code(format, src_type): +def emit_unrolled_write_code(format, src_channel): '''Emit code for writing a block based on unrolled loops. This is considerably faster than the TILE_PIXEL-based code below. ''' - dst_native_type = native_type(format) + dst_native_type = 'uint%u_t' % format.block_size() print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride() print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type) print ' unsigned int qx, qy, i;' @@ -199,8 +185,8 @@ def emit_unrolled_write_code(format, src_type): print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;' print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */' print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {' - print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_type, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") - print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_type, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") + print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]") + print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_channel, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]") print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);' print ' dstpix[offset + 0] = pixel0;' print ' dstpix[offset + 1] = pixel1;' @@ -210,11 +196,11 @@ def emit_unrolled_write_code(format, src_type): print ' }' -def emit_tile_pixel_write_code(format, src_type): +def emit_tile_pixel_write_code(format, src_channel): '''Emit code for writing a block based on the TILE_PIXEL macro.''' dst_native_type = native_type(format) - inv_swizzle = compute_inverse_swizzle(format) + inv_swizzle = format.inv_swizzles() print ' unsigned x, y;' print ' uint8_t *dst_row = dst + y0*dst_stride;' @@ -222,27 +208,28 @@ def emit_tile_pixel_write_code(format, src_type): print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride()) print ' for (x = 0; x < w; ++x) {' - if format.layout == ARITH: - print ' %s pixel = 0;' % dst_native_type - shift = 0; - for i in range(4): - dst_type = format.in_types[i] - width = dst_type.size - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - if shift: - value = '(%s << %u)' % (value, shift) - print ' pixel |= %s;' % value - shift += width - print ' *dst_pixel++ = pixel;' - elif format.layout == ARRAY: - for i in range(4): - dst_type = format.in_types[i] - if inv_swizzle[i] is not None: - value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] - value = conversion_expr(src_type, dst_type, dst_native_type, value) - print ' *dst_pixel++ = %s;' % value + if format.layout == PLAIN: + if not format.is_array(): + print ' %s pixel = 0;' % dst_native_type + shift = 0; + for i in range(4): + dst_channel = format.channels[i] + width = dst_channel.size + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + if shift: + value = '(%s << %u)' % (value, shift) + print ' pixel |= %s;' % value + shift += width + print ' *dst_pixel++ = pixel;' + else: + for i in range(4): + dst_channel = format.channels[i] + if inv_swizzle[i] is not None: + value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i] + value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False) + print ' *dst_pixel++ = %s;' % value else: assert False @@ -251,28 +238,33 @@ def emit_tile_pixel_write_code(format, src_type): print ' }' -def generate_format_write(format, src_type, src_native_type, src_suffix): +def generate_format_write(format, src_channel, src_native_type, src_suffix): '''Generate the function to write pixels to a particular format''' - name = short_name(format) + name = format.short_name() print 'static void' print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type) print '{' - if format.layout == ARITH and format.colorspace == 'rgb': - emit_unrolled_write_code(format, src_type) + if format.layout == PLAIN \ + and format.colorspace == 'rgb' \ + and format.block_size() <= 32 \ + and format.is_pot() \ + and not format.is_mixed() \ + and format.channels[0].type == UNSIGNED: + emit_unrolled_write_code(format, src_channel) else: - emit_tile_pixel_write_code(format, src_type) + emit_tile_pixel_write_code(format, src_channel) print '}' print -def generate_read(formats, dst_type, dst_native_type, dst_suffix): +def generate_read(formats, dst_channel, dst_native_type, dst_suffix): '''Generate the dispatch function to read pixels from any format''' for format in formats: if is_format_supported(format): - generate_format_read(format, dst_type, dst_native_type, dst_suffix) + generate_format_read(format, dst_channel, dst_native_type, dst_suffix) print 'void' print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type) @@ -282,7 +274,7 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix) + print ' func = &lp_tile_%s_read_%s;' % (format.short_name(), dst_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -293,12 +285,12 @@ def generate_read(formats, dst_type, dst_native_type, dst_suffix): print -def generate_write(formats, src_type, src_native_type, src_suffix): +def generate_write(formats, src_channel, src_native_type, src_suffix): '''Generate the dispatch function to write pixels to any format''' for format in formats: if is_format_supported(format): - generate_format_write(format, src_type, src_native_type, src_suffix) + generate_format_write(format, src_channel, src_native_type, src_suffix) print 'void' print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type) @@ -309,7 +301,7 @@ def generate_write(formats, src_type, src_native_type, src_suffix): for format in formats: if is_format_supported(format): print ' case %s:' % format.name - print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix) + print ' func = &lp_tile_%s_write_%s;' % (format.short_name(), src_suffix) print ' break;' print ' default:' print ' debug_printf("unsupported format\\n");' @@ -359,12 +351,12 @@ def main(): generate_clamp() - type = Type(UNSIGNED, True, 8) + channel = Channel(UNSIGNED, True, 8) native_type = 'uint8_t' suffix = '4ub' - generate_read(formats, type, native_type, suffix) - generate_write(formats, type, native_type, suffix) + generate_read(formats, channel, native_type, suffix) + generate_write(formats, channel, native_type, suffix) if __name__ == '__main__': diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h deleted file mode 100644 index ce11fa93041..00000000000 --- a/src/gallium/drivers/llvmpipe/lp_winsys.h +++ /dev/null @@ -1,125 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * llvmpipe public interface. - */ - - -#ifndef LP_WINSYS_H -#define LP_WINSYS_H - - -#include "pipe/p_compiler.h" /* for boolean */ -#include "pipe/p_format.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_screen; -struct pipe_context; - - -/** - * Opaque pointer. - */ -struct llvmpipe_displaytarget; - - -/** - * This is the interface that llvmpipe expects any window system - * hosting it to implement. - * - * llvmpipe is for the most part a self sufficient driver. The only thing it - * does not know is how to display a surface. - */ -struct llvmpipe_winsys -{ - void - (*destroy)( struct llvmpipe_winsys *ws ); - - boolean - (*is_displaytarget_format_supported)( struct llvmpipe_winsys *ws, - enum pipe_format format ); - - /** - * Allocate storage for a render target. - * - * Often surfaces which are meant to be blitted to the front screen (i.e., - * display targets) must be allocated with special characteristics, memory - * pools, or obtained directly from the windowing system. - * - * This callback is invoked by the pipe_screen when creating a texture marked - * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying - * storage. - */ - struct llvmpipe_displaytarget * - (*displaytarget_create)( struct llvmpipe_winsys *ws, - enum pipe_format format, - unsigned width, unsigned height, - unsigned alignment, - unsigned *stride ); - - void * - (*displaytarget_map)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt, - unsigned flags ); - - void - (*displaytarget_unmap)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt ); - - /** - * @sa pipe_screen:flush_frontbuffer. - * - * This call will likely become asynchronous eventually. - */ - void - (*displaytarget_display)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt, - void *context_private ); - - void - (*displaytarget_destroy)( struct llvmpipe_winsys *ws, - struct llvmpipe_displaytarget *dt ); -}; - - - -struct pipe_screen * -llvmpipe_create_screen( struct llvmpipe_winsys * ); - - -#ifdef __cplusplus -} -#endif - -#endif /* LP_WINSYS_H */ diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile index 0e02680bc63..0cb66041d50 100644 --- a/src/gallium/drivers/nouveau/Makefile +++ b/src/gallium/drivers/nouveau/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = nouveau C_SOURCES = nouveau_screen.c \ - nouveau_context.c \ - nv04_surface_2d.c + nouveau_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c index 23443869e68..15174983e7f 100644 --- a/src/gallium/drivers/nouveau/nouveau_context.c +++ b/src/gallium/drivers/nouveau/nouveau_context.c @@ -1,5 +1,5 @@ -#include <pipe/p_defines.h> -#include <pipe/p_context.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" #include "nouveau/nouveau_screen.h" #include "nouveau/nouveau_context.h" diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c index 81bc296ab44..b1ad686022a 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.c +++ b/src/gallium/drivers/nouveau/nouveau_screen.c @@ -1,9 +1,10 @@ -#include <pipe/p_defines.h> -#include <pipe/p_screen.h> -#include <pipe/p_state.h> +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_state.h" -#include <util/u_memory.h> -#include <util/u_inlines.h> +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" #include <stdio.h> #include <errno.h> @@ -12,6 +13,9 @@ #include "nouveau_winsys.h" #include "nouveau_screen.h" +/* XXX this should go away */ +#include "state_tracker/drm_api.h" + static const char * nouveau_screen_get_name(struct pipe_screen *pscreen) { @@ -120,7 +124,7 @@ nouveau_screen_map_flags(unsigned pipe) if (pipe & PIPE_BUFFER_USAGE_DONTBLOCK) flags |= NOUVEAU_BO_NOWAIT; else - if (pipe & 0 /*PIPE_BUFFER_USAGE_UNSYNCHRONIZED*/) + if (pipe & PIPE_BUFFER_USAGE_UNSYNCHRONIZED) flags |= NOUVEAU_BO_NOSYNC; return flags; @@ -231,6 +235,72 @@ nouveau_screen_fence_finish(struct pipe_screen *screen, return 0; } + +/* + * Both texture_{from|get}_handle use drm api defines directly which they + * shouldn't do. The problem is that from|get are pipe functions and as + * such they should be defined in the pipe level. If nouveau had a propper + * winsys interface we would have added from|get to that interface using + * the winsys_handle struct as done with other drivers. However this code + * calls directly into the libdrm_nouveau.so functions (nouveau_bo_*). So + * we need to translate the handle into something they understand. + */ +static struct pipe_texture * +nouveau_screen_texture_from_handle(struct pipe_screen *pscreen, + const struct pipe_texture *templ, + struct winsys_handle *whandle) +{ + struct nouveau_device *dev = nouveau_screen(pscreen)->device; + struct pipe_texture *pt; + struct pipe_buffer *pb; + int ret; + + pb = CALLOC(1, sizeof(struct pipe_buffer) + sizeof(struct nouveau_bo*)); + if (!pb) + return NULL; + + ret = nouveau_bo_handle_ref(dev, whandle->handle, (struct nouveau_bo**)(pb+1)); + if (ret) { + debug_printf("%s: ref name 0x%08x failed with %d\n", + __func__, whandle->handle, ret); + FREE(pb); + return NULL; + } + + pipe_reference_init(&pb->reference, 1); + pb->screen = pscreen; + pb->alignment = 0; + pb->usage = PIPE_BUFFER_USAGE_GPU_READ_WRITE | + PIPE_BUFFER_USAGE_CPU_READ_WRITE; + pb->size = nouveau_bo(pb)->size; + pt = nouveau_screen(pscreen)->texture_blanket(pscreen, templ, + &whandle->stride, pb); + pipe_buffer_reference(&pb, NULL); + return pt; +} + +static boolean +nouveau_screen_texture_get_handle(struct pipe_screen *pscreen, + struct pipe_texture *pt, + struct winsys_handle *whandle) +{ + struct nouveau_miptree *mt = nouveau_miptree(pt); + + if (!mt || !mt->bo) + return false; + + whandle->stride = util_format_get_stride(mt->base.format, mt->base.width0); + + if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) { + return nouveau_bo_handle_get(mt->bo, &whandle->handle) == 0; + } else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) { + whandle->handle = mt->bo->handle; + return TRUE; + } else { + return FALSE; + } +} + int nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) { @@ -258,6 +328,9 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) pscreen->fence_signalled = nouveau_screen_fence_signalled; pscreen->fence_finish = nouveau_screen_fence_finish; + pscreen->texture_from_handle = nouveau_screen_texture_from_handle; + pscreen->texture_get_handle = nouveau_screen_texture_get_handle; + return 0; } diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h index a7927d88dfc..f4a7a2bc234 100644 --- a/src/gallium/drivers/nouveau/nouveau_screen.h +++ b/src/gallium/drivers/nouveau/nouveau_screen.h @@ -6,6 +6,18 @@ struct nouveau_screen { struct nouveau_device *device; struct nouveau_channel *channel; + /** + * Create a new texture object, using the given template info, but on top of + * existing memory. + * + * It is assumed that the buffer data is layed out according to the expected + * by the hardware. NULL will be returned if any inconsistency is found. + */ + struct pipe_texture * (*texture_blanket)(struct pipe_screen *, + const struct pipe_texture *templat, + const unsigned *stride, + struct pipe_buffer *buffer); + int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen, struct pipe_buffer *pb, unsigned usage); }; diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h index a10114beab9..ab7761a31da 100644 --- a/src/gallium/drivers/nouveau/nouveau_util.h +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -33,7 +33,7 @@ nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp, max = max - (max % 3); break; case PIPE_PRIM_QUADS: - max = max & 3; + max = max & ~3; break; case PIPE_PRIM_LINE_LOOP: case PIPE_PRIM_LINE_STRIP: @@ -88,4 +88,104 @@ static INLINE unsigned log2i(unsigned i) return r; } +struct u_split_prim { + void *priv; + void (*emit)(void *priv, unsigned start, unsigned count); + void (*edge)(void *priv, boolean enabled); + + unsigned mode; + unsigned start; + unsigned p_start; + unsigned p_end; + + int repeat_first:1; + int close_first:1; + int edgeflag_off:1; +}; + +static inline void +u_split_prim_init(struct u_split_prim *s, + unsigned mode, unsigned start, unsigned count) +{ + if (mode == PIPE_PRIM_LINE_LOOP) { + s->mode = PIPE_PRIM_LINE_STRIP; + s->close_first = 1; + } else { + s->mode = mode; + s->close_first = 0; + } + s->start = start; + s->p_start = start; + s->p_end = start + count; + s->edgeflag_off = 0; + s->repeat_first = 0; +} + +static INLINE boolean +u_split_prim_next(struct u_split_prim *s, unsigned max_verts) +{ + int repeat = 0; + + if (s->repeat_first) { + s->emit(s->priv, s->start, 1); + max_verts--; + if (s->edgeflag_off) { + s->edge(s->priv, TRUE); + s->edgeflag_off = FALSE; + } + } + + if (s->p_start + s->close_first + max_verts >= s->p_end) { + s->emit(s->priv, s->p_start, s->p_end - s->p_start); + if (s->close_first) + s->emit(s->priv, s->start, 1); + return TRUE; + } + + switch (s->mode) { + case PIPE_PRIM_LINES: + max_verts &= ~1; + break; + case PIPE_PRIM_LINE_STRIP: + repeat = 1; + break; + case PIPE_PRIM_POLYGON: + max_verts--; + s->emit(s->priv, s->p_start, max_verts); + s->edge(s->priv, FALSE); + s->emit(s->priv, s->p_start + max_verts, 1); + s->p_start += max_verts; + s->repeat_first = TRUE; + s->edgeflag_off = TRUE; + return FALSE; + case PIPE_PRIM_TRIANGLES: + max_verts = max_verts - (max_verts % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + /* to ensure winding stays correct, always split + * on an even number of generated triangles + */ + max_verts = max_verts & ~1; + repeat = 2; + break; + case PIPE_PRIM_TRIANGLE_FAN: + s->repeat_first = TRUE; + repeat = 1; + break; + case PIPE_PRIM_QUADS: + max_verts &= ~3; + break; + case PIPE_PRIM_QUAD_STRIP: + max_verts &= ~1; + repeat = 2; + break; + default: + break; + } + + s->emit (s->priv, s->p_start, max_verts); + s->p_start += (max_verts - repeat); + return FALSE; +} + #endif diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index af9ddd558c8..bed014b9ce0 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -27,10 +27,7 @@ #define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19) extern struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *); - -extern struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *); +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *); extern struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *); diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile deleted file mode 100644 index 364c80d8f3e..00000000000 --- a/src/gallium/drivers/nv30/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv30 - -C_SOURCES = \ - nv30_clear.c \ - nv30_context.c \ - nv30_draw.c \ - nv30_fragprog.c \ - nv30_fragtex.c \ - nv30_miptree.c \ - nv30_query.c \ - nv30_screen.c \ - nv30_state.c \ - nv30_state_blend.c \ - nv30_state_emit.c \ - nv30_state_fb.c \ - nv30_state_rasterizer.c \ - nv30_state_scissor.c \ - nv30_state_stipple.c \ - nv30_state_viewport.c \ - nv30_state_zsa.c \ - nv30_surface.c \ - nv30_transfer.c \ - nv30_vbo.c \ - nv30_vertprog.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c deleted file mode 100644 index c4ba9266647..00000000000 --- a/src/gallium/drivers/nv30/nv30_clear.c +++ /dev/null @@ -1,14 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_clear.h" - -#include "nv30_context.h" - -void -nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil) -{ - util_clear(pipe, &nv30_context(pipe)->framebuffer, buffers, rgba, depth, - stencil); -} diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c deleted file mode 100644 index 279b74445ca..00000000000 --- a/src/gallium/drivers/nv30/nv30_context.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nv30_context.h" -#include "nv30_screen.h" - -static void -nv30_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, rankine, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, rankine, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv30_destroy(struct pipe_context *pipe) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned i; - - for (i = 0; i < NV30_STATE_MAX; i++) { - if (nv30->state.hw[i]) - so_ref(NULL, &nv30->state.hw[i]); - } - - if (nv30->draw) - draw_destroy(nv30->draw); - FREE(nv30); -} - -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv) -{ - struct nv30_screen *screen = nv30_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv30_context *nv30; - struct nouveau_winsys *nvws = screen->nvws; - - nv30 = CALLOC(1, sizeof(struct nv30_context)); - if (!nv30) - return NULL; - nv30->screen = screen; - - nv30->nvws = nvws; - - nv30->pipe.winsys = ws; - nv30->pipe.screen = pscreen; - nv30->pipe.priv = priv; - nv30->pipe.destroy = nv30_destroy; - nv30->pipe.draw_arrays = nv30_draw_arrays; - nv30->pipe.draw_elements = nv30_draw_elements; - nv30->pipe.clear = nv30_clear; - nv30->pipe.flush = nv30_flush; - - nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nv30; - screen->base.channel->flush_notify = nv30_state_flush_notify; - - nv30_init_query_functions(nv30); - nv30_init_surface_functions(nv30); - nv30_init_state_functions(nv30); - - /* Create, configure, and install fallback swtnl path */ - nv30->draw = draw_create(); - draw_wide_point_threshold(nv30->draw, 9999999.0); - draw_wide_line_threshold(nv30->draw, 9999999.0); - draw_enable_line_stipple(nv30->draw, FALSE); - draw_enable_point_sprites(nv30->draw, FALSE); - draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); - - return &nv30->pipe; -} diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h deleted file mode 100644 index ca3d6aca7fa..00000000000 --- a/src/gallium/drivers/nv30/nv30_context.h +++ /dev/null @@ -1,219 +0,0 @@ -#ifndef __NV30_CONTEXT_H__ -#define __NV30_CONTEXT_H__ - -#include <stdio.h> - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_inlines.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" -#include "nouveau/nouveau_stateobj.h" - -#include "nv30_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -enum nv30_state_index { - NV30_STATE_FB = 0, - NV30_STATE_VIEWPORT = 1, - NV30_STATE_BLEND = 2, - NV30_STATE_RAST = 3, - NV30_STATE_ZSA = 4, - NV30_STATE_BCOL = 5, - NV30_STATE_CLIP = 6, - NV30_STATE_SCISSOR = 7, - NV30_STATE_STIPPLE = 8, - NV30_STATE_FRAGPROG = 9, - NV30_STATE_VERTPROG = 10, - NV30_STATE_FRAGTEX0 = 11, - NV30_STATE_FRAGTEX1 = 12, - NV30_STATE_FRAGTEX2 = 13, - NV30_STATE_FRAGTEX3 = 14, - NV30_STATE_FRAGTEX4 = 15, - NV30_STATE_FRAGTEX5 = 16, - NV30_STATE_FRAGTEX6 = 17, - NV30_STATE_FRAGTEX7 = 18, - NV30_STATE_FRAGTEX8 = 19, - NV30_STATE_FRAGTEX9 = 20, - NV30_STATE_FRAGTEX10 = 21, - NV30_STATE_FRAGTEX11 = 22, - NV30_STATE_FRAGTEX12 = 23, - NV30_STATE_FRAGTEX13 = 24, - NV30_STATE_FRAGTEX14 = 25, - NV30_STATE_FRAGTEX15 = 26, - NV30_STATE_VERTTEX0 = 27, - NV30_STATE_VERTTEX1 = 28, - NV30_STATE_VERTTEX2 = 29, - NV30_STATE_VERTTEX3 = 30, - NV30_STATE_VTXBUF = 31, - NV30_STATE_VTXFMT = 32, - NV30_STATE_VTXATTR = 33, - NV30_STATE_SR = 34, - NV30_STATE_MAX = 35 -}; - -#include "nv30_screen.h" - -#define NV30_NEW_BLEND (1 << 0) -#define NV30_NEW_RAST (1 << 1) -#define NV30_NEW_ZSA (1 << 2) -#define NV30_NEW_SAMPLER (1 << 3) -#define NV30_NEW_FB (1 << 4) -#define NV30_NEW_STIPPLE (1 << 5) -#define NV30_NEW_SCISSOR (1 << 6) -#define NV30_NEW_VIEWPORT (1 << 7) -#define NV30_NEW_BCOL (1 << 8) -#define NV30_NEW_VERTPROG (1 << 9) -#define NV30_NEW_FRAGPROG (1 << 10) -#define NV30_NEW_ARRAYS (1 << 11) -#define NV30_NEW_UCP (1 << 12) -#define NV30_NEW_SR (1 << 13) - -struct nv30_rasterizer_state { - struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv30_zsa_state { - struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv30_blend_state { - struct pipe_blend_state pipe; - struct nouveau_stateobj *so; -}; - - -struct nv30_state { - unsigned scissor_enabled; - unsigned stipple_enabled; - unsigned viewport_bypass; - unsigned fp_samplers; - - uint64_t dirty; - struct nouveau_stateobj *hw[NV30_STATE_MAX]; -}; - -struct nv30_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv30_screen *screen; - - struct draw_context *draw; - - /* HW state derived from pipe states */ - struct nv30_state state; - - /* Context state */ - unsigned dirty; - struct pipe_scissor_state scissor; - unsigned stipple[32]; - struct nv30_vertex_program *vertprog; - struct nv30_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - struct nv30_rasterizer_state *rasterizer; - struct nv30_zsa_state *zsa; - struct nv30_blend_state *blend; - struct pipe_blend_color blend_colour; - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; - unsigned idxbuf_format; - struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - unsigned nr_textures; - unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - unsigned vtxbuf_nr; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; - unsigned vtxelt_nr; -}; - -static INLINE struct nv30_context * -nv30_context(struct pipe_context *pipe) -{ - return (struct nv30_context *)pipe; -} - -struct nv30_state_entry { - boolean (*validate)(struct nv30_context *nv30); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; - -extern void nv30_init_state_functions(struct nv30_context *nv30); -extern void nv30_init_surface_functions(struct nv30_context *nv30); -extern void nv30_init_query_functions(struct nv30_context *nv30); - -extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv30_draw.c */ -extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); - -/* nv30_vertprog.c */ -extern void nv30_vertprog_destroy(struct nv30_context *, - struct nv30_vertex_program *); - -/* nv30_fragprog.c */ -extern void nv30_fragprog_destroy(struct nv30_context *, - struct nv30_fragment_program *); - -/* nv30_fragtex.c */ -extern void nv30_fragtex_bind(struct nv30_context *); - -/* nv30_state.c and friends */ -extern boolean nv30_state_validate(struct nv30_context *nv30); -extern void nv30_state_emit(struct nv30_context *nv30); -extern void nv30_state_flush_notify(struct nouveau_channel *chan); -extern struct nv30_state_entry nv30_state_rasterizer; -extern struct nv30_state_entry nv30_state_scissor; -extern struct nv30_state_entry nv30_state_stipple; -extern struct nv30_state_entry nv30_state_fragprog; -extern struct nv30_state_entry nv30_state_vertprog; -extern struct nv30_state_entry nv30_state_blend; -extern struct nv30_state_entry nv30_state_blend_colour; -extern struct nv30_state_entry nv30_state_zsa; -extern struct nv30_state_entry nv30_state_viewport; -extern struct nv30_state_entry nv30_state_framebuffer; -extern struct nv30_state_entry nv30_state_fragtex; -extern struct nv30_state_entry nv30_state_vbo; -extern struct nv30_state_entry nv30_state_sr; - -/* nv30_vbo.c */ -extern void nv30_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv30_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv30_clear.c */ -extern void nv30_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - -/* nv30_context.c */ -struct pipe_context * -nv30_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c deleted file mode 100644 index 74fc138c051..00000000000 --- a/src/gallium/drivers/nv30/nv30_draw.c +++ /dev/null @@ -1,61 +0,0 @@ -#include "draw/draw_pipe.h" - -#include "nv30_context.h" - -struct nv30_draw_stage { - struct draw_stage draw; - struct nv30_context *nv30; -}; - -static void -nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_flush(struct draw_stage *draw, unsigned flags) -{ -} - -static void -nv30_draw_reset_stipple_counter(struct draw_stage *draw) -{ - NOUVEAU_ERR("\n"); -} - -static void -nv30_draw_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -struct draw_stage * -nv30_draw_render_stage(struct nv30_context *nv30) -{ - struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); - - nv30draw->nv30 = nv30; - nv30draw->draw.draw = nv30->draw; - nv30draw->draw.point = nv30_draw_point; - nv30draw->draw.line = nv30_draw_line; - nv30draw->draw.tri = nv30_draw_tri; - nv30draw->draw.flush = nv30_draw_flush; - nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; - nv30draw->draw.destroy = nv30_draw_destroy; - - return &nv30draw->draw; -} - diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c deleted file mode 100644 index 2c432c6dfa7..00000000000 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ /dev/null @@ -1,905 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv30_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) - -#define MAX_CONSTS 128 -#define MAX_IMM 32 -struct nv30_fpc { - struct nv30_fragment_program *fp; - - uint attrib_map[PIPE_MAX_SHADER_INPUTS]; - - int high_temp; - int temp_temp_count; - int num_regs; - - uint depth_id; - uint colour_id; - - unsigned inst_offset; - - struct { - int pipe; - float vals[4]; - } consts[MAX_CONSTS]; - int nr_consts; - - struct nv30_sreg imm[MAX_IMM]; - unsigned nr_imm; -}; - -static INLINE struct nv30_sreg -temp(struct nv30_fpc *fpc) -{ - int idx; - - idx = fpc->temp_temp_count++; - idx += fpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static INLINE struct nv30_sreg -constant(struct nv30_fpc *fpc, int pipe, float vals[4]) -{ - int idx; - - if (fpc->nr_consts == MAX_CONSTS) - assert(0); - idx = fpc->nr_consts++; - - fpc->consts[idx].pipe = pipe; - if (pipe == -1) - memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ - (d), (m), (s0), (s1), (s2)) -#define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ - (d), (m), (s0), none, none) - -static void -grow_insns(struct nv30_fpc *fpc, int size) -{ - struct nv30_fragment_program *fp = fpc->fp; - - fp->insn_len += size; - fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); -} - -static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_INPUT: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); - break; - case NV30SR_OUTPUT: - sr |= NV30_FP_REG_SRC_HALF; - /* fall-through */ - case NV30SR_TEMP: - sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV30_FP_REG_SRC_SHIFT); - break; - case NV30SR_CONST: - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - if (fpc->consts[src.index].pipe >= 0) { - struct nv30_fragment_program_data *fpd; - - fp->consts = realloc(fp->consts, ++fp->nr_consts * - sizeof(*fpd)); - fpd = &fp->consts[fp->nr_consts - 1]; - fpd->offset = fpc->inst_offset + 4; - fpd->index = fpc->consts[src.index].pipe; - memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); - } else { - memcpy(&fp->insn[fpc->inst_offset + 4], - fpc->consts[src.index].vals, - sizeof(uint32_t) * 4); - } - - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); - break; - case NV30SR_NONE: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_FP_REG_NEGATE; - - if (src.abs) - hw[1] |= (1 << (29 + pos)); - - sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); - - hw[pos + 1] |= sr; -} - -static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw = &fp->insn[fpc->inst_offset]; - - switch (dst.type) { - case NV30SR_TEMP: - if (fpc->num_regs < (dst.index + 1)) - fpc->num_regs = dst.index + 1; - break; - case NV30SR_OUTPUT: - if (dst.index == 1) { - fp->fp_control |= 0xe; - } else { - hw[0] |= NV30_FP_OP_OUT_REG_HALF; - } - break; - case NV30SR_NONE: - hw[0] |= (1 << 30); - break; - default: - assert(0); - } - - hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); -} - -static void -nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - uint32_t *hw; - - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - hw = &fp->insn[fpc->inst_offset]; - memset(hw, 0, sizeof(uint32_t) * 4); - - if (op == NV30_FP_OP_OPCODE_KIL) - fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); - - if (sat) - hw[0] |= NV30_FP_OP_OUT_SAT; - - if (dst.cc_update) - hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); - - emit_dst(fpc, dst); - emit_src(fpc, 0, s0); - emit_src(fpc, 1, s1); - emit_src(fpc, 2, s2); -} - -static void -nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) -{ - struct nv30_fragment_program *fp = fpc->fp; - - nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - - fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); - fp->samplers |= (1 << unit); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) -{ - struct nv30_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->Register.Index]); - break; - case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->Register.Index, NULL); - break; - case TGSI_FILE_IMMEDIATE: - assert(fsrc->Register.Index < fpc->nr_imm); - src = fpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1); - if (fpc->high_temp < src.index) - fpc->high_temp = src.index; - break; - /* This is clearly insane, but gallium hands us shaders like this. - * Luckily fragprog results are just temp regs.. - */ - case TGSI_FILE_OUTPUT: - if (fsrc->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { - int idx; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - if (fdst->Register.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); - else - return nv30_sr(NV30SR_OUTPUT, 1); - break; - case TGSI_FILE_TEMPORARY: - idx = fdst->Register.Index + 1; - if (fpc->high_temp < idx) - fpc->high_temp = idx; - return nv30_sr(NV30SR_TEMP, idx); - case TGSI_FILE_NULL: - return nv30_sr(NV30SR_NONE, 0); - default: - NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv30_sr(NV30SR_NONE, 0); - } -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv30_sreg *src) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, - const struct tgsi_full_instruction *finst) -{ - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg src[3], dst, tmp; - int mask, sat, unit = 0; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - fpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(fpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - NOUVEAU_MSG("extra src attr %d\n", - fsrc->Register.Index); - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(fpc, fsrc); - } else { - src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, - tgsi_src(fpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - case TGSI_FILE_SAMPLER: - unit = fsrc->Register.Index; - break; - case TGSI_FILE_OUTPUT: - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(fpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_CMP: - tmp = nv30_sr(NV30SR_NONE, 0); - tmp.cc_update = 1; - arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV30_VP_INST_COND_GE; - arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV30_VP_INST_COND_LT; - arith(fpc, sat, MOV, dst, mask, src[1], none, none); - break; - case TGSI_OPCODE_COS: - arith(fpc, sat, COS, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); - arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), - swz(src[1], W, W, W, W), none); - break; - case TGSI_OPCODE_DST: - arith(fpc, sat, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(fpc, sat, EX2, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FLR: - arith(fpc, sat, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(fpc, sat, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_KILP: - arith(fpc, 0, KIL, none, 0, none, none, none); - break; - case TGSI_OPCODE_KIL: - dst = nv30_sr(NV30SR_NONE, 0); - dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; - arith(fpc, 0, KIL, dst, 0, none, none, none); - break; - case TGSI_OPCODE_LG2: - arith(fpc, sat, LG2, dst, mask, src[0], none, none); - break; -// case TGSI_OPCODE_LIT: - case TGSI_OPCODE_LRP: - arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAD: - arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(fpc, sat, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - arith(fpc, sat, POW, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RCP: - arith(fpc, sat, RCP, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_RET: - assert(0); - break; - case TGSI_OPCODE_RFL: - arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_RSQ: - arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); - break; - case TGSI_OPCODE_SCS: - /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) - { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - } - else - { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); - } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); - } - } - break; - case TGSI_OPCODE_SIN: - arith(fpc, sat, SIN, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_SGE: - arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); - break; - case TGSI_OPCODE_TEX: - tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXB: - tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_TXP: - tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_XPD: - tmp = temp(fpc); - arith(fpc, 0, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_FP_OP_INPUT_SRC_POSITION; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_FP_OP_INPUT_SRC_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_FP_OP_INPUT_SRC_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_FP_OP_INPUT_SRC_FOGC; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - default: - NOUVEAU_ERR("bad input semantic\n"); - return FALSE; - } - - fpc->attrib_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, - const struct tgsi_full_declaration *fdec) -{ - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->Range.First; - break; - case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->Range.First; - break; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_fragprog_prepare(struct nv30_fpc *fpc) -{ - struct tgsi_parse_context p; - /*int high_temp = -1, i;*/ - - tgsi_parse_init(&p, fpc->fp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_INPUT: - if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) - goto out_err; - break; - case TGSI_FILE_OUTPUT: - if (!nv30_fragprog_parse_decl_output(fpc, fdec)) - goto out_err; - break; - /*case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break;*/ - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm; - float vals[4]; - - imm = &p.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(fpc->nr_imm < MAX_IMM); - - vals[0] = imm->u[0].Float; - vals[1] = imm->u[1].Float; - vals[2] = imm->u[2].Float; - vals[3] = imm->u[3].Float; - fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); - } - break; - default: - break; - } - } - tgsi_parse_free(&p); - - /*if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); - for (i = 0; i < high_temp; i++) - fpc->r_temp[i] = temp(fpc); - fpc->r_temps_discard = 0; - }*/ - - return TRUE; - -out_err: - /*if (fpc->r_temp) - FREE(fpc->r_temp);*/ - tgsi_parse_free(&p); - return FALSE; -} - -static void -nv30_fragprog_translate(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - struct tgsi_parse_context parse; - struct nv30_fpc *fpc = NULL; - - tgsi_dump(fp->pipe.tokens,0); - - fpc = CALLOC(1, sizeof(struct nv30_fpc)); - if (!fpc) - return; - fpc->fp = fp; - fpc->high_temp = -1; - fpc->num_regs = 2; - - if (!nv30_fragprog_prepare(fpc)) { - FREE(fpc); - return; - } - - tgsi_parse_init(&parse, fp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - - finst = &parse.FullToken.FullInstruction; - if (!nv30_fragprog_parse_instruction(fpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - fp->fp_control |= (fpc->num_regs-1)/2; - fp->fp_reg_control = (1<<16)|0x4; - - /* Terminate final instruction */ - fp->insn[fpc->inst_offset] |= 0x00000001; - - /* Append NOP + END instruction, may or may not be necessary. */ - fpc->inst_offset = fp->insn_len; - grow_insns(fpc, 4); - fp->insn[fpc->inst_offset + 0] = 0x00000001; - fp->insn[fpc->inst_offset + 1] = 0x00000000; - fp->insn[fpc->inst_offset + 2] = 0x00000000; - fp->insn[fpc->inst_offset + 3] = 0x00000000; - - fp->translated = TRUE; - fp->on_hw = FALSE; -out_err: - tgsi_parse_free(&parse); - FREE(fpc); -} - -static void -nv30_fragprog_upload(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - struct pipe_screen *pscreen = nv30->pipe.screen; - const uint32_t le = 1; - uint32_t *map; - int i; - - map = pipe_buffer_map(pscreen, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - -#if 0 - for (i = 0; i < fp->insn_len; i++) { - fflush(stdout); fflush(stderr); - NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); - fflush(stdout); fflush(stderr); - } -#endif - - if ((*(const uint8_t *)&le)) { - for (i = 0; i < fp->insn_len; i++) { - map[i] = fp->insn[i]; - } - } else { - /* Weird swapping for big-endian chips */ - for (i = 0; i < fp->insn_len; i++) { - map[i] = ((fp->insn[i] & 0xffff) << 16) | - ((fp->insn[i] >> 16) & 0xffff); - } - } - - pipe_buffer_unmap(pscreen, fp->buffer); -} - -static boolean -nv30_fragprog_validate(struct nv30_context *nv30) -{ - struct nv30_fragment_program *fp = nv30->fragprog; - struct pipe_buffer *constbuf = - nv30->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_stateobj *so; - boolean new_consts = FALSE; - int i; - - if (fp->translated) - goto update_constants; - - /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ - nv30_fragprog_translate(nv30, fp); - if (!fp->translated) { - /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ - return FALSE; - } - - fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv30_fragprog_upload(nv30, fp); - - so = so_new(4, 4, 1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); - so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, - NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); - so_data (so, fp->fp_control); - so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, fp->fp_reg_control); - so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); - so_data (so, fp->samplers); - so_ref(so, &fp->so); - so_ref(NULL, &so); - -update_constants: - if (fp->nr_consts) { - float *map; - - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - for (i = 0; i < fp->nr_consts; i++) { - struct nv30_fragment_program_data *fpd = &fp->consts[i]; - uint32_t *p = &fp->insn[fpd->offset]; - uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; - - if (!memcmp(p, cb, 4 * sizeof(float))) - continue; - memcpy(p, cb, 4 * sizeof(float)); - new_consts = TRUE; - } - pipe_buffer_unmap(pscreen, constbuf); - - if (new_consts) - nv30_fragprog_upload(nv30, fp); - } - - if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { - so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_fragprog_destroy(struct nv30_context *nv30, - struct nv30_fragment_program *fp) -{ - if (fp->buffer) - pipe_buffer_reference(&fp->buffer, NULL); - - if (fp->so) - so_ref(NULL, &fp->so); - - if (fp->insn_len) - FREE(fp->insn); -} - -struct nv30_state_entry nv30_state_fragprog = { - .validate = nv30_fragprog_validate, - .dirty = { - .pipe = NV30_NEW_FRAGPROG, - .hw = NV30_STATE_FRAGPROG - } -}; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c deleted file mode 100644 index c29c36e20aa..00000000000 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ /dev/null @@ -1,239 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_math.h" - -#include "nv30_context.h" -#include "../nouveau/nv04_surface_2d.h" - -static void -nv30_miptree_layout(struct nv30_miptree *nv30mt) -{ - struct pipe_texture *pt = &nv30mt->base; - uint width = pt->width0; - uint offset = 0; - int nr_faces, l, f; - uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL | - PIPE_TEXTURE_USAGE_RENDER_TARGET | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY); - - if (pt->target == PIPE_TEXTURE_CUBE) { - nr_faces = 6; - } else - if (pt->target == PIPE_TEXTURE_3D) { - nr_faces = pt->depth0; - } else { - nr_faces = 1; - } - - for (l = 0; l <= pt->last_level; l++) { - if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) - nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64); - else - nv30mt->level[l].pitch = util_format_get_stride(pt->format, width); - - nv30mt->level[l].image_offset = - CALLOC(nr_faces, sizeof(unsigned)); - - width = u_minify(width, 1); - } - - for (f = 0; f < nr_faces; f++) { - for (l = 0; l < pt->last_level; l++) { - nv30mt->level[l].image_offset[f] = offset; - - if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && - u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1) - offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64); - else - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->level[l].image_offset[f] = offset; - offset += nv30mt->level[l].pitch * u_minify(pt->height0, l); - } - - nv30mt->total_size = offset; -} - -static struct pipe_texture * -nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) -{ - struct nv30_miptree *mt; - unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | - NOUVEAU_BUFFER_USAGE_TEXTURE; - - mt = MALLOC(sizeof(struct nv30_miptree)); - if (!mt) - return NULL; - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - - /* Swizzled textures must be POT */ - if (pt->width0 & (pt->width0 - 1) || - pt->height0 & (pt->height0 - 1)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | - PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - else { - switch (pt->format) { - /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_A8L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_I8_UNORM: - { - if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - break; - } - default: - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - } - } - - if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) - buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - - /* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear. - * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy. - * This also happens for small mipmaps of large textures. */ - if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - nv30_miptree_layout(mt); - - mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, - mt->total_size); - if (!mt->buffer) { - FREE(mt); - return NULL; - } - mt->bo = nouveau_bo(mt->buffer); - - return &mt->base; -} - -static struct pipe_texture * -nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, - const unsigned *stride, struct pipe_buffer *pb) -{ - struct nv30_miptree *mt; - - /* Only supports 2D, non-mipmapped textures for the moment */ - if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || - pt->depth0 != 1) - return NULL; - - mt = CALLOC_STRUCT(nv30_miptree); - if (!mt) - return NULL; - - mt->base = *pt; - pipe_reference_init(&mt->base.reference, 1); - mt->base.screen = pscreen; - mt->level[0].pitch = stride[0]; - mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); - - /* Assume whoever created this buffer expects it to be linear for now */ - mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - - pipe_buffer_reference(&mt->buffer, pb); - mt->bo = nouveau_bo(mt->buffer); - return &mt->base; -} - -static void -nv30_miptree_destroy(struct pipe_texture *pt) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)pt; - int l; - - pipe_buffer_reference(&mt->buffer, NULL); - for (l = 0; l <= pt->last_level; l++) { - if (mt->level[l].image_offset) - FREE(mt->level[l].image_offset); - } - - FREE(mt); -} - -static struct pipe_surface * -nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned flags) -{ - struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; - struct nv04_surface *ns; - - ns = CALLOC_STRUCT(nv04_surface); - if (!ns) - return NULL; - pipe_texture_reference(&ns->base.texture, pt); - ns->base.format = pt->format; - ns->base.width = u_minify(pt->width0, level); - ns->base.height = u_minify(pt->height0, level); - ns->base.usage = flags; - pipe_reference_init(&ns->base.reference, 1); - ns->base.face = face; - ns->base.level = level; - ns->base.zslice = zslice; - ns->pitch = nv30mt->level[level].pitch; - - if (pt->target == PIPE_TEXTURE_CUBE) { - ns->base.offset = nv30mt->level[level].image_offset[face]; - } else - if (pt->target == PIPE_TEXTURE_3D) { - ns->base.offset = nv30mt->level[level].image_offset[zslice]; - } else { - ns->base.offset = nv30mt->level[level].image_offset[0]; - } - - /* create a linear temporary that we can render into if necessary. - * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so - * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ - if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base; - - return &ns->base; -} - -static void -nv30_miptree_surface_del(struct pipe_surface *ps) -{ - struct nv04_surface* ns = (struct nv04_surface*)ps; - if(ns->backing) - { - struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen; - if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) - screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv30_miptree_surface_del(&ns->backing->base); - } - - pipe_texture_reference(&ps->texture, NULL); - FREE(ps); -} - -void -nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) -{ - pscreen->texture_create = nv30_miptree_create; - pscreen->texture_blanket = nv30_miptree_blanket; - pscreen->texture_destroy = nv30_miptree_destroy; - pscreen->get_tex_surface = nv30_miptree_surface_new; - pscreen->tex_surface_destroy = nv30_miptree_surface_del; -} diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c deleted file mode 100644 index e27e9ccbf60..00000000000 --- a/src/gallium/drivers/nv30/nv30_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv30_context.h" - -struct nv30_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv30_query * -nv30_query(struct pipe_query *pipe) -{ - return (struct nv30_query *)pipe; -} - -static struct pipe_query * -nv30_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv30_query *q; - - q = CALLOC(1, sizeof(struct nv30_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_query *q = nv30_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nv30->screen->query, q->object->start); - - BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - struct nv30_query *q = nv30_query(pq); - - BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_query *q = nv30_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nv30->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - - nouveau_notifier_wait_status(nv30->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, 0); - } - - q->result = nouveau_notifier_return_val(nv30->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv30_init_query_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_query = nv30_query_create; - nv30->pipe.destroy_query = nv30_query_destroy; - nv30->pipe.begin_query = nv30_query_begin; - nv30->pipe.end_query = nv30_query_end; - nv30->pipe.get_query_result = nv30_query_result; -} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c deleted file mode 100644 index aef37d303d8..00000000000 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ /dev/null @@ -1,362 +0,0 @@ -#include "pipe/p_screen.h" -#include "pipe/p_state.h" - -#include "nouveau/nouveau_screen.h" - -#include "nv30_context.h" -#include "nv30_screen.h" - -#define NV30TCL_CHIPSET_3X_MASK 0x00000003 -#define NV34TCL_CHIPSET_3X_MASK 0x00000010 -#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 - -/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h - * to get the pointer to the context front buffer, so I copied nouveau_winsys here. - * nv30_screen_surface_format_supported() can then use it to enforce creating fbo - * with same number of bits everywhere. - */ -struct nouveau_winsys { - struct pipe_winsys base; - - struct pipe_screen *pscreen; - - struct pipe_surface *front; -}; - -static int -nv30_screen_get_param(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; - case PIPE_CAP_NPOT_TEXTURES: - return 0; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 2; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 0; - case NOUVEAU_CAP_HW_VTXBUF: - case NOUVEAU_CAP_HW_IDXBUF: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 8.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 4.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv30_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; - - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - return TRUE; - case PIPE_FORMAT_Z16_UNORM: - if (front) { - return (front->format == PIPE_FORMAT_R5G6B5_UNORM); - } - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_A8L8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static struct pipe_buffer * -nv30_surface_buffer(struct pipe_surface *surf) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)surf->texture; - - return mt->buffer; -} - -static void -nv30_screen_destroy(struct pipe_screen *pscreen) -{ - struct nv30_screen *screen = nv30_screen(pscreen); - unsigned i; - - for (i = 0; i < NV30_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } - - nouveau_resource_destroy(&screen->vp_exec_heap); - nouveau_resource_destroy(&screen->vp_data_heap); - nouveau_resource_destroy(&screen->query_heap); - nouveau_notifier_free(&screen->query); - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->rankine); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -struct pipe_screen * -nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - struct nouveau_stateobj *so; - unsigned rankine_class = 0; - int ret, i; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv30_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv30_screen_destroy; - pscreen->get_param = nv30_screen_get_param; - pscreen->get_paramf = nv30_screen_get_paramf; - pscreen->is_format_supported = nv30_screen_surface_format_supported; - pscreen->context_create = nv30_create; - - nv30_screen_init_miptree_functions(pscreen); - nv30_screen_init_transfer_functions(pscreen); - - /* 3D object */ - switch (dev->chipset & 0xf0) { - case 0x30: - if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0397; - else - if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0697; - else - if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) - rankine_class = 0x0497; - break; - default: - break; - } - - if (!rankine_class) { - NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef3097, rankine_class, - &screen->rankine); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv30_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - ret = nouveau_resource_init(&screen->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 256) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv30_screen_destroy(pscreen); - return NULL; - } - - /* Static rankine initialisation */ - so = so_new(36, 60, 0); - so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); -/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle);*/ - so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); - so_data (so, chan->vram->handle); - - for (i=1; i<8; i++) { - so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); - so_data (so, 0); - so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); - so_data (so, 0); - } - - so_method(so, screen->rankine, 0x220, 1); - so_data (so, 1); - - so_method(so, screen->rankine, 0x03b0, 1); - so_data (so, 0x00100000); - so_method(so, screen->rankine, 0x1454, 1); - so_data (so, 0); - so_method(so, screen->rankine, 0x1d80, 1); - so_data (so, 3); - so_method(so, screen->rankine, 0x1450, 1); - so_data (so, 0x00030004); - - /* NEW */ - so_method(so, screen->rankine, 0x1e98, 1); - so_data (so, 0); - so_method(so, screen->rankine, 0x17e0, 3); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_method(so, screen->rankine, 0x1f80, 16); - for (i=0; i<16; i++) { - so_data (so, (i==8) ? 0x0000ffff : 0); - } - - so_method(so, screen->rankine, 0x120, 3); - so_data (so, 0); - so_data (so, 1); - so_data (so, 2); - - so_method(so, screen->rankine, 0x1d88, 1); - so_data (so, 0x00001200); - - so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); - so_data (so, 0); - - so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - - so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); - so_data (so, 0xffff0000); - - /* enables use of vp rather than fixed-function somehow */ - so_method(so, screen->rankine, 0x1e94, 1); - so_data (so, 0x13); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); - - return pscreen; -} diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h deleted file mode 100644 index 8591cd31cab..00000000000 --- a/src/gallium/drivers/nv30/nv30_screen.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __NV30_SCREEN_H__ -#define __NV30_SCREEN_H__ - -#include "nouveau/nouveau_screen.h" - -#include "nouveau/nv04_surface_2d.h" - -struct nv30_screen { - struct nouveau_screen base; - - struct nouveau_winsys *nvws; - - struct nv30_context *cur_ctx; - - /* HW graphics objects */ - struct nv04_surface_2d *eng2d; - struct nouveau_grobj *rankine; - struct nouveau_notifier *sync; - - /* Query object resources */ - struct nouveau_notifier *query; - struct nouveau_resource *query_heap; - - /* Vtxprog resources */ - struct nouveau_resource *vp_exec_heap; - struct nouveau_resource *vp_data_heap; - - /* Current 3D state of channel */ - struct nouveau_stateobj *state[NV30_STATE_MAX]; -}; - -static INLINE struct nv30_screen * -nv30_screen(struct pipe_screen *screen) -{ - return (struct nv30_screen *)screen; -} - -void -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen); - -#endif diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h deleted file mode 100644 index dd3a36f78f3..00000000000 --- a/src/gallium/drivers/nv30/nv30_shader.h +++ /dev/null @@ -1,490 +0,0 @@ -#ifndef __NV30_SHADER_H__ -#define __NV30_SHADER_H__ - -/* Vertex programs instruction set - * - * 128bit opcodes, split into 4 32-bit ones for ease of use. - * - * Non-native instructions - * ABS - MOV + NV40_VP_INST0_DEST_ABS - * POW - EX2 + MUL + LG2 - * SUB - ADD, second source negated - * SWZ - MOV - * XPD - - * - * Register access - * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) - * - Only one CONST can be accessed per-instruction (move extras into TEMPs) - * - * Relative Addressing - * According to the value returned for - * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB - * - * there are only two address registers available. The destination in the - * ARL instruction is set to TEMP <n> (The temp isn't actually written). - * - * When using vanilla ARB_v_p, the proprietary driver will squish both the - * available ADDRESS regs into the first hardware reg in the X and Y - * components. - * - * To use an address reg as an index into consts, the CONST_SRC is set to - * (const_base + offset) and INDEX_CONST is set. - * - * To access the second address reg use ADDR_REG_SELECT_1. A particular - * component of the address regs is selected with ADDR_SWZ. - * - * Only one address register can be accessed per instruction. - * - * Conditional execution (see NV_vertex_program{2,3} for details) Conditional - * execution of an instruction is enabled by setting COND_TEST_ENABLE, and - * selecting the condition which will allow the test to pass with - * COND_{FL,LT,...}. It is possible to swizzle the values in the condition - * register, which allows for testing against an individual component. - * - * Branching: - * - * The BRA/CAL instructions seem to follow a slightly different opcode - * layout. The destination instruction ID (IADDR) overlaps a source field. - * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO - * command, and is incremented automatically on each UPLOAD_INST FIFO - * command. - * - * Conditional branching is achieved by using the condition tests described - * above. There doesn't appear to be dedicated looping instructions, but - * this can be done using a temp reg + conditional branching. - * - * Subroutines may be uploaded before the main program itself, but the first - * executed instruction is determined by the PROGRAM_START_ID FIFO command. - * - */ - -/* DWORD 0 */ - -#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ -#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ -#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ -#define NV30_VP_INST_VEC_RESULT (1 << 20) -#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 -#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) -#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) -#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) -#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) -#define NV30_VP_INST_COND_SHIFT 11 -#define NV30_VP_INST_COND_MASK (0x07 << 11) -# define NV30_VP_INST_COND_FL 0 /* guess */ -# define NV30_VP_INST_COND_LT 1 -# define NV30_VP_INST_COND_EQ 2 -# define NV30_VP_INST_COND_LE 3 -# define NV30_VP_INST_COND_GT 4 -# define NV30_VP_INST_COND_NE 5 -# define NV30_VP_INST_COND_GE 6 -# define NV30_VP_INST_COND_TR 7 /* guess */ -#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 -#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) -#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 -#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) -#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 -#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) -#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) -#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 -#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) -#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 -#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) -#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 -#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) - -/* DWORD 1 */ -#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 -#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) -# define NV30_VP_INST_OP_NOP 0x00 -# define NV30_VP_INST_OP_RCP 0x02 -# define NV30_VP_INST_OP_RCC 0x03 -# define NV30_VP_INST_OP_RSQ 0x04 -# define NV30_VP_INST_OP_EXP 0x05 -# define NV30_VP_INST_OP_LOG 0x06 -# define NV30_VP_INST_OP_LIT 0x07 -# define NV30_VP_INST_OP_BRA 0x09 -# define NV30_VP_INST_OP_CAL 0x0B -# define NV30_VP_INST_OP_RET 0x0C -# define NV30_VP_INST_OP_LG2 0x0D -# define NV30_VP_INST_OP_EX2 0x0E -# define NV30_VP_INST_OP_SIN 0x0F -# define NV30_VP_INST_OP_COS 0x10 -#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 -#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) -# define NV30_VP_INST_OP_NOPV 0x00 -# define NV30_VP_INST_OP_MOV 0x01 -# define NV30_VP_INST_OP_MUL 0x02 -# define NV30_VP_INST_OP_ADD 0x03 -# define NV30_VP_INST_OP_MAD 0x04 -# define NV30_VP_INST_OP_DP3 0x05 -# define NV30_VP_INST_OP_DP4 0x07 -# define NV30_VP_INST_OP_DPH 0x06 -# define NV30_VP_INST_OP_DST 0x08 -# define NV30_VP_INST_OP_MIN 0x09 -# define NV30_VP_INST_OP_MAX 0x0A -# define NV30_VP_INST_OP_SLT 0x0B -# define NV30_VP_INST_OP_SGE 0x0C -# define NV30_VP_INST_OP_ARL 0x0D -# define NV30_VP_INST_OP_FRC 0x0E -# define NV30_VP_INST_OP_FLR 0x0F -# define NV30_VP_INST_OP_SEQ 0x10 -# define NV30_VP_INST_OP_SFL 0x11 -# define NV30_VP_INST_OP_SGT 0x12 -# define NV30_VP_INST_OP_SLE 0x13 -# define NV30_VP_INST_OP_SNE 0x14 -# define NV30_VP_INST_OP_STR 0x15 -# define NV30_VP_INST_OP_SSG 0x16 -# define NV30_VP_INST_OP_ARR 0x17 -# define NV30_VP_INST_OP_ARA 0x18 -#define NV30_VP_INST_CONST_SRC_SHIFT 14 -#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) -#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ -#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ -# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ -# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ -# define NV30_VP_INST_IN_NORMAL 2 -# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ -# define NV30_VP_INST_IN_COL1 4 -# define NV30_VP_INST_IN_FOGC 5 -# define NV30_VP_INST_IN_TC0 8 -# define NV30_VP_INST_IN_TC(n) (8+n) -#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ - -/* Please note: the IADDR fields overlap other fields because they are used - * only for branch instructions. See Branching: label above - * - * DWORD 2 - */ -#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ -#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ -#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ -#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ -#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ -#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ -#define NV30_VP_INST_IADDR_SHIFT 2 -#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ - -/* DWORD 3 */ -#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ -#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ -#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 -#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) -#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 -#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) -#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 -#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) -#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ -#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ -#define NV30_VP_INST_DEST_SHIFT 2 -#define NV30_VP_INST_DEST_MASK (0x0F << 2) -# define NV30_VP_INST_DEST_POS 0 -# define NV30_VP_INST_DEST_BFC0 1 -# define NV30_VP_INST_DEST_BFC1 2 -# define NV30_VP_INST_DEST_COL0 3 -# define NV30_VP_INST_DEST_COL1 4 -# define NV30_VP_INST_DEST_FOGC 5 -# define NV30_VP_INST_DEST_PSZ 6 -# define NV30_VP_INST_DEST_TC(n) (8+n) - -#define NV30_VP_INST_LAST (1 << 0) - -/* Useful to split the source selection regs into their pieces */ -#define NV30_VP_SRC0_HIGH_SHIFT 6 -#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 -#define NV30_VP_SRC0_LOW_MASK 0x0000003F -#define NV30_VP_SRC2_HIGH_SHIFT 4 -#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 -#define NV30_VP_SRC2_LOW_MASK 0x0000000F - - -/* Source-register definition - matches NV20 exactly */ -#define NV30_VP_SRC_NEGATE (1<<14) -#define NV30_VP_SRC_SWZ_X_SHIFT 12 -#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) -#define NV30_VP_SRC_SWZ_Y_SHIFT 10 -#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) -#define NV30_VP_SRC_SWZ_Z_SHIFT 8 -#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) -#define NV30_VP_SRC_SWZ_W_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) -#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 -#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) -#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) -#define NV30_VP_SRC_REG_TYPE_SHIFT 0 -#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) -#define NV30_VP_SRC_REG_TYPE_TEMP 1 -#define NV30_VP_SRC_REG_TYPE_INPUT 2 -#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 - * otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO - * is implemented simply by not writing to the relevant components of the destination. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - */ - -//== Opcode / Destination selection == -#define NV30_FP_OP_PROGRAM_END (1 << 0) -#define NV30_FP_OP_OUT_REG_SHIFT 1 -#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ -/* Needs to be set when writing outputs to get expected result.. */ -#define NV30_FP_OP_OUT_REG_HALF (1 << 7) -#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV30_FP_OP_OUTMASK_SHIFT 9 -#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV30_FP_OP_OUT_X (1<<9) -# define NV30_FP_OP_OUT_Y (1<<10) -# define NV30_FP_OP_OUT_Z (1<<11) -# define NV30_FP_OP_OUT_W (1<<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV30_FP_OP_INPUT_SRC_SHIFT 13 -#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV30_FP_OP_INPUT_SRC_COL0 0x1 -# define NV30_FP_OP_INPUT_SRC_COL1 0x2 -# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV30_FP_OP_INPUT_SRC_TC0 0x4 -# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -#define NV30_FP_OP_TEX_UNIT_SHIFT 17 -#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ -#define NV30_FP_OP_PRECISION_SHIFT 22 -#define NV30_FP_OP_PRECISION_MASK (3 << 22) -# define NV30_FP_PRECISION_FP32 0 -# define NV30_FP_PRECISION_FP16 1 -# define NV30_FP_PRECISION_FX12 2 -#define NV30_FP_OP_OPCODE_SHIFT 24 -#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV30_FP_OP_OPCODE_NOP 0x00 -# define NV30_FP_OP_OPCODE_MOV 0x01 -# define NV30_FP_OP_OPCODE_MUL 0x02 -# define NV30_FP_OP_OPCODE_ADD 0x03 -# define NV30_FP_OP_OPCODE_MAD 0x04 -# define NV30_FP_OP_OPCODE_DP3 0x05 -# define NV30_FP_OP_OPCODE_DP4 0x06 -# define NV30_FP_OP_OPCODE_DST 0x07 -# define NV30_FP_OP_OPCODE_MIN 0x08 -# define NV30_FP_OP_OPCODE_MAX 0x09 -# define NV30_FP_OP_OPCODE_SLT 0x0A -# define NV30_FP_OP_OPCODE_SGE 0x0B -# define NV30_FP_OP_OPCODE_SLE 0x0C -# define NV30_FP_OP_OPCODE_SGT 0x0D -# define NV30_FP_OP_OPCODE_SNE 0x0E -# define NV30_FP_OP_OPCODE_SEQ 0x0F -# define NV30_FP_OP_OPCODE_FRC 0x10 -# define NV30_FP_OP_OPCODE_FLR 0x11 -# define NV30_FP_OP_OPCODE_KIL 0x12 -# define NV30_FP_OP_OPCODE_PK4B 0x13 -# define NV30_FP_OP_OPCODE_UP4B 0x14 -# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ -# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ -# define NV30_FP_OP_OPCODE_TEX 0x17 -# define NV30_FP_OP_OPCODE_TXP 0x18 -# define NV30_FP_OP_OPCODE_TXD 0x19 -# define NV30_FP_OP_OPCODE_RCP 0x1A -# define NV30_FP_OP_OPCODE_RSQ 0x1B -# define NV30_FP_OP_OPCODE_EX2 0x1C -# define NV30_FP_OP_OPCODE_LG2 0x1D -# define NV30_FP_OP_OPCODE_LIT 0x1E -# define NV30_FP_OP_OPCODE_LRP 0x1F -# define NV30_FP_OP_OPCODE_STR 0x20 -# define NV30_FP_OP_OPCODE_SFL 0x21 -# define NV30_FP_OP_OPCODE_COS 0x22 -# define NV30_FP_OP_OPCODE_SIN 0x23 -# define NV30_FP_OP_OPCODE_PK2H 0x24 -# define NV30_FP_OP_OPCODE_UP2H 0x25 -# define NV30_FP_OP_OPCODE_POW 0x26 -# define NV30_FP_OP_OPCODE_PK4UB 0x27 -# define NV30_FP_OP_OPCODE_UP4UB 0x28 -# define NV30_FP_OP_OPCODE_PK2US 0x29 -# define NV30_FP_OP_OPCODE_UP2US 0x2A -# define NV30_FP_OP_OPCODE_DP2A 0x2E -# define NV30_FP_OP_OPCODE_TXB 0x31 -# define NV30_FP_OP_OPCODE_RFL 0x36 -# define NV30_FP_OP_OPCODE_DIV 0x3A -#define NV30_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV30_FP_OP_OUT_ABS (1 << 29) -#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV30_FP_OP_COND_SHIFT 18 -#define NV30_FP_OP_COND_MASK (0x07 << 18) -# define NV30_FP_OP_COND_FL 0 -# define NV30_FP_OP_COND_LT 1 -# define NV30_FP_OP_COND_EQ 2 -# define NV30_FP_OP_COND_LE 3 -# define NV30_FP_OP_COND_GT 4 -# define NV30_FP_OP_COND_NE 5 -# define NV30_FP_OP_COND_GE 6 -# define NV30_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV30_FP_OP_DST_SCALE_SHIFT 28 -#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV30_FP_OP_DST_SCALE_1X 0 -#define NV30_FP_OP_DST_SCALE_2X 1 -#define NV30_FP_OP_DST_SCALE_4X 2 -#define NV30_FP_OP_DST_SCALE_8X 3 -#define NV30_FP_OP_DST_SCALE_INV_2X 5 -#define NV30_FP_OP_DST_SCALE_INV_4X 6 -#define NV30_FP_OP_DST_SCALE_INV_8X 7 - - -/* high order bits of SRC2 */ -#define NV30_FP_OP_INDEX_INPUT (1 << 30) - -//== Register selection == -#define NV30_FP_REG_TYPE_SHIFT 0 -#define NV30_FP_REG_TYPE_MASK (3 << 0) -# define NV30_FP_REG_TYPE_TEMP 0 -# define NV30_FP_REG_TYPE_INPUT 1 -# define NV30_FP_REG_TYPE_CONST 2 -#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ -#define NV30_FP_REG_SRC_MASK (31 << 2) -#define NV30_FP_REG_SRC_HALF (1 << 8) -#define NV30_FP_REG_SWZ_ALL_SHIFT 9 -#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV30_FP_REG_SWZ_X_SHIFT 9 -#define NV30_FP_REG_SWZ_X_MASK (3 << 9) -#define NV30_FP_REG_SWZ_Y_SHIFT 11 -#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV30_FP_REG_SWZ_Z_SHIFT 13 -#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV30_FP_REG_SWZ_W_SHIFT 15 -#define NV30_FP_REG_SWZ_W_MASK (3 << 15) -# define NV30_FP_SWIZZLE_X 0 -# define NV30_FP_SWIZZLE_Y 1 -# define NV30_FP_SWIZZLE_Z 2 -# define NV30_FP_SWIZZLE_W 3 -#define NV30_FP_REG_NEGATE (1 << 17) - -#define NV30SR_NONE 0 -#define NV30SR_OUTPUT 1 -#define NV30SR_INPUT 2 -#define NV30SR_TEMP 3 -#define NV30SR_CONST 4 - -struct nv30_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv30_sreg -nv30_sr(int type, int index) -{ - struct nv30_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv30_sreg -nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) -{ - struct nv30_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv30_sreg -nv30_sr_neg(struct nv30_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_abs(struct nv30_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv30_sreg -nv30_sr_scale(struct nv30_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} - -#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c deleted file mode 100644 index d911c807074..00000000000 --- a/src/gallium/drivers/nv30/nv30_state.c +++ /dev/null @@ -1,728 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -static void * -nv30_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - /* FIXME: Gallium assumes GL_EXT_blend_func_separate. - It is not the case for NV30 */ - so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, rankine, NV34TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->blend = hwcso; - nv30->dirty |= NV30_NEW_BLEND; -} - -static void -nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; -/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; - break;*/ - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -static void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv30_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nv30_sampler_state)); - - ps->fmt = 0; - /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format - are the tx format to use. We should store normalized coord flag - in sampler state structure, and set appropriate format in - nvxx_fragtex_build() - */ - /*NV34TCL_TX_FORMAT_RECT*/ - /*if (!cso->normalized_coords) { - ps->fmt |= (1<<14) ; - }*/ - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv30->tex_sampler[unit] = sampler[unit]; - nv30->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv30->nr_samplers; unit++) { - nv30->tex_sampler[unit] = NULL; - nv30->dirty_samplers |= (1 << unit); - } - - nv30->nr_samplers = nr; - nv30->dirty |= NV30_NEW_SAMPLER; -} - -static void -nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nv30_context *nv30 = nv30_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv30->tex_miptree[unit], miptree[unit]); - nv30->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv30->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv30->tex_miptree[unit], NULL); - nv30->dirty_samplers |= (1 << unit); - } - - nv30->nr_textures = nr; - nv30->dirty |= NV30_NEW_SAMPLER; -} - -static void * -nv30_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *rankine = nv30->screen->rankine; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : - NV34TCL_SHADE_MODEL_SMOOTH); - - so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, rankine, NV34TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV34TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV34TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV34TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->rasterizer = hwcso; - nv30->dirty |= NV30_NEW_RAST; - /*nv30->draw_dirty |= NV30_NEW_RAST;*/ -} - -static void -nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *rankine = nv30->screen->rankine; - - so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, rankine, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, rankine, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->zsa = hwcso; - nv30->dirty |= NV30_NEW_ZSA; -} - -static void -nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv30_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - /*struct nv30_context *nv30 = nv30_context(pipe);*/ - struct nv30_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv30_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ - - return (void *)vp; -} - -static void -nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->vertprog = hwcso; - nv30->dirty |= NV30_NEW_VERTPROG; - /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ -} - -static void -nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_vertex_program *vp = hwcso; - - /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ - nv30_vertprog_destroy(nv30, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv30_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv30_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv30_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->fragprog = hwcso; - nv30->dirty |= NV30_NEW_FRAGPROG; -} - -static void -nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_fragment_program *fp = hwcso; - - nv30_fragprog_destroy(nv30, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv30_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->blend_colour = *bcol; - nv30->dirty |= NV30_NEW_BCOL; -} - -static void -nv30_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->stencil_ref = *sr; - nv30->dirty |= NV30_NEW_SR; -} - -static void -nv30_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ -} - -static void -nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->constbuf[shader] = buf; - nv30->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nv30->dirty |= NV30_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv30->dirty |= NV30_NEW_FRAGPROG; - } -} - -static void -nv30_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->framebuffer = *fb; - nv30->dirty |= NV30_NEW_FB; -} - -static void -nv30_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - memcpy(nv30->stipple, stipple->stipple, 4 * 32); - nv30->dirty |= NV30_NEW_STIPPLE; -} - -static void -nv30_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->scissor = *s; - nv30->dirty |= NV30_NEW_SCISSOR; -} - -static void -nv30_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - nv30->viewport = *vpt; - nv30->dirty |= NV30_NEW_VIEWPORT; - /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ -} - -static void -nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); - nv30->vtxbuf_nr = count; - - nv30->dirty |= NV30_NEW_ARRAYS; - /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ -} - -static void -nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_element *ve) -{ - struct nv30_context *nv30 = nv30_context(pipe); - - memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); - nv30->vtxelt_nr = count; - - nv30->dirty |= NV30_NEW_ARRAYS; - /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ -} - -void -nv30_init_state_functions(struct nv30_context *nv30) -{ - nv30->pipe.create_blend_state = nv30_blend_state_create; - nv30->pipe.bind_blend_state = nv30_blend_state_bind; - nv30->pipe.delete_blend_state = nv30_blend_state_delete; - - nv30->pipe.create_sampler_state = nv30_sampler_state_create; - nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind; - nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; - nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture; - - nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; - nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; - nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; - - nv30->pipe.create_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_create; - nv30->pipe.bind_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_bind; - nv30->pipe.delete_depth_stencil_alpha_state = - nv30_depth_stencil_alpha_state_delete; - - nv30->pipe.create_vs_state = nv30_vp_state_create; - nv30->pipe.bind_vs_state = nv30_vp_state_bind; - nv30->pipe.delete_vs_state = nv30_vp_state_delete; - - nv30->pipe.create_fs_state = nv30_fp_state_create; - nv30->pipe.bind_fs_state = nv30_fp_state_bind; - nv30->pipe.delete_fs_state = nv30_fp_state_delete; - - nv30->pipe.set_blend_color = nv30_set_blend_color; - nv30->pipe.set_stencil_ref = nv30_set_stencil_ref; - nv30->pipe.set_clip_state = nv30_set_clip_state; - nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; - nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; - nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; - nv30->pipe.set_scissor_state = nv30_set_scissor_state; - nv30->pipe.set_viewport_state = nv30_set_viewport_state; - - nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; - nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; -} - diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h deleted file mode 100644 index e42e872de75..00000000000 --- a/src/gallium/drivers/nv30/nv30_state.h +++ /dev/null @@ -1,86 +0,0 @@ -#ifndef __NV30_STATE_H__ -#define __NV30_STATE_H__ - -#include "pipe/p_state.h" -#include "tgsi/tgsi_scan.h" - -struct nv30_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv30_vertex_program_exec { - uint32_t data[4]; - boolean has_branch_offset; - int const_index; -}; - -struct nv30_vertex_program_data { - int index; /* immediates == -1 */ - float value[4]; -}; - -struct nv30_vertex_program { - struct pipe_shader_state pipe; - - boolean translated; - - struct nv30_vertex_program_exec *insns; - unsigned nr_insns; - struct nv30_vertex_program_data *consts; - unsigned nr_consts; - - struct nouveau_resource *exec; - unsigned exec_start; - struct nouveau_resource *data; - unsigned data_start; - unsigned data_start_min; - - uint32_t ir; - uint32_t or; - struct nouveau_stateobj *so; -}; - -struct nv30_fragment_program_data { - unsigned offset; - unsigned index; -}; - -struct nv30_fragment_program { - struct pipe_shader_state pipe; - struct tgsi_shader_info info; - - boolean translated; - boolean on_hw; - unsigned samplers; - - uint32_t *insn; - int insn_len; - - struct nv30_fragment_program_data *consts; - unsigned nr_consts; - - struct pipe_buffer *buffer; - - uint32_t fp_control; - uint32_t fp_reg_control; - struct nouveau_stateobj *so; -}; - -struct nv30_miptree { - struct pipe_texture base; - struct nouveau_bo *bo; - - struct pipe_buffer *buffer; - uint total_size; - - struct { - uint pitch; - uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; -}; - -#endif diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c deleted file mode 100644 index c36d58c040c..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_blend_validate(struct nv30_context *nv30) -{ - so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); - return TRUE; -} - -struct nv30_state_entry nv30_state_blend = { - .validate = nv30_state_blend_validate, - .dirty = { - .pipe = NV30_NEW_BLEND, - .hw = NV30_STATE_BLEND - } -}; - -static boolean -nv30_state_blend_colour_validate(struct nv30_context *nv30) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nv30->blend_colour; - - so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_blend_colour = { - .validate = nv30_state_blend_colour_validate, - .dirty = { - .pipe = NV30_NEW_BCOL, - .hw = NV30_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c deleted file mode 100644 index deefe7fd8db..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "nv30_context.h" -#include "nv30_state.h" - -static struct nv30_state_entry *render_states[] = { - &nv30_state_framebuffer, - &nv30_state_rasterizer, - &nv30_state_scissor, - &nv30_state_stipple, - &nv30_state_fragprog, - &nv30_state_fragtex, - &nv30_state_vertprog, - &nv30_state_blend, - &nv30_state_blend_colour, - &nv30_state_zsa, - &nv30_state_sr, - &nv30_state_viewport, - &nv30_state_vbo, - NULL -}; - -static void -nv30_state_do_validate(struct nv30_context *nv30, - struct nv30_state_entry **states) -{ - while (*states) { - struct nv30_state_entry *e = *states; - - if (nv30->dirty & e->dirty.pipe) { - if (e->validate(nv30)) { - nv30->state.dirty |= (1ULL << e->dirty.hw); - } - } - - states++; - } - nv30->dirty = 0; -} - -void -nv30_state_emit(struct nv30_context *nv30) -{ - struct nouveau_channel *chan = nv30->screen->base.channel; - struct nv30_state *state = &nv30->state; - struct nv30_screen *screen = nv30->screen; - unsigned i; - uint64_t states; - - /* XXX: racy! - */ - if (nv30 != screen->cur_ctx) { - for (i = 0; i < NV30_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nv30; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nv30->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nv30->screen->state[i]); - states &= ~(1ULL << i); - } - - state->dirty = 0; -} - -void -nv30_state_flush_notify(struct nouveau_channel *chan) -{ - struct nv30_context *nv30 = chan->user_private; - struct nv30_state *state = &nv30->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NV30_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NV30_STATE_FRAGPROG]); - if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) - so_emit_reloc_markers(chan, state->hw[NV30_STATE_VTXBUF]); -} - -boolean -nv30_state_validate(struct nv30_context *nv30) -{ -#if 0 - boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; - - if (nv30->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nv30->fallback_swtnl & nv30->dirty) - != nv30->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nv30->pipe.flush(&nv30->pipe, 0, NULL); - nv30->dirty |= (NV30_NEW_VIEWPORT | - NV30_NEW_VERTPROG | - NV30_NEW_ARRAYS); - nv30->render_mode = HW; - } -#endif - nv30_state_do_validate(nv30, render_states); -#if 0 - if (nv30->fallback_swtnl || nv30->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); -#endif - return TRUE; -} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c deleted file mode 100644 index 2ed2ea55e84..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ /dev/null @@ -1,173 +0,0 @@ -#include "nv30_context.h" -#include "nouveau/nouveau_util.h" - -static boolean -nv30_state_framebuffer_validate(struct nv30_context *nv30) -{ - struct pipe_framebuffer_state *fb = &nv30->framebuffer; - struct nouveau_channel *chan = nv30->screen->base.channel; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nv04_surface *rt[2], *zeta = NULL; - uint32_t rt_enable = 0, rt_format = 0; - int i, colour_format = 0, zeta_format = 0, depth_only = 0; - struct nouveau_stateobj *so = so_new(12, 18, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - struct nv30_miptree *nv30mt; - int colour_bits = 32, zeta_bits = 32; - - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) - rt_enable |= NV34TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) { - /* Render to at least a colour buffer */ - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else if (fb->zsbuf) { - depth_only = 1; - - /* Render to depth buffer only */ - if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - - rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | - (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | - (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); - } - else - rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; - } else { - return FALSE; - } - - switch (colour_format) { - case PIPE_FORMAT_X8R8G8B8_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; - colour_bits = 16; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; - zeta_bits = 16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case 0: - rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (colour_bits > zeta_bits) { - return FALSE; - } - - if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) { - struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); - uint32_t pitch = rt0->pitch; - - if (zeta) { - pitch |= (zeta->pitch << 16); - } else { - pitch |= (pitch << 16); - } - - nv30mt = (struct nv30_miptree *) rt0->base.texture; - so_method(so, rankine, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2); - so_data (so, pitch); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - } - - if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { - nv30mt = (struct nv30_miptree *)rt[1]->base.texture; - so_method(so, rankine, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, nouveau_bo(nv30mt->buffer), rt[1]->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_data (so, rt[1]->pitch); - } - - if (zeta_format) { - nv30mt = (struct nv30_miptree *)zeta->base.texture; - so_method(so, rankine, NV34TCL_DMA_ZETA, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, rankine, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, nouveau_bo(nv30mt->buffer), zeta->base.offset, - rt_flags | NOUVEAU_BO_LOW, 0, 0); - /* TODO: allocate LMA depth buffer */ - } - - so_method(so, rankine, NV34TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, rankine, NV34TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, rankine, NV34TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, rankine, 0x1d88, 1); - so_data (so, (1 << 12) | h); - /* Wonder why this is needed, context should all be set to zero on init */ - so_method(so, rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); - so_data (so, 0); - - so_ref(so, &nv30->state.hw[NV30_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_framebuffer = { - .validate = nv30_state_framebuffer_validate, - .dirty = { - .pipe = NV30_NEW_FB, - .hw = NV30_STATE_FB - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c deleted file mode 100644 index 6d1b60e043d..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_rasterizer_validate(struct nv30_context *nv30) -{ - so_ref(nv30->rasterizer->so, - &nv30->state.hw[NV30_STATE_RAST]); - return TRUE; -} - -struct nv30_state_entry nv30_state_rasterizer = { - .validate = nv30_state_rasterizer_validate, - .dirty = { - .pipe = NV30_NEW_RAST, - .hw = NV30_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c deleted file mode 100644 index ba61a9e24a4..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_scissor_validate(struct nv30_context *nv30) -{ - struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; - struct pipe_scissor_state *s = &nv30->scissor; - struct nouveau_stateobj *so; - - if (nv30->state.hw[NV30_STATE_SCISSOR] && - (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) - return FALSE; - nv30->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); - if (nv30->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_scissor = { - .validate = nv30_state_scissor_validate, - .dirty = { - .pipe = NV30_NEW_SCISSOR | NV30_NEW_RAST, - .hw = NV30_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c deleted file mode 100644 index ed520a4f439..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_stipple.c +++ /dev/null @@ -1,40 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_stipple_validate(struct nv30_context *nv30) -{ - struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct nouveau_stateobj *so; - - if (nv30->state.hw[NV30_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nv30->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_stipple = { - .validate = nv30_state_stipple_validate, - .dirty = { - .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, - .hw = NV30_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c deleted file mode 100644 index 2d7781292bd..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_viewport.c +++ /dev/null @@ -1,72 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_viewport_validate(struct nv30_context *nv30) -{ - struct pipe_viewport_state *vpt = &nv30->viewport; - struct nouveau_stateobj *so; - unsigned bypass; - - if (/*nv30->render_mode == HW &&*/ - !nv30->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; - - if (nv30->state.hw[NV30_STATE_VIEWPORT] && - (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && - nv30->state.viewport_bypass == bypass) - return FALSE; - nv30->state.viewport_bypass = bypass; - - so = so_new(3, 10, 0); - if (!bypass) { - so_method(so, nv30->screen->rankine, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); -/* so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 1); -*/ } else { - so_method(so, nv30->screen->rankine, - NV34TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(0.0)); - /* Not entirely certain what this is yet. The DDX uses this - * value also as it fixes rendering when you pass - * pre-transformed vertices to the GPU. My best gusss is that - * this bypasses some culling/clipping stage. Might be worth - * noting that points/lines are uneffected by whatever this - * value fixes, only filled polygons are effected. - */ -/* so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 0x110); -*/ } - /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ - so_method(so, nv30->screen->rankine, 0x1d78, 1); - so_data (so, 1); - - so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_viewport = { - .validate = nv30_state_viewport_validate, - .dirty = { - .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, - .hw = NV30_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c deleted file mode 100644 index 88cd74f180a..00000000000 --- a/src/gallium/drivers/nv30/nv30_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv30_context.h" - -static boolean -nv30_state_zsa_validate(struct nv30_context *nv30) -{ - so_ref(nv30->zsa->so, - &nv30->state.hw[NV30_STATE_ZSA]); - return TRUE; -} - -struct nv30_state_entry nv30_state_zsa = { - .validate = nv30_state_zsa_validate, - .dirty = { - .pipe = NV30_NEW_ZSA, - .hw = NV30_STATE_ZSA - } -}; - -static boolean -nv30_state_sr_validate(struct nv30_context *nv30) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nv30->stencil_ref; - - so_method(so, nv30->screen->rankine, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nv30->screen->rankine, NV34TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nv30->state.hw[NV30_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv30_state_entry nv30_state_sr = { - .validate = nv30_state_sr_validate, - .dirty = { - .pipe = NV30_NEW_SR, - .hw = NV30_STATE_SR - } -}; diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c deleted file mode 100644 index 554bcbbdd0e..00000000000 --- a/src/gallium/drivers/nv30/nv30_transfer.c +++ /dev/null @@ -1,178 +0,0 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> -#include "nv30_context.h" -#include "nv30_screen.h" -#include "nv30_state.h" - -struct nv30_transfer { - struct pipe_transfer base; - struct pipe_surface *surface; - boolean direct; -}; - -static void -nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, - struct pipe_texture *template) -{ - memset(template, 0, sizeof(struct pipe_texture)); - template->target = pt->target; - template->format = pt->format; - template->width0 = width; - template->height0 = height; - template->depth0 = 1; - template->last_level = 0; - template->nr_samples = pt->nr_samples; - - template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | - NOUVEAU_TEXTURE_USAGE_LINEAR; -} - -static struct pipe_transfer * -nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct nv30_miptree *mt = (struct nv30_miptree *)pt; - struct nv30_transfer *tx; - struct pipe_texture tx_tex_template, *tx_tex; - - tx = CALLOC_STRUCT(nv30_transfer); - if (!tx) - return NULL; - - pipe_texture_reference(&tx->base.texture, pt); - tx->base.x = x; - tx->base.y = y; - tx->base.width = w; - tx->base.height = h; - tx->base.stride = mt->level[level].pitch; - tx->base.usage = usage; - tx->base.face = face; - tx->base.level = level; - tx->base.zslice = zslice; - - /* Direct access to texture */ - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || - debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && - pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) - { - tx->direct = true; - tx->surface = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - pipe_transfer_buffer_flags(&tx->base)); - return &tx->base; - } - - tx->direct = false; - - nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template); - - tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); - if (!tx_tex) - { - FREE(tx); - return NULL; - } - - tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch; - - tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, - 0, 0, 0, - pipe_transfer_buffer_flags(&tx->base)); - - pipe_texture_reference(&tx_tex, NULL); - - if (!tx->surface) - { - pipe_surface_reference(&tx->surface, NULL); - FREE(tx); - return NULL; - } - - if (usage & PIPE_TRANSFER_READ) { - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *src; - - src = pscreen->get_tex_surface(pscreen, pt, - face, level, zslice, - PIPE_BUFFER_USAGE_GPU_READ); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - /* TODO: Check if SIFM can un-swizzle */ - nvscreen->eng2d->copy(nvscreen->eng2d, - tx->surface, 0, 0, - src, x, y, - w, h); - - pipe_surface_reference(&src, NULL); - } - - return &tx->base; -} - -static void -nv30_transfer_del(struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - - if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv30_screen *nvscreen = nv30_screen(pscreen); - struct pipe_surface *dst; - - dst = pscreen->get_tex_surface(pscreen, ptx->texture, - ptx->face, ptx->level, ptx->zslice, - PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER); - - /* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ - nvscreen->eng2d->copy(nvscreen->eng2d, - dst, tx->base.x, tx->base.y, - tx->surface, 0, 0, - tx->base.width, tx->base.height); - - pipe_surface_reference(&dst, NULL); - } - - pipe_surface_reference(&tx->surface, NULL); - pipe_texture_reference(&ptx->texture, NULL); - FREE(ptx); -} - -static void * -nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; - void *map = pipe_buffer_map(pscreen, mt->buffer, - pipe_transfer_buffer_flags(ptx)); - - if(!tx->direct) - return map + ns->base.offset; - else - return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format); -} - -static void -nv30_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) -{ - struct nv30_transfer *tx = (struct nv30_transfer *)ptx; - struct nv30_miptree *mt = (struct nv30_miptree *)tx->surface->texture; - - pipe_buffer_unmap(pscreen, mt->buffer); -} - -void -nv30_screen_init_transfer_functions(struct pipe_screen *pscreen) -{ - pscreen->get_tex_transfer = nv30_transfer_new; - pscreen->tex_transfer_destroy = nv30_transfer_del; - pscreen->transfer_map = nv30_transfer_map; - pscreen->transfer_unmap = nv30_transfer_unmap; -} diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c deleted file mode 100644 index 809be3712da..00000000000 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ /dev/null @@ -1,842 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "nv30_context.h" -#include "nv30_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) - -struct nv30_vpc { - struct nv30_vertex_program *vp; - - struct nv30_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nv30_sreg *imm; - unsigned nr_imm; -}; - -static struct nv30_sreg -temp(struct nv30_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); -} - -static struct nv30_sreg -constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nv30_vertex_program *vp = vpc->vp; - struct nv30_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nv30_sr(NV30SR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nv30_sr(NV30SR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NV30SR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NV30SR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NV30SR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NV30SR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) -{ - struct nv30_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NV30SR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NV30SR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - - /*XXX: no way this is entirely correct, someone needs to - * figure out what exactly it is. - */ - hw[3] |= 0x800; - break; - default: - assert(0); - } -} - -static void -nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, - struct nv30_sreg s2) -{ - struct nv30_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - if (dst.type == NV30SR_OUTPUT) { - if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); - } else { - if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nv30_sreg -tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->Register.Index) - vpc->high_temp = fsrc->Register.Index; - src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nv30_sreg -tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv30_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = nv30_sr(NV30SR_OUTPUT, - vpc->output_map[fdst->Register.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nv30_sreg src[3], dst, tmp; - struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_vertprog_prepare(struct nv30_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -static void -nv30_vertprog_translate(struct nv30_context *nv30, - struct nv30_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv30_vpc *vpc = NULL; - - tgsi_dump(vp->pipe.tokens,0); - - vpc = CALLOC(1, sizeof(struct nv30_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv30_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv30_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv30_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(vpc); -} - -static boolean -nv30_vertprog_validate(struct nv30_context *nv30) -{ - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nv30_screen *screen = nv30->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; - struct nv30_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - vp = nv30->vertprog; - constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv30_vertprog_translate(nv30, vp); - if (!vp->translated) - return FALSE; - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nv30->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(1, 1, 0); - so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv30->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nv30_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, - &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nv30_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nv30_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, vp->insns[i].data[0], vp->insns[i].data[1], - vp->insns[i].data[2], vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { - so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_vertprog_destroy(struct nv30_context *nv30, struct nv30_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = 0; - so_ref(NULL, &vp->so); -} - -struct nv30_state_entry nv30_state_vertprog = { - .validate = nv30_vertprog_validate, - .dirty = { - .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, - .hw = NV30_STATE_VERTPROG, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile deleted file mode 100644 index 0ecae2b4913..00000000000 --- a/src/gallium/drivers/nv40/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = nv40 - -C_SOURCES = \ - nv40_clear.c \ - nv40_context.c \ - nv40_draw.c \ - nv40_fragprog.c \ - nv40_fragtex.c \ - nv40_miptree.c \ - nv40_query.c \ - nv40_screen.c \ - nv40_state.c \ - nv40_state_blend.c \ - nv40_state_emit.c \ - nv40_state_fb.c \ - nv40_state_rasterizer.c \ - nv40_state_scissor.c \ - nv40_state_stipple.c \ - nv40_state_viewport.c \ - nv40_state_zsa.c \ - nv40_surface.c \ - nv40_transfer.c \ - nv40_vbo.c \ - nv40_vertprog.c - -include ../../Makefile.template diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c deleted file mode 100644 index 65dc73e88b3..00000000000 --- a/src/gallium/drivers/nv40/nv40_context.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "draw/draw_context.h" -#include "pipe/p_defines.h" - -#include "nv40_context.h" -#include "nv40_screen.h" - -static void -nv40_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - if (flags & PIPE_FLUSH_TEXTURE_CACHE) { - BEGIN_RING(chan, curie, 0x1fd8, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, curie, 0x1fd8, 1); - OUT_RING (chan, 1); - } - - FIRE_RING(chan); - if (fence) - *fence = NULL; -} - -static void -nv40_destroy(struct pipe_context *pipe) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned i; - - for (i = 0; i < NV40_STATE_MAX; i++) { - if (nv40->state.hw[i]) - so_ref(NULL, &nv40->state.hw[i]); - } - - if (nv40->draw) - draw_destroy(nv40->draw); - FREE(nv40); -} - -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - struct pipe_winsys *ws = pscreen->winsys; - struct nv40_context *nv40; - struct nouveau_winsys *nvws = screen->nvws; - - nv40 = CALLOC(1, sizeof(struct nv40_context)); - if (!nv40) - return NULL; - nv40->screen = screen; - - nv40->nvws = nvws; - - nv40->pipe.winsys = ws; - nv40->pipe.priv = priv; - nv40->pipe.screen = pscreen; - nv40->pipe.destroy = nv40_destroy; - nv40->pipe.draw_arrays = nv40_draw_arrays; - nv40->pipe.draw_elements = nv40_draw_elements; - nv40->pipe.clear = nv40_clear; - nv40->pipe.flush = nv40_flush; - - nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced; - nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; - - screen->base.channel->user_private = nv40; - screen->base.channel->flush_notify = nv40_state_flush_notify; - - nv40_init_query_functions(nv40); - nv40_init_surface_functions(nv40); - nv40_init_state_functions(nv40); - - /* Create, configure, and install fallback swtnl path */ - nv40->draw = draw_create(); - draw_wide_point_threshold(nv40->draw, 9999999.0); - draw_wide_line_threshold(nv40->draw, 9999999.0); - draw_enable_line_stipple(nv40->draw, FALSE); - draw_enable_point_sprites(nv40->draw, FALSE); - draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); - - return &nv40->pipe; -} diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h deleted file mode 100644 index 4861924dac7..00000000000 --- a/src/gallium/drivers/nv40/nv40_context.h +++ /dev/null @@ -1,240 +0,0 @@ -#ifndef __NV40_CONTEXT_H__ -#define __NV40_CONTEXT_H__ - -#include <stdio.h> - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_compiler.h" - -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_inlines.h" - -#include "draw/draw_vertex.h" - -#include "nouveau/nouveau_winsys.h" -#include "nouveau/nouveau_gldefs.h" -#include "nouveau/nouveau_context.h" -#include "nouveau/nouveau_stateobj.h" - -#include "nv40_state.h" - -#define NOUVEAU_ERR(fmt, args...) \ - fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); -#define NOUVEAU_MSG(fmt, args...) \ - fprintf(stderr, "nouveau: "fmt, ##args); - -enum nv40_state_index { - NV40_STATE_FB = 0, - NV40_STATE_VIEWPORT = 1, - NV40_STATE_BLEND = 2, - NV40_STATE_RAST = 3, - NV40_STATE_ZSA = 4, - NV40_STATE_BCOL = 5, - NV40_STATE_CLIP = 6, - NV40_STATE_SCISSOR = 7, - NV40_STATE_STIPPLE = 8, - NV40_STATE_FRAGPROG = 9, - NV40_STATE_VERTPROG = 10, - NV40_STATE_FRAGTEX0 = 11, - NV40_STATE_FRAGTEX1 = 12, - NV40_STATE_FRAGTEX2 = 13, - NV40_STATE_FRAGTEX3 = 14, - NV40_STATE_FRAGTEX4 = 15, - NV40_STATE_FRAGTEX5 = 16, - NV40_STATE_FRAGTEX6 = 17, - NV40_STATE_FRAGTEX7 = 18, - NV40_STATE_FRAGTEX8 = 19, - NV40_STATE_FRAGTEX9 = 20, - NV40_STATE_FRAGTEX10 = 21, - NV40_STATE_FRAGTEX11 = 22, - NV40_STATE_FRAGTEX12 = 23, - NV40_STATE_FRAGTEX13 = 24, - NV40_STATE_FRAGTEX14 = 25, - NV40_STATE_FRAGTEX15 = 26, - NV40_STATE_VERTTEX0 = 27, - NV40_STATE_VERTTEX1 = 28, - NV40_STATE_VERTTEX2 = 29, - NV40_STATE_VERTTEX3 = 30, - NV40_STATE_VTXBUF = 31, - NV40_STATE_VTXFMT = 32, - NV40_STATE_VTXATTR = 33, - NV40_STATE_SR = 34, - NV40_STATE_MAX = 35 -}; - -#include "nv40_screen.h" - -#define NV40_NEW_BLEND (1 << 0) -#define NV40_NEW_RAST (1 << 1) -#define NV40_NEW_ZSA (1 << 2) -#define NV40_NEW_SAMPLER (1 << 3) -#define NV40_NEW_FB (1 << 4) -#define NV40_NEW_STIPPLE (1 << 5) -#define NV40_NEW_SCISSOR (1 << 6) -#define NV40_NEW_VIEWPORT (1 << 7) -#define NV40_NEW_BCOL (1 << 8) -#define NV40_NEW_VERTPROG (1 << 9) -#define NV40_NEW_FRAGPROG (1 << 10) -#define NV40_NEW_ARRAYS (1 << 11) -#define NV40_NEW_UCP (1 << 12) -#define NV40_NEW_SR (1 << 13) - -struct nv40_rasterizer_state { - struct pipe_rasterizer_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv40_zsa_state { - struct pipe_depth_stencil_alpha_state pipe; - struct nouveau_stateobj *so; -}; - -struct nv40_blend_state { - struct pipe_blend_state pipe; - struct nouveau_stateobj *so; -}; - - -struct nv40_state { - unsigned scissor_enabled; - unsigned stipple_enabled; - unsigned viewport_bypass; - unsigned fp_samplers; - - uint64_t dirty; - struct nouveau_stateobj *hw[NV40_STATE_MAX]; -}; - -struct nv40_context { - struct pipe_context pipe; - - struct nouveau_winsys *nvws; - struct nv40_screen *screen; - - struct draw_context *draw; - - /* HW state derived from pipe states */ - struct nv40_state state; - struct { - struct nv40_vertex_program *vertprog; - - unsigned nr_attribs; - unsigned hw[PIPE_MAX_SHADER_INPUTS]; - unsigned draw[PIPE_MAX_SHADER_INPUTS]; - unsigned emit[PIPE_MAX_SHADER_INPUTS]; - } swtnl; - - enum { - HW, SWTNL, SWRAST - } render_mode; - unsigned fallback_swtnl; - unsigned fallback_swrast; - - /* Context state */ - unsigned dirty, draw_dirty; - struct pipe_scissor_state scissor; - unsigned stipple[32]; - struct pipe_clip_state clip; - struct nv40_vertex_program *vertprog; - struct nv40_fragment_program *fragprog; - struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; - unsigned constbuf_nr[PIPE_SHADER_TYPES]; - struct nv40_rasterizer_state *rasterizer; - struct nv40_zsa_state *zsa; - struct nv40_blend_state *blend; - struct pipe_blend_color blend_colour; - struct pipe_stencil_ref stencil_ref; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_buffer *idxbuf; - unsigned idxbuf_format; - struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; - struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - unsigned nr_textures; - unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; - unsigned vtxbuf_nr; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; - unsigned vtxelt_nr; -}; - -static INLINE struct nv40_context * -nv40_context(struct pipe_context *pipe) -{ - return (struct nv40_context *)pipe; -} - -struct nv40_state_entry { - boolean (*validate)(struct nv40_context *nv40); - struct { - unsigned pipe; - unsigned hw; - } dirty; -}; - -extern void nv40_init_state_functions(struct nv40_context *nv40); -extern void nv40_init_surface_functions(struct nv40_context *nv40); -extern void nv40_init_query_functions(struct nv40_context *nv40); - -extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); - -/* nv40_draw.c */ -extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); -extern void nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, - unsigned ib_size, unsigned mode, - unsigned start, unsigned count); - -/* nv40_vertprog.c */ -extern void nv40_vertprog_destroy(struct nv40_context *, - struct nv40_vertex_program *); - -/* nv40_fragprog.c */ -extern void nv40_fragprog_destroy(struct nv40_context *, - struct nv40_fragment_program *); - -/* nv40_fragtex.c */ -extern void nv40_fragtex_bind(struct nv40_context *); - -/* nv40_state.c and friends */ -extern boolean nv40_state_validate(struct nv40_context *nv40); -extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); -extern void nv40_state_emit(struct nv40_context *nv40); -extern void nv40_state_flush_notify(struct nouveau_channel *chan); -extern struct nv40_state_entry nv40_state_rasterizer; -extern struct nv40_state_entry nv40_state_scissor; -extern struct nv40_state_entry nv40_state_stipple; -extern struct nv40_state_entry nv40_state_fragprog; -extern struct nv40_state_entry nv40_state_vertprog; -extern struct nv40_state_entry nv40_state_blend; -extern struct nv40_state_entry nv40_state_blend_colour; -extern struct nv40_state_entry nv40_state_zsa; -extern struct nv40_state_entry nv40_state_viewport; -extern struct nv40_state_entry nv40_state_framebuffer; -extern struct nv40_state_entry nv40_state_fragtex; -extern struct nv40_state_entry nv40_state_vbo; -extern struct nv40_state_entry nv40_state_vtxfmt; -extern struct nv40_state_entry nv40_state_sr; - -/* nv40_vbo.c */ -extern void nv40_draw_arrays(struct pipe_context *, unsigned mode, - unsigned start, unsigned count); -extern void nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, - unsigned count); - -/* nv40_clear.c */ -extern void nv40_clear(struct pipe_context *pipe, unsigned buffers, - const float *rgba, double depth, unsigned stencil); - -/* nv40_context.c */ -struct pipe_context * -nv40_create(struct pipe_screen *pscreen, void *priv); - -#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c deleted file mode 100644 index 48bd84d16c5..00000000000 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ /dev/null @@ -1,360 +0,0 @@ -#include "pipe/p_shader_tokens.h" -#include "util/u_inlines.h" - -#include "util/u_pack_color.h" - -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pipe.h" - -#include "nv40_context.h" -#define NV40_SHADER_NO_FUCKEDNESS -#include "nv40_shader.h" - -/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all. Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. - */ - -struct nv40_render_stage { - struct draw_stage stage; - struct nv40_context *nv40; - unsigned prim; -}; - -static INLINE struct nv40_render_stage * -nv40_render_stage(struct draw_stage *stage) -{ - return (struct nv40_render_stage *)stage; -} - -static INLINE void -nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - - for (i = 0; i < nv40->swtnl.nr_attribs; i++) { - unsigned idx = nv40->swtnl.draw[i]; - unsigned hw = nv40->swtnl.hw[i]; - - switch (nv40->swtnl.emit[i]) { - case EMIT_OMIT: - break; - case EMIT_1F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1); - OUT_RING (chan, fui(v->data[idx][0])); - break; - case EMIT_2F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - break; - case EMIT_3F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - break; - case EMIT_4F: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0])); - OUT_RING (chan, fui(v->data[idx][1])); - OUT_RING (chan, fui(v->data[idx][2])); - OUT_RING (chan, fui(v->data[idx][3])); - break; - case EMIT_4UB: - BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][3]))); - break; - default: - assert(0); - break; - } - } -} - -static INLINE void -nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, - unsigned mode, unsigned count) -{ - struct nv40_render_stage *rs = nv40_render_stage(stage); - struct nv40_context *nv40 = rs->nv40; - - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - - /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - NOUVEAU_ERR("AIII, missed flush\n"); - assert(0); - } - FIRE_RING(chan); - nv40_state_emit(nv40); - } - - /* Switch primitive modes if necessary */ - if (rs->prim != mode) { - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, mode); - rs->prim = mode; - } - - /* Emit vertex data */ - for (i = 0; i < count; i++) - nv40_render_vertex(nv40, prim->v[i]); - - /* If it's likely we'll need to empty the push buffer soon, finish - * off the primitive now. - */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - rs->prim = NV40TCL_BEGIN_END_STOP; - } -} - -static void -nv40_render_point(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); -} - -static void -nv40_render_line(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); -} - -static void -nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) -{ - nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); -} - -static void -nv40_render_flush(struct draw_stage *draw, unsigned flags) -{ - struct nv40_render_stage *rs = nv40_render_stage(draw); - struct nv40_context *nv40 = rs->nv40; - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - if (rs->prim != NV40TCL_BEGIN_END_STOP) { - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, NV40TCL_BEGIN_END_STOP); - rs->prim = NV40TCL_BEGIN_END_STOP; - } -} - -static void -nv40_render_reset_stipple_counter(struct draw_stage *draw) -{ -} - -static void -nv40_render_destroy(struct draw_stage *draw) -{ - FREE(draw); -} - -static INLINE void -emit_mov(struct nv40_vertex_program *vp, - unsigned dst, unsigned src, unsigned vor, unsigned mask) -{ - struct nv40_vertex_program_exec *inst; - - vp->insns = realloc(vp->insns, - sizeof(struct nv40_vertex_program_exec) * - ++vp->nr_insns); - inst = &vp->insns[vp->nr_insns - 1]; - - inst->data[0] = 0x401f9c6c; - inst->data[1] = 0x0040000d | (src << 8); - inst->data[2] = 0x8106c083; - inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); - inst->const_index = -1; - inst->has_branch_offset = FALSE; - - vp->ir |= (1 << src); - if (vor != ~0) - vp->or |= (1 << vor); -} - -static struct nv40_vertex_program * -create_drawvp(struct nv40_context *nv40) -{ - struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); - unsigned i; - - emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); - emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); - for (i = 0; i < 8; i++) - emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); - - vp->insns[vp->nr_insns - 1].data[3] |= 1; - vp->translated = TRUE; - return vp; -} - -struct draw_stage * -nv40_draw_render_stage(struct nv40_context *nv40) -{ - struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); - - if (!nv40->swtnl.vertprog) - nv40->swtnl.vertprog = create_drawvp(nv40); - - render->nv40 = nv40; - render->stage.draw = nv40->draw; - render->stage.point = nv40_render_point; - render->stage.line = nv40_render_line; - render->stage.tri = nv40_render_tri; - render->stage.flush = nv40_render_flush; - render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; - render->stage.destroy = nv40_render_destroy; - - return &render->stage; -} - -void -nv40_draw_elements_swtnl(struct pipe_context *pipe, - struct pipe_buffer *idxbuf, unsigned idxbuf_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - unsigned i; - void *map; - - if (!nv40_state_validate_swtnl(nv40)) - return; - nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); - nv40_state_emit(nv40); - - for (i = 0; i < nv40->vtxbuf_nr; i++) { - map = pipe_buffer_map(pscreen, nv40->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_vertex_buffer(nv40->draw, i, map); - } - - if (idxbuf) { - map = pipe_buffer_map(pscreen, idxbuf, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); - } else { - draw_set_mapped_element_buffer(nv40->draw, 0, NULL); - } - - if (nv40->constbuf[PIPE_SHADER_VERTEX]) { - const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; - - map = pipe_buffer_map(pscreen, - nv40->constbuf[PIPE_SHADER_VERTEX], - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX, 0, - map, nr); - } - - draw_arrays(nv40->draw, mode, start, count); - - for (i = 0; i < nv40->vtxbuf_nr; i++) - pipe_buffer_unmap(pscreen, nv40->vtxbuf[i].buffer); - - if (idxbuf) - pipe_buffer_unmap(pscreen, idxbuf); - - if (nv40->constbuf[PIPE_SHADER_VERTEX]) - pipe_buffer_unmap(pscreen, nv40->constbuf[PIPE_SHADER_VERTEX]); - - draw_flush(nv40->draw); - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, - unsigned semantic, unsigned index) -{ - unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index); - unsigned a = nv40->swtnl.nr_attribs++; - - nv40->swtnl.hw[a] = hw; - nv40->swtnl.emit[a] = emit; - nv40->swtnl.draw[a] = draw_out; -} - -static boolean -nv40_state_vtxfmt_validate(struct nv40_context *nv40) -{ - struct nv40_fragment_program *fp = nv40->fragprog; - unsigned colour = 0, texcoords = 0, fog = 0, i; - - /* Determine needed fragprog inputs */ - for (i = 0; i < fp->info.num_inputs; i++) { - switch (fp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - colour |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_GENERIC: - texcoords |= (1 << fp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_FOG: - fog = 1; - break; - default: - assert(0); - } - } - - nv40->swtnl.nr_attribs = 0; - - /* Map draw vtxprog output to hw attribute IDs */ - for (i = 0; i < 2; i++) { - if (!(colour & (1 << i))) - continue; - emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); - } - - for (i = 0; i < 8; i++) { - if (!(texcoords & (1 << i))) - continue; - emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); - } - - if (fog) { - emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); - } - - emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); - - return FALSE; -} - -struct nv40_state_entry nv40_state_vtxfmt = { - .validate = nv40_state_vtxfmt_validate, - .dirty = { - .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c deleted file mode 100644 index 8ed4a67dd03..00000000000 --- a/src/gallium/drivers/nv40/nv40_query.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "pipe/p_context.h" - -#include "nv40_context.h" - -struct nv40_query { - struct nouveau_resource *object; - unsigned type; - boolean ready; - uint64_t result; -}; - -static INLINE struct nv40_query * -nv40_query(struct pipe_query *pipe) -{ - return (struct nv40_query *)pipe; -} - -static struct pipe_query * -nv40_query_create(struct pipe_context *pipe, unsigned query_type) -{ - struct nv40_query *q; - - q = CALLOC(1, sizeof(struct nv40_query)); - q->type = query_type; - - return (struct pipe_query *)q; -} - -static void -nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_query *q = nv40_query(pq); - - if (q->object) - nouveau_resource_free(&q->object); - FREE(q); -} - -static void -nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - /* Happens when end_query() is called, then another begin_query() - * without querying the result in-between. For now we'll wait for - * the existing query to notify completion, but it could be better. - */ - if (q->object) { - uint64_t tmp; - pipe->get_query_result(pipe, pq, 1, &tmp); - } - - if (nouveau_resource_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) - assert(0); - nouveau_notifier_reset(nv40->screen->query, q->object->start); - - BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1); - OUT_RING (chan, 1); - BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1); - OUT_RING (chan, 1); - - q->ready = FALSE; -} - -static void -nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1); - OUT_RING (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | - ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); - FIRE_RING(chan); -} - -static boolean -nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, - boolean wait, uint64_t *result) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_query *q = nv40_query(pq); - - assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); - - if (!q->ready) { - unsigned status; - - status = nouveau_notifier_status(nv40->screen->query, - q->object->start); - if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { - if (wait == FALSE) - return FALSE; - nouveau_notifier_wait_status(nv40->screen->query, - q->object->start, - NV_NOTIFY_STATE_STATUS_COMPLETED, - 0); - } - - q->result = nouveau_notifier_return_val(nv40->screen->query, - q->object->start); - q->ready = TRUE; - nouveau_resource_free(&q->object); - } - - *result = q->result; - return TRUE; -} - -void -nv40_init_query_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_query = nv40_query_create; - nv40->pipe.destroy_query = nv40_query_destroy; - nv40->pipe.begin_query = nv40_query_begin; - nv40->pipe.end_query = nv40_query_end; - nv40->pipe.get_query_result = nv40_query_result; -} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c deleted file mode 100644 index edee4b9a3aa..00000000000 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ /dev/null @@ -1,320 +0,0 @@ -#include "pipe/p_screen.h" - -#include "nv40_context.h" -#include "nv40_screen.h" - -#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf -#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 -#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 - -static int -nv40_screen_get_param(struct pipe_screen *pscreen, int param) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - - switch (param) { - case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 16; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_GLSL: - return 0; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 1; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 4; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 1; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 10; - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - case PIPE_CAP_TEXTURE_MIRROR_REPEAT: - return 1; - case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: - return 0; /* We have 4 - but unsupported currently */ - case PIPE_CAP_TGSI_CONT_SUPPORTED: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - case NOUVEAU_CAP_HW_VTXBUF: - return 1; - case NOUVEAU_CAP_HW_IDXBUF: - if (screen->curie->grclass == NV40TCL) - return 1; - return 0; - case PIPE_CAP_INDEP_BLEND_ENABLE: - return 0; - case PIPE_CAP_INDEP_BLEND_FUNC: - return 0; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 16; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0; - } -} - -static float -nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) -{ - switch (param) { - case PIPE_CAP_MAX_LINE_WIDTH: - case PIPE_CAP_MAX_LINE_WIDTH_AA: - return 10.0; - case PIPE_CAP_MAX_POINT_WIDTH: - case PIPE_CAP_MAX_POINT_WIDTH_AA: - return 64.0; - case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; - case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: - return 16.0; - default: - NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); - return 0.0; - } -} - -static boolean -nv40_screen_surface_format_supported(struct pipe_screen *pscreen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned tex_usage, unsigned geom_flags) -{ - if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - return TRUE; - default: - break; - } - } else - if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - switch (format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - return TRUE; - default: - break; - } - } else { - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_L8_UNORM: - case PIPE_FORMAT_A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_A8L8_UNORM: - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return TRUE; - default: - break; - } - } - - return FALSE; -} - -static struct pipe_buffer * -nv40_surface_buffer(struct pipe_surface *surf) -{ - struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; - - return mt->buffer; -} - -static void -nv40_screen_destroy(struct pipe_screen *pscreen) -{ - struct nv40_screen *screen = nv40_screen(pscreen); - unsigned i; - - for (i = 0; i < NV40_STATE_MAX; i++) { - if (screen->state[i]) - so_ref(NULL, &screen->state[i]); - } - - nouveau_resource_destroy(&screen->vp_exec_heap); - nouveau_resource_destroy(&screen->vp_data_heap); - nouveau_resource_destroy(&screen->query_heap); - nouveau_notifier_free(&screen->query); - nouveau_notifier_free(&screen->sync); - nouveau_grobj_free(&screen->curie); - nv04_surface_2d_takedown(&screen->eng2d); - - nouveau_screen_fini(&screen->base); - - FREE(pscreen); -} - -struct pipe_screen * -nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) -{ - struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); - struct nouveau_channel *chan; - struct pipe_screen *pscreen; - struct nouveau_stateobj *so; - unsigned curie_class = 0; - int ret; - - if (!screen) - return NULL; - pscreen = &screen->base.base; - - ret = nouveau_screen_init(&screen->base, dev); - if (ret) { - nv40_screen_destroy(pscreen); - return NULL; - } - chan = screen->base.channel; - - pscreen->winsys = ws; - pscreen->destroy = nv40_screen_destroy; - pscreen->get_param = nv40_screen_get_param; - pscreen->get_paramf = nv40_screen_get_paramf; - pscreen->is_format_supported = nv40_screen_surface_format_supported; - pscreen->context_create = nv40_create; - - nv40_screen_init_miptree_functions(pscreen); - nv40_screen_init_transfer_functions(pscreen); - - /* 3D object */ - switch (dev->chipset & 0xf0) { - case 0x40: - if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV40TCL; - else - if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV44TCL; - break; - case 0x60: - if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) - curie_class = NV44TCL; - break; - } - - if (!curie_class) { - NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", dev->chipset); - return NULL; - } - - ret = nouveau_grobj_alloc(chan, 0xbeef3097, curie_class, &screen->curie); - if (ret) { - NOUVEAU_ERR("Error creating 3D object: %d\n", ret); - return FALSE; - } - - /* 2D engine setup */ - screen->eng2d = nv04_surface_2d_init(&screen->base); - screen->eng2d->buf = nv40_surface_buffer; - - /* Notifier for sync purposes */ - ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); - if (ret) { - NOUVEAU_ERR("Error creating notifier object: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Query objects */ - ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); - if (ret) { - NOUVEAU_ERR("Error initialising query objects: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - nouveau_resource_init(&screen->query_heap, 0, 32); - if (ret) { - NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Vtxprog resources */ - if (nouveau_resource_init(&screen->vp_exec_heap, 0, 512) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { - nv40_screen_destroy(pscreen); - return NULL; - } - - /* Static curie initialisation */ - so = so_new(16, 25, 0); - so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); - so_data (so, screen->sync->handle); - so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->gart->handle); - so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); - so_data (so, 0); - so_data (so, screen->query->handle); - so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); - so_data (so, chan->vram->handle); - so_data (so, chan->vram->handle); - - so_method(so, screen->curie, 0x1ea4, 3); - so_data (so, 0x00000010); - so_data (so, 0x01000100); - so_data (so, 0xff800006); - - /* vtxprog output routing */ - so_method(so, screen->curie, 0x1fc4, 1); - so_data (so, 0x06144321); - so_method(so, screen->curie, 0x1fc8, 2); - so_data (so, 0xedcba987); - so_data (so, 0x00000021); - so_method(so, screen->curie, 0x1fd0, 1); - so_data (so, 0x00171615); - so_method(so, screen->curie, 0x1fd4, 1); - so_data (so, 0x001b1a19); - - so_method(so, screen->curie, 0x1ef8, 1); - so_data (so, 0x0020ffff); - so_method(so, screen->curie, 0x1d64, 1); - so_data (so, 0x00d30000); - so_method(so, screen->curie, 0x1e94, 1); - so_data (so, 0x00000001); - - so_emit(chan, so); - so_ref(NULL, &so); - nouveau_pushbuf_flush(chan, 0); - - return pscreen; -} - diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h deleted file mode 100644 index 854dccf5486..00000000000 --- a/src/gallium/drivers/nv40/nv40_shader.h +++ /dev/null @@ -1,556 +0,0 @@ -#ifndef __NV40_SHADER_H__ -#define __NV40_SHADER_H__ - -/* Vertex programs instruction set - * - * The NV40 instruction set is very similar to NV30. Most fields are in - * a slightly different position in the instruction however. - * - * Merged instructions - * In some cases it is possible to put two instructions into one opcode - * slot. The rules for when this is OK is not entirely clear to me yet. - * - * There are separate writemasks and dest temp register fields for each - * grouping of instructions. There is however only one field with the - * ID of a result register. Writing to temp/result regs is selected by - * setting VEC_RESULT/SCA_RESULT. - * - * Temporary registers - * The source/dest temp register fields have been extended by 1 bit, to - * give a total of 32 temporary registers. - * - * Relative Addressing - * NV40 can use an address register to index into vertex attribute regs. - * This is done by putting the offset value into INPUT_SRC and setting - * the INDEX_INPUT flag. - * - * Conditional execution (see NV_vertex_program{2,3} for details) - * There is a second condition code register on NV40, it's use is enabled - * by setting the COND_REG_SELECT_1 flag. - * - * Texture lookup - * TODO - */ - -/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ -#define NV40_VP_INST_VEC_RESULT (1 << 30) -/* uncertain.. */ -#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) -/* use address reg as index into attribs */ -#define NV40_VP_INST_INDEX_INPUT (1 << 27) -#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) -#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) -#define NV40_VP_INST_SRC2_ABS (1 << 23) -#define NV40_VP_INST_SRC1_ABS (1 << 22) -#define NV40_VP_INST_SRC0_ABS (1 << 21) -#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 -#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) -#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) -#define NV40_VP_INST_COND_SHIFT 10 -#define NV40_VP_INST_COND_MASK (0x7 << 10) -# define NV40_VP_INST_COND_FL 0 -# define NV40_VP_INST_COND_LT 1 -# define NV40_VP_INST_COND_EQ 2 -# define NV40_VP_INST_COND_LE 3 -# define NV40_VP_INST_COND_GT 4 -# define NV40_VP_INST_COND_NE 5 -# define NV40_VP_INST_COND_GE 6 -# define NV40_VP_INST_COND_TR 7 -#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 -#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) -#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 -#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) -#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 -#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) -#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) -#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 -#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) -#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 -#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) -#define NV40_VP_INST0_KNOWN ( \ - NV40_VP_INST_INDEX_INPUT | \ - NV40_VP_INST_COND_REG_SELECT_1 | \ - NV40_VP_INST_ADDR_REG_SELECT_1 | \ - NV40_VP_INST_SRC2_ABS | \ - NV40_VP_INST_SRC1_ABS | \ - NV40_VP_INST_SRC0_ABS | \ - NV40_VP_INST_VEC_DEST_TEMP_MASK | \ - NV40_VP_INST_COND_TEST_ENABLE | \ - NV40_VP_INST_COND_MASK | \ - NV40_VP_INST_COND_SWZ_ALL_MASK | \ - NV40_VP_INST_ADDR_SWZ_MASK) - -/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ -#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 -#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_MUL 0x02 -# define NV40_VP_INST_OP_ADD 0x03 -# define NV40_VP_INST_OP_MAD 0x04 -# define NV40_VP_INST_OP_DP3 0x05 -# define NV40_VP_INST_OP_DPH 0x06 -# define NV40_VP_INST_OP_DP4 0x07 -# define NV40_VP_INST_OP_DST 0x08 -# define NV40_VP_INST_OP_MIN 0x09 -# define NV40_VP_INST_OP_MAX 0x0A -# define NV40_VP_INST_OP_SLT 0x0B -# define NV40_VP_INST_OP_SGE 0x0C -# define NV40_VP_INST_OP_ARL 0x0D -# define NV40_VP_INST_OP_FRC 0x0E -# define NV40_VP_INST_OP_FLR 0x0F -# define NV40_VP_INST_OP_SEQ 0x10 -# define NV40_VP_INST_OP_SFL 0x11 -# define NV40_VP_INST_OP_SGT 0x12 -# define NV40_VP_INST_OP_SLE 0x13 -# define NV40_VP_INST_OP_SNE 0x14 -# define NV40_VP_INST_OP_STR 0x15 -# define NV40_VP_INST_OP_SSG 0x16 -# define NV40_VP_INST_OP_ARR 0x17 -# define NV40_VP_INST_OP_ARA 0x18 -# define NV40_VP_INST_OP_TXL 0x19 -#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 -#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) -# define NV40_VP_INST_OP_NOP 0x00 -# define NV40_VP_INST_OP_MOV 0x01 -# define NV40_VP_INST_OP_RCP 0x02 -# define NV40_VP_INST_OP_RCC 0x03 -# define NV40_VP_INST_OP_RSQ 0x04 -# define NV40_VP_INST_OP_EXP 0x05 -# define NV40_VP_INST_OP_LOG 0x06 -# define NV40_VP_INST_OP_LIT 0x07 -# define NV40_VP_INST_OP_BRA 0x09 -# define NV40_VP_INST_OP_CAL 0x0B -# define NV40_VP_INST_OP_RET 0x0C -# define NV40_VP_INST_OP_LG2 0x0D -# define NV40_VP_INST_OP_EX2 0x0E -# define NV40_VP_INST_OP_SIN 0x0F -# define NV40_VP_INST_OP_COS 0x10 -# define NV40_VP_INST_OP_PUSHA 0x13 -# define NV40_VP_INST_OP_POPA 0x14 -#define NV40_VP_INST_CONST_SRC_SHIFT 12 -#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) -#define NV40_VP_INST_INPUT_SRC_SHIFT 8 -#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) -# define NV40_VP_INST_IN_POS 0 -# define NV40_VP_INST_IN_WEIGHT 1 -# define NV40_VP_INST_IN_NORMAL 2 -# define NV40_VP_INST_IN_COL0 3 -# define NV40_VP_INST_IN_COL1 4 -# define NV40_VP_INST_IN_FOGC 5 -# define NV40_VP_INST_IN_TC0 8 -# define NV40_VP_INST_IN_TC(n) (8+n) -#define NV40_VP_INST_SRC0H_SHIFT 0 -#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) -#define NV40_VP_INST1_KNOWN ( \ - NV40_VP_INST_VEC_OPCODE_MASK | \ - NV40_VP_INST_SCA_OPCODE_MASK | \ - NV40_VP_INST_CONST_SRC_MASK | \ - NV40_VP_INST_INPUT_SRC_MASK | \ - NV40_VP_INST_SRC0H_MASK \ - ) - -/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ -#define NV40_VP_INST_SRC0L_SHIFT 23 -#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) -#define NV40_VP_INST_SRC1_SHIFT 6 -#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) -#define NV40_VP_INST_SRC2H_SHIFT 0 -#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) -#define NV40_VP_INST_IADDRH_SHIFT 0 -#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) - -/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ -#define NV40_VP_INST_IADDRL_SHIFT 29 -#define NV40_VP_INST_IADDRL_MASK (7 << 29) -#define NV40_VP_INST_SRC2L_SHIFT 21 -#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) -#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 -#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) -# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) -# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) -# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) -# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) -#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 -#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) -# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) -# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) -# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) -# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) -#define NV40_VP_INST_SCA_RESULT (1 << 12) -#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 -#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) -#define NV40_VP_INST_DEST_SHIFT 2 -#define NV40_VP_INST_DEST_MASK (31 << 2) -# define NV40_VP_INST_DEST_POS 0 -# define NV40_VP_INST_DEST_COL0 1 -# define NV40_VP_INST_DEST_COL1 2 -# define NV40_VP_INST_DEST_BFC0 3 -# define NV40_VP_INST_DEST_BFC1 4 -# define NV40_VP_INST_DEST_FOGC 5 -# define NV40_VP_INST_DEST_PSZ 6 -# define NV40_VP_INST_DEST_TC0 7 -# define NV40_VP_INST_DEST_TC(n) (7+n) -# define NV40_VP_INST_DEST_TEMP 0x1F -#define NV40_VP_INST_INDEX_CONST (1 << 1) -#define NV40_VP_INST_LAST (1 << 0) -#define NV40_VP_INST3_KNOWN ( \ - NV40_VP_INST_SRC2L_MASK |\ - NV40_VP_INST_SCA_WRITEMASK_MASK |\ - NV40_VP_INST_VEC_WRITEMASK_MASK |\ - NV40_VP_INST_SCA_DEST_TEMP_MASK |\ - NV40_VP_INST_DEST_MASK |\ - NV40_VP_INST_INDEX_CONST) - -/* Useful to split the source selection regs into their pieces */ -#define NV40_VP_SRC0_HIGH_SHIFT 9 -#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 -#define NV40_VP_SRC0_LOW_MASK 0x000001FF -#define NV40_VP_SRC2_HIGH_SHIFT 11 -#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 -#define NV40_VP_SRC2_LOW_MASK 0x000007FF - -/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ -#define NV40_VP_SRC_NEGATE (1 << 16) -#define NV40_VP_SRC_SWZ_X_SHIFT 14 -#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) -#define NV40_VP_SRC_SWZ_Y_SHIFT 12 -#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) -#define NV40_VP_SRC_SWZ_Z_SHIFT 10 -#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) -#define NV40_VP_SRC_SWZ_W_SHIFT 8 -#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) -#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 -#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) -#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 -#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) -#define NV40_VP_SRC_REG_TYPE_SHIFT 0 -#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) -# define NV40_VP_SRC_REG_TYPE_UNK0 0 -# define NV40_VP_SRC_REG_TYPE_TEMP 1 -# define NV40_VP_SRC_REG_TYPE_INPUT 2 -# define NV40_VP_SRC_REG_TYPE_CONST 3 - - -/* - * Each fragment program opcode appears to be comprised of 4 32-bit values. - * - * 0 - Opcode, output reg/mask, ATTRIB source - * 1 - Source 0 - * 2 - Source 1 - * 3 - Source 2 - * - * There appears to be no special difference between result regs and temp regs. - * result.color == R0.xyzw - * result.depth == R1.z - * When the fragprog contains instructions to write depth, - * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. - * - * Constants are inserted directly after the instruction that uses them. - * - * It appears that it's not possible to use two input registers in one - * instruction as the input sourcing is done in the instruction dword - * and not the source selection dwords. As such instructions such as: - * - * ADD result.color, fragment.color, fragment.texcoord[0]; - * - * must be split into two MOV's and then an ADD (nvidia does this) but - * I'm not sure why it's not just one MOV and then source the second input - * in the ADD instruction.. - * - * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary - * negation requires multiplication with a const. - * - * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and - * SWIZZLE_ONE. - * - * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as - * SWIZZLE_ZERO is implemented simply by not writing to the relevant components - * of the destination. - * - * Looping - * Loops appear to be fairly expensive on NV40 at least, the proprietary - * driver goes to a lot of effort to avoid using the native looping - * instructions. If the total number of *executed* instructions between - * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. - * The maximum loop count is 255. - * - * Conditional execution - * TODO - * - * Non-native instructions: - * LIT - * LRP - MAD+MAD - * SUB - ADD, negate second source - * RSQ - LG2 + EX2 - * POW - LG2 + MUL + EX2 - * SCS - COS + SIN - * XPD - * DP2 - MUL + ADD - * NRM - */ - -//== Opcode / Destination selection == -#define NV40_FP_OP_PROGRAM_END (1 << 0) -#define NV40_FP_OP_OUT_REG_SHIFT 1 -#define NV40_FP_OP_OUT_REG_MASK (63 << 1) -/* Needs to be set when writing outputs to get expected result.. */ -#define NV40_FP_OP_OUT_REG_HALF (1 << 7) -#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) -#define NV40_FP_OP_OUTMASK_SHIFT 9 -#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) -# define NV40_FP_OP_OUT_X (1 << 9) -# define NV40_FP_OP_OUT_Y (1 <<10) -# define NV40_FP_OP_OUT_Z (1 <<11) -# define NV40_FP_OP_OUT_W (1 <<12) -/* Uncertain about these, especially the input_src values.. it's possible that - * they can be dynamically changed. - */ -#define NV40_FP_OP_INPUT_SRC_SHIFT 13 -#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) -# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 -# define NV40_FP_OP_INPUT_SRC_COL0 0x1 -# define NV40_FP_OP_INPUT_SRC_COL1 0x2 -# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 -# define NV40_FP_OP_INPUT_SRC_TC0 0x4 -# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) -# define NV40_FP_OP_INPUT_SRC_FACING 0xE -#define NV40_FP_OP_TEX_UNIT_SHIFT 17 -#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) -#define NV40_FP_OP_PRECISION_SHIFT 22 -#define NV40_FP_OP_PRECISION_MASK (3 << 22) -# define NV40_FP_PRECISION_FP32 0 -# define NV40_FP_PRECISION_FP16 1 -# define NV40_FP_PRECISION_FX12 2 -#define NV40_FP_OP_OPCODE_SHIFT 24 -#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) -# define NV40_FP_OP_OPCODE_NOP 0x00 -# define NV40_FP_OP_OPCODE_MOV 0x01 -# define NV40_FP_OP_OPCODE_MUL 0x02 -# define NV40_FP_OP_OPCODE_ADD 0x03 -# define NV40_FP_OP_OPCODE_MAD 0x04 -# define NV40_FP_OP_OPCODE_DP3 0x05 -# define NV40_FP_OP_OPCODE_DP4 0x06 -# define NV40_FP_OP_OPCODE_DST 0x07 -# define NV40_FP_OP_OPCODE_MIN 0x08 -# define NV40_FP_OP_OPCODE_MAX 0x09 -# define NV40_FP_OP_OPCODE_SLT 0x0A -# define NV40_FP_OP_OPCODE_SGE 0x0B -# define NV40_FP_OP_OPCODE_SLE 0x0C -# define NV40_FP_OP_OPCODE_SGT 0x0D -# define NV40_FP_OP_OPCODE_SNE 0x0E -# define NV40_FP_OP_OPCODE_SEQ 0x0F -# define NV40_FP_OP_OPCODE_FRC 0x10 -# define NV40_FP_OP_OPCODE_FLR 0x11 -# define NV40_FP_OP_OPCODE_KIL 0x12 -# define NV40_FP_OP_OPCODE_PK4B 0x13 -# define NV40_FP_OP_OPCODE_UP4B 0x14 -/* DDX/DDY can only write to XY */ -# define NV40_FP_OP_OPCODE_DDX 0x15 -# define NV40_FP_OP_OPCODE_DDY 0x16 -# define NV40_FP_OP_OPCODE_TEX 0x17 -# define NV40_FP_OP_OPCODE_TXP 0x18 -# define NV40_FP_OP_OPCODE_TXD 0x19 -# define NV40_FP_OP_OPCODE_RCP 0x1A -# define NV40_FP_OP_OPCODE_EX2 0x1C -# define NV40_FP_OP_OPCODE_LG2 0x1D -# define NV40_FP_OP_OPCODE_STR 0x20 -# define NV40_FP_OP_OPCODE_SFL 0x21 -# define NV40_FP_OP_OPCODE_COS 0x22 -# define NV40_FP_OP_OPCODE_SIN 0x23 -# define NV40_FP_OP_OPCODE_PK2H 0x24 -# define NV40_FP_OP_OPCODE_UP2H 0x25 -# define NV40_FP_OP_OPCODE_PK4UB 0x27 -# define NV40_FP_OP_OPCODE_UP4UB 0x28 -# define NV40_FP_OP_OPCODE_PK2US 0x29 -# define NV40_FP_OP_OPCODE_UP2US 0x2A -# define NV40_FP_OP_OPCODE_DP2A 0x2E -# define NV40_FP_OP_OPCODE_TXL 0x2F -# define NV40_FP_OP_OPCODE_TXB 0x31 -# define NV40_FP_OP_OPCODE_DIV 0x3A -# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C -/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ -# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 -# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 -# define NV40_FP_OP_BRA_OPCODE_IF 0x2 -# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 -# define NV40_FP_OP_BRA_OPCODE_REP 0x4 -# define NV40_FP_OP_BRA_OPCODE_RET 0x5 -#define NV40_FP_OP_OUT_SAT (1 << 31) - -/* high order bits of SRC0 */ -#define NV40_FP_OP_OUT_ABS (1 << 29) -#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 -#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) -#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 -#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) -#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 -#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) -#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) -#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 -#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) -#define NV40_FP_OP_COND_SHIFT 18 -#define NV40_FP_OP_COND_MASK (0x07 << 18) -# define NV40_FP_OP_COND_FL 0 -# define NV40_FP_OP_COND_LT 1 -# define NV40_FP_OP_COND_EQ 2 -# define NV40_FP_OP_COND_LE 3 -# define NV40_FP_OP_COND_GT 4 -# define NV40_FP_OP_COND_NE 5 -# define NV40_FP_OP_COND_GE 6 -# define NV40_FP_OP_COND_TR 7 - -/* high order bits of SRC1 */ -#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) -#define NV40_FP_OP_DST_SCALE_SHIFT 28 -#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) -#define NV40_FP_OP_DST_SCALE_1X 0 -#define NV40_FP_OP_DST_SCALE_2X 1 -#define NV40_FP_OP_DST_SCALE_4X 2 -#define NV40_FP_OP_DST_SCALE_8X 3 -#define NV40_FP_OP_DST_SCALE_INV_2X 5 -#define NV40_FP_OP_DST_SCALE_INV_4X 6 -#define NV40_FP_OP_DST_SCALE_INV_8X 7 - -/* SRC1 LOOP */ -#define NV40_FP_OP_LOOP_INCR_SHIFT 19 -#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) -#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 -#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) -#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 -#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) - -/* SRC1 IF */ -#define NV40_FP_OP_ELSE_ID_SHIFT 2 -#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) - -/* SRC1 CAL */ -#define NV40_FP_OP_IADDR_SHIFT 2 -#define NV40_FP_OP_IADDR_MASK (0xFF << 2) - -/* SRC1 REP - * I have no idea why there are 3 count values here.. but they - * have always been filled with the same value in my tests so - * far.. - */ -#define NV40_FP_OP_REP_COUNT1_SHIFT 2 -#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) -#define NV40_FP_OP_REP_COUNT2_SHIFT 10 -#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) -#define NV40_FP_OP_REP_COUNT3_SHIFT 19 -#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) - -/* SRC2 REP/IF */ -#define NV40_FP_OP_END_ID_SHIFT 2 -#define NV40_FP_OP_END_ID_MASK (0xFF << 2) - -// SRC2 high-order -#define NV40_FP_OP_INDEX_INPUT (1 << 30) -#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 -#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) - -//== Register selection == -#define NV40_FP_REG_TYPE_SHIFT 0 -#define NV40_FP_REG_TYPE_MASK (3 << 0) -# define NV40_FP_REG_TYPE_TEMP 0 -# define NV40_FP_REG_TYPE_INPUT 1 -# define NV40_FP_REG_TYPE_CONST 2 -#define NV40_FP_REG_SRC_SHIFT 2 -#define NV40_FP_REG_SRC_MASK (63 << 2) -#define NV40_FP_REG_SRC_HALF (1 << 8) -#define NV40_FP_REG_SWZ_ALL_SHIFT 9 -#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) -#define NV40_FP_REG_SWZ_X_SHIFT 9 -#define NV40_FP_REG_SWZ_X_MASK (3 << 9) -#define NV40_FP_REG_SWZ_Y_SHIFT 11 -#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) -#define NV40_FP_REG_SWZ_Z_SHIFT 13 -#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) -#define NV40_FP_REG_SWZ_W_SHIFT 15 -#define NV40_FP_REG_SWZ_W_MASK (3 << 15) -# define NV40_FP_SWIZZLE_X 0 -# define NV40_FP_SWIZZLE_Y 1 -# define NV40_FP_SWIZZLE_Z 2 -# define NV40_FP_SWIZZLE_W 3 -#define NV40_FP_REG_NEGATE (1 << 17) - -#ifndef NV40_SHADER_NO_FUCKEDNESS -#define NV40SR_NONE 0 -#define NV40SR_OUTPUT 1 -#define NV40SR_INPUT 2 -#define NV40SR_TEMP 3 -#define NV40SR_CONST 4 - -struct nv40_sreg { - int type; - int index; - - int dst_scale; - - int negate; - int abs; - int swz[4]; - - int cc_update; - int cc_update_reg; - int cc_test; - int cc_test_reg; - int cc_swz[4]; -}; - -static INLINE struct nv40_sreg -nv40_sr(int type, int index) -{ - struct nv40_sreg temp = { - .type = type, - .index = index, - .dst_scale = DEF_SCALE, - .abs = 0, - .negate = 0, - .swz = { 0, 1, 2, 3 }, - .cc_update = 0, - .cc_update_reg = 0, - .cc_test = DEF_CTEST, - .cc_test_reg = 0, - .cc_swz = { 0, 1, 2, 3 }, - }; - return temp; -} - -static INLINE struct nv40_sreg -nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) -{ - struct nv40_sreg dst = src; - - dst.swz[SWZ_X] = src.swz[x]; - dst.swz[SWZ_Y] = src.swz[y]; - dst.swz[SWZ_Z] = src.swz[z]; - dst.swz[SWZ_W] = src.swz[w]; - return dst; -} - -static INLINE struct nv40_sreg -nv40_sr_neg(struct nv40_sreg src) -{ - src.negate = !src.negate; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_abs(struct nv40_sreg src) -{ - src.abs = 1; - return src; -} - -static INLINE struct nv40_sreg -nv40_sr_scale(struct nv40_sreg src, int scale) -{ - src.dst_scale = scale; - return src; -} -#endif - -#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c deleted file mode 100644 index 4f28675e64c..00000000000 --- a/src/gallium/drivers/nv40/nv40_state.c +++ /dev/null @@ -1,743 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -static void * -nv40_blend_state_create(struct pipe_context *pipe, - const struct pipe_blend_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nouveau_grobj *curie = nv40->screen->curie; - struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); - struct nouveau_stateobj *so = so_new(5, 8, 0); - - if (cso->rt[0].blend_enable) { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); - so_data (so, 1); - so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | - nvgl_blend_func(cso->rt[0].rgb_src_factor)); - so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | - nvgl_blend_func(cso->rt[0].rgb_dst_factor)); - so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); - so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | - nvgl_blend_eqn(cso->rt[0].rgb_func)); - } else { - so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_COLOR_MASK, 1); - so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | - ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); - - if (cso->logicop_enable) { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); - so_data (so, 1); - so_data (so, nvgl_logicop_func(cso->logicop_func)); - } else { - so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); - so_data (so, 0); - } - - so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); - so_data (so, cso->dither ? 1 : 0); - - so_ref(so, &bso->so); - so_ref(NULL, &so); - bso->pipe = *cso; - return (void *)bso; -} - -static void -nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->blend = hwcso; - nv40->dirty |= NV40_NEW_BLEND; -} - -static void -nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_blend_state *bso = hwcso; - - so_ref(NULL, &bso->so); - FREE(bso); -} - - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV40TCL_TEX_WRAP_S_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; - break; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV40TCL_TEX_WRAP_S_REPEAT; - break; - } - - return ret >> NV40TCL_TEX_WRAP_S_SHIFT; -} - -static void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nv40_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nv40_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV40TCL_TEX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } - - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - -static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - nv40->tex_sampler[unit] = sampler[unit]; - nv40->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv40->nr_samplers; unit++) { - nv40->tex_sampler[unit] = NULL; - nv40->dirty_samplers |= (1 << unit); - } - - nv40->nr_samplers = nr; - nv40->dirty |= NV40_NEW_SAMPLER; -} - -static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); -} - -static void -nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, - struct pipe_texture **miptree) -{ - struct nv40_context *nv40 = nv40_context(pipe); - unsigned unit; - - for (unit = 0; unit < nr; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv40->tex_miptree[unit], miptree[unit]); - nv40->dirty_samplers |= (1 << unit); - } - - for (unit = nr; unit < nv40->nr_textures; unit++) { - pipe_texture_reference((struct pipe_texture **) - &nv40->tex_miptree[unit], NULL); - nv40->dirty_samplers |= (1 << unit); - } - - nv40->nr_textures = nr; - nv40->dirty |= NV40_NEW_SAMPLER; -} - -static void * -nv40_rasterizer_state_create(struct pipe_context *pipe, - const struct pipe_rasterizer_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); - struct nouveau_stateobj *so = so_new(9, 19, 0); - struct nouveau_grobj *curie = nv40->screen->curie; - - /*XXX: ignored: - * light_twoside - * point_smooth -nohw - * multisample - */ - - so_method(so, curie, NV40TCL_SHADE_MODEL, 1); - so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : - NV40TCL_SHADE_MODEL_SMOOTH); - - so_method(so, curie, NV40TCL_LINE_WIDTH, 2); - so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); - so_data (so, cso->line_smooth ? 1 : 0); - so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); - so_data (so, cso->line_stipple_enable ? 1 : 0); - so_data (so, (cso->line_stipple_pattern << 16) | - cso->line_stipple_factor); - - so_method(so, curie, NV40TCL_POINT_SIZE, 1); - so_data (so, fui(cso->point_size)); - - so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); - if (cso->front_winding == PIPE_WINDING_CCW) { - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CCW); - } else { - so_data(so, nvgl_polygon_mode(cso->fill_cw)); - so_data(so, nvgl_polygon_mode(cso->fill_ccw)); - switch (cso->cull_mode) { - case PIPE_WINDING_CCW: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - case PIPE_WINDING_CW: - so_data(so, NV40TCL_CULL_FACE_FRONT); - break; - case PIPE_WINDING_BOTH: - so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); - break; - default: - so_data(so, NV40TCL_CULL_FACE_BACK); - break; - } - so_data(so, NV40TCL_FRONT_FACE_CW); - } - so_data(so, cso->poly_smooth ? 1 : 0); - so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); - - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, cso->poly_stipple_enable ? 1 : 0); - - so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) - so_data(so, 1); - else - so_data(so, 0); - if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || - (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) - so_data(so, 1); - else - so_data(so, 0); - if (cso->offset_cw || cso->offset_ccw) { - so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); - so_data (so, fui(cso->offset_scale)); - so_data (so, fui(cso->offset_units * 2)); - } - - so_method(so, curie, NV40TCL_POINT_SPRITE, 1); - if (cso->point_quad_rasterization) { - unsigned psctl = (1 << 0), i; - - for (i = 0; i < 8; i++) { - if ((cso->sprite_coord_enable >> i) & 1) - psctl |= (1 << (8 + i)); - } - - so_data(so, psctl); - } else { - so_data(so, 0); - } - - so_ref(so, &rsso->so); - so_ref(NULL, &so); - rsso->pipe = *cso; - return (void *)rsso; -} - -static void -nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->rasterizer = hwcso; - nv40->dirty |= NV40_NEW_RAST; - nv40->draw_dirty |= NV40_NEW_RAST; -} - -static void -nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_rasterizer_state *rsso = hwcso; - - so_ref(NULL, &rsso->so); - FREE(rsso); -} - -static void * -nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); - struct nouveau_stateobj *so = so_new(6, 20, 0); - struct nouveau_grobj *curie = nv40->screen->curie; - - so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); - so_data (so, nvgl_comparison_op(cso->depth.func)); - so_data (so, cso->depth.writemask ? 1 : 0); - so_data (so, cso->depth.enabled ? 1 : 0); - - so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); - so_data (so, cso->alpha.enabled ? 1 : 0); - so_data (so, nvgl_comparison_op(cso->alpha.func)); - so_data (so, float_to_ubyte(cso->alpha.ref_value)); - - if (cso->stencil[0].enabled) { - so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 3); - so_data (so, cso->stencil[0].enabled ? 1 : 0); - so_data (so, cso->stencil[0].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[0].func)); - so_method(so, curie, NV40TCL_STENCIL_FRONT_FUNC_MASK, 4); - so_data (so, cso->stencil[0].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); - } else { - so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); - so_data (so, 0); - } - - if (cso->stencil[1].enabled) { - so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 3); - so_data (so, cso->stencil[1].enabled ? 1 : 0); - so_data (so, cso->stencil[1].writemask); - so_data (so, nvgl_comparison_op(cso->stencil[1].func)); - so_method(so, curie, NV40TCL_STENCIL_BACK_FUNC_MASK, 4); - so_data (so, cso->stencil[1].valuemask); - so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); - so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); - } else { - so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &zsaso->so); - so_ref(NULL, &so); - zsaso->pipe = *cso; - return (void *)zsaso; -} - -static void -nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->zsa = hwcso; - nv40->dirty |= NV40_NEW_ZSA; -} - -static void -nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_zsa_state *zsaso = hwcso; - - so_ref(NULL, &zsaso->so); - FREE(zsaso); -} - -static void * -nv40_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp; - - vp = CALLOC(1, sizeof(struct nv40_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); - - return (void *)vp; -} - -static void -nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->vertprog = hwcso; - nv40->dirty |= NV40_NEW_VERTPROG; - nv40->draw_dirty |= NV40_NEW_VERTPROG; -} - -static void -nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_vertex_program *vp = hwcso; - - draw_delete_vertex_shader(nv40->draw, vp->draw); - nv40_vertprog_destroy(nv40, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); -} - -static void * -nv40_fp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state *cso) -{ - struct nv40_fragment_program *fp; - - fp = CALLOC(1, sizeof(struct nv40_fragment_program)); - fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - - tgsi_scan_shader(fp->pipe.tokens, &fp->info); - - return (void *)fp; -} - -static void -nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->fragprog = hwcso; - nv40->dirty |= NV40_NEW_FRAGPROG; -} - -static void -nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_fragment_program *fp = hwcso; - - nv40_fragprog_destroy(nv40, fp); - FREE((void*)fp->pipe.tokens); - FREE(fp); -} - -static void -nv40_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *bcol) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->blend_colour = *bcol; - nv40->dirty |= NV40_NEW_BCOL; -} - - static void -nv40_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *sr) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->stencil_ref = *sr; - nv40->dirty |= NV40_NEW_SR; -} - -static void -nv40_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->clip = *clip; - nv40->dirty |= NV40_NEW_UCP; - nv40->draw_dirty |= NV40_NEW_UCP; -} - -static void -nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf ) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->constbuf[shader] = buf; - nv40->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); - - if (shader == PIPE_SHADER_VERTEX) { - nv40->dirty |= NV40_NEW_VERTPROG; - } else - if (shader == PIPE_SHADER_FRAGMENT) { - nv40->dirty |= NV40_NEW_FRAGPROG; - } -} - -static void -nv40_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->framebuffer = *fb; - nv40->dirty |= NV40_NEW_FB; -} - -static void -nv40_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - memcpy(nv40->stipple, stipple->stipple, 4 * 32); - nv40->dirty |= NV40_NEW_STIPPLE; -} - -static void -nv40_set_scissor_state(struct pipe_context *pipe, - const struct pipe_scissor_state *s) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->scissor = *s; - nv40->dirty |= NV40_NEW_SCISSOR; -} - -static void -nv40_set_viewport_state(struct pipe_context *pipe, - const struct pipe_viewport_state *vpt) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - nv40->viewport = *vpt; - nv40->dirty |= NV40_NEW_VIEWPORT; - nv40->draw_dirty |= NV40_NEW_VIEWPORT; -} - -static void -nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_buffer *vb) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); - nv40->vtxbuf_nr = count; - - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->draw_dirty |= NV40_NEW_ARRAYS; -} - -static void -nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_element *ve) -{ - struct nv40_context *nv40 = nv40_context(pipe); - - memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); - nv40->vtxelt_nr = count; - - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->draw_dirty |= NV40_NEW_ARRAYS; -} - -void -nv40_init_state_functions(struct nv40_context *nv40) -{ - nv40->pipe.create_blend_state = nv40_blend_state_create; - nv40->pipe.bind_blend_state = nv40_blend_state_bind; - nv40->pipe.delete_blend_state = nv40_blend_state_delete; - - nv40->pipe.create_sampler_state = nv40_sampler_state_create; - nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; - nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; - nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture; - - nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; - nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; - nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; - - nv40->pipe.create_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_create; - nv40->pipe.bind_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_bind; - nv40->pipe.delete_depth_stencil_alpha_state = - nv40_depth_stencil_alpha_state_delete; - - nv40->pipe.create_vs_state = nv40_vp_state_create; - nv40->pipe.bind_vs_state = nv40_vp_state_bind; - nv40->pipe.delete_vs_state = nv40_vp_state_delete; - - nv40->pipe.create_fs_state = nv40_fp_state_create; - nv40->pipe.bind_fs_state = nv40_fp_state_bind; - nv40->pipe.delete_fs_state = nv40_fp_state_delete; - - nv40->pipe.set_blend_color = nv40_set_blend_color; - nv40->pipe.set_stencil_ref = nv40_set_stencil_ref; - nv40->pipe.set_clip_state = nv40_set_clip_state; - nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; - nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; - nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; - nv40->pipe.set_scissor_state = nv40_set_scissor_state; - nv40->pipe.set_viewport_state = nv40_set_viewport_state; - - nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; - nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; -} - diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c deleted file mode 100644 index 3ff00a37f66..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_blend.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_blend_validate(struct nv40_context *nv40) -{ - so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); - return TRUE; -} - -struct nv40_state_entry nv40_state_blend = { - .validate = nv40_state_blend_validate, - .dirty = { - .pipe = NV40_NEW_BLEND, - .hw = NV40_STATE_BLEND - } -}; - -static boolean -nv40_state_blend_colour_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *so = so_new(1, 1, 0); - struct pipe_blend_color *bcol = &nv40->blend_colour; - - so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); - so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | - (float_to_ubyte(bcol->color[0]) << 16) | - (float_to_ubyte(bcol->color[1]) << 8) | - (float_to_ubyte(bcol->color[2]) << 0))); - - so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_blend_colour = { - .validate = nv40_state_blend_colour_validate, - .dirty = { - .pipe = NV40_NEW_BCOL, - .hw = NV40_STATE_BCOL - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c deleted file mode 100644 index 8990f303ce4..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ /dev/null @@ -1,189 +0,0 @@ -#include "nv40_context.h" -#include "nv40_state.h" -#include "draw/draw_context.h" - -static struct nv40_state_entry *render_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vbo, - NULL -}; - -static struct nv40_state_entry *swtnl_states[] = { - &nv40_state_framebuffer, - &nv40_state_rasterizer, - &nv40_state_scissor, - &nv40_state_stipple, - &nv40_state_fragprog, - &nv40_state_fragtex, - &nv40_state_vertprog, - &nv40_state_blend, - &nv40_state_blend_colour, - &nv40_state_zsa, - &nv40_state_sr, - &nv40_state_viewport, - &nv40_state_vtxfmt, - NULL -}; - -static void -nv40_state_do_validate(struct nv40_context *nv40, - struct nv40_state_entry **states) -{ - while (*states) { - struct nv40_state_entry *e = *states; - - if (nv40->dirty & e->dirty.pipe) { - if (e->validate(nv40)) - nv40->state.dirty |= (1ULL << e->dirty.hw); - } - - states++; - } - nv40->dirty = 0; -} - -void -nv40_state_emit(struct nv40_context *nv40) -{ - struct nv40_state *state = &nv40->state; - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned i; - uint64_t states; - - /* XXX: race conditions - */ - if (nv40 != screen->cur_ctx) { - for (i = 0; i < NV40_STATE_MAX; i++) { - if (state->hw[i] && screen->state[i] != state->hw[i]) - state->dirty |= (1ULL << i); - } - - screen->cur_ctx = nv40; - } - - for (i = 0, states = state->dirty; states; i++) { - if (!(states & (1ULL << i))) - continue; - so_ref (state->hw[i], &nv40->screen->state[i]); - if (state->hw[i]) - so_emit(chan, nv40->screen->state[i]); - states &= ~(1ULL << i); - } - - if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | - (1ULL << NV40_STATE_FRAGTEX0))) { - BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 2); - BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1); - OUT_RING (chan, 1); - } - - state->dirty = 0; -} - -void -nv40_state_flush_notify(struct nouveau_channel *chan) -{ - struct nv40_context *nv40 = chan->user_private; - struct nv40_state *state = &nv40->state; - unsigned i, samplers; - - so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]); - for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { - if (!(samplers & (1 << i))) - continue; - so_emit_reloc_markers(chan, - state->hw[NV40_STATE_FRAGTEX0+i]); - samplers &= ~(1ULL << i); - } - so_emit_reloc_markers(chan, state->hw[NV40_STATE_FRAGPROG]); - if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) - so_emit_reloc_markers(chan, state->hw[NV40_STATE_VTXBUF]); -} - -boolean -nv40_state_validate(struct nv40_context *nv40) -{ - boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; - - if (nv40->render_mode != HW) { - /* Don't even bother trying to go back to hw if none - * of the states that caused swtnl previously have changed. - */ - if ((nv40->fallback_swtnl & nv40->dirty) - != nv40->fallback_swtnl) - return FALSE; - - /* Attempt to go to hwtnl again */ - nv40->pipe.flush(&nv40->pipe, 0, NULL); - nv40->dirty |= (NV40_NEW_VIEWPORT | - NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS); - nv40->render_mode = HW; - } - - nv40_state_do_validate(nv40, render_states); - if (nv40->fallback_swtnl || nv40->fallback_swrast) - return FALSE; - - if (was_sw) - NOUVEAU_ERR("swtnl->hw\n"); - - return TRUE; -} - -boolean -nv40_state_validate_swtnl(struct nv40_context *nv40) -{ - struct draw_context *draw = nv40->draw; - - /* Setup for swtnl */ - if (nv40->render_mode == HW) { - NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); - nv40->pipe.flush(&nv40->pipe, 0, NULL); - nv40->dirty |= (NV40_NEW_VIEWPORT | - NV40_NEW_VERTPROG | - NV40_NEW_ARRAYS); - nv40->render_mode = SWTNL; - } - - if (nv40->draw_dirty & NV40_NEW_VERTPROG) - draw_bind_vertex_shader(draw, nv40->vertprog->draw); - - if (nv40->draw_dirty & NV40_NEW_RAST) - draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); - - if (nv40->draw_dirty & NV40_NEW_UCP) - draw_set_clip_state(draw, &nv40->clip); - - if (nv40->draw_dirty & NV40_NEW_VIEWPORT) - draw_set_viewport_state(draw, &nv40->viewport); - - if (nv40->draw_dirty & NV40_NEW_ARRAYS) { - draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); - draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); - } - - nv40_state_do_validate(nv40, swtnl_states); - if (nv40->fallback_swrast) { - NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); - return FALSE; - } - - nv40->draw_dirty = 0; - return TRUE; -} - diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c deleted file mode 100644 index a58fe9ddb19..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ /dev/null @@ -1,175 +0,0 @@ -#include "nv40_context.h" -#include "nouveau/nouveau_util.h" - -static struct pipe_buffer * -nv40_do_surface_buffer(struct pipe_surface *surface) -{ - struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; - return mt->buffer; -} - -#define nv40_surface_buffer(ps) nouveau_bo(nv40_do_surface_buffer(ps)) - -static boolean -nv40_state_framebuffer_validate(struct nv40_context *nv40) -{ - struct nouveau_channel *chan = nv40->screen->base.channel; - struct nouveau_grobj *curie = nv40->screen->curie; - struct pipe_framebuffer_state *fb = &nv40->framebuffer; - struct nv04_surface *rt[4], *zeta; - uint32_t rt_enable, rt_format; - int i, colour_format = 0, zeta_format = 0; - struct nouveau_stateobj *so = so_new(18, 24, 10); - unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; - unsigned w = fb->width; - unsigned h = fb->height; - - rt_enable = 0; - for (i = 0; i < fb->nr_cbufs; i++) { - if (colour_format) { - assert(colour_format == fb->cbufs[i]->format); - } else { - colour_format = fb->cbufs[i]->format; - rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); - rt[i] = (struct nv04_surface *)fb->cbufs[i]; - } - } - - if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | - NV40TCL_RT_ENABLE_COLOR3)) - rt_enable |= NV40TCL_RT_ENABLE_MRT; - - if (fb->zsbuf) { - zeta_format = fb->zsbuf->format; - zeta = (struct nv04_surface *)fb->zsbuf; - } - - if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { - assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->nr_cbufs; i++) - assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); - - rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | - log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | - log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; - } - else - rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; - - switch (colour_format) { - case PIPE_FORMAT_X8R8G8B8_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; - break; - case PIPE_FORMAT_R5G6B5_UNORM: - rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; - break; - default: - assert(0); - } - - switch (zeta_format) { - case PIPE_FORMAT_Z16_UNORM: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case 0: - rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; - break; - default: - assert(0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { - so_method(so, curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR0_PITCH, 2); - so_data (so, rt[0]->pitch); - so_reloc (so, nv40_surface_buffer(&rt[0]->base), - rt[0]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { - so_method(so, curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, nv40_surface_buffer(&rt[1]->base), - rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_data (so, rt[1]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { - so_method(so, curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[2]->base), - rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, curie, NV40TCL_COLOR2_PITCH, 1); - so_data (so, rt[2]->pitch); - } - - if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { - so_method(so, curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&rt[3]->base), - rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, - 0, 0); - so_method(so, curie, NV40TCL_COLOR3_PITCH, 1); - so_data (so, rt[3]->pitch); - } - - if (zeta_format) { - so_method(so, curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), 0, - rt_flags | NOUVEAU_BO_OR, - chan->vram->handle, chan->gart->handle); - so_method(so, curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, nv40_surface_buffer(&zeta->base), - zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); - so_method(so, curie, NV40TCL_ZETA_PITCH, 1); - so_data (so, zeta->pitch); - } - - so_method(so, curie, NV40TCL_RT_ENABLE, 1); - so_data (so, rt_enable); - so_method(so, curie, NV40TCL_RT_HORIZ, 3); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_data (so, rt_format); - so_method(so, curie, NV40TCL_VIEWPORT_HORIZ, 2); - so_data (so, (w << 16) | 0); - so_data (so, (h << 16) | 0); - so_method(so, curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); - so_data (so, ((w - 1) << 16) | 0); - so_data (so, ((h - 1) << 16) | 0); - so_method(so, curie, 0x1d88, 1); - so_data (so, (1 << 12) | h); - - so_ref(so, &nv40->state.hw[NV40_STATE_FB]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_framebuffer = { - .validate = nv40_state_framebuffer_validate, - .dirty = { - .pipe = NV40_NEW_FB, - .hw = NV40_STATE_FB - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c deleted file mode 100644 index 9ecda5990f0..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_rasterizer.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_rasterizer_validate(struct nv40_context *nv40) -{ - so_ref(nv40->rasterizer->so, - &nv40->state.hw[NV40_STATE_RAST]); - return TRUE; -} - -struct nv40_state_entry nv40_state_rasterizer = { - .validate = nv40_state_rasterizer_validate, - .dirty = { - .pipe = NV40_NEW_RAST, - .hw = NV40_STATE_RAST - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c deleted file mode 100644 index 753a505e934..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_scissor.c +++ /dev/null @@ -1,36 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_scissor_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct pipe_scissor_state *s = &nv40->scissor; - struct nouveau_stateobj *so; - - if (nv40->state.hw[NV40_STATE_SCISSOR] && - (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) - return FALSE; - nv40->state.scissor_enabled = rast->scissor; - - so = so_new(1, 2, 0); - so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); - if (nv40->state.scissor_enabled) { - so_data (so, ((s->maxx - s->minx) << 16) | s->minx); - so_data (so, ((s->maxy - s->miny) << 16) | s->miny); - } else { - so_data (so, 4096 << 16); - so_data (so, 4096 << 16); - } - - so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_scissor = { - .validate = nv40_state_scissor_validate, - .dirty = { - .pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, - .hw = NV40_STATE_SCISSOR - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c deleted file mode 100644 index 2b371ebfec0..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_stipple.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_stipple_validate(struct nv40_context *nv40) -{ - struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; - struct nouveau_grobj *curie = nv40->screen->curie; - struct nouveau_stateobj *so; - - if (nv40->state.hw[NV40_STATE_STIPPLE] && - (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) - return FALSE; - - if (rast->poly_stipple_enable) { - unsigned i; - - so = so_new(2, 33, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 1); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, nv40->stipple[i]); - } else { - so = so_new(1, 1, 0); - so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); - so_data (so, 0); - } - - so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); - return TRUE; -} - -struct nv40_state_entry nv40_state_stipple = { - .validate = nv40_state_stipple_validate, - .dirty = { - .pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, - .hw = NV40_STATE_STIPPLE, - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c deleted file mode 100644 index 9919ba1d0b0..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_viewport.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_viewport_validate(struct nv40_context *nv40) -{ - struct pipe_viewport_state *vpt = &nv40->viewport; - struct nouveau_stateobj *so; - unsigned bypass; - - if (nv40->render_mode == HW && - !nv40->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; - - if (nv40->state.hw[NV40_STATE_VIEWPORT] && - (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && - nv40->state.viewport_bypass == bypass) - return FALSE; - nv40->state.viewport_bypass = bypass; - - so = so_new(2, 9, 0); - if (!bypass) { - so_method(so, nv40->screen->curie, - NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(vpt->translate[0])); - so_data (so, fui(vpt->translate[1])); - so_data (so, fui(vpt->translate[2])); - so_data (so, fui(vpt->translate[3])); - so_data (so, fui(vpt->scale[0])); - so_data (so, fui(vpt->scale[1])); - so_data (so, fui(vpt->scale[2])); - so_data (so, fui(vpt->scale[3])); - so_method(so, nv40->screen->curie, 0x1d78, 1); - so_data (so, 1); - } else { - so_method(so, nv40->screen->curie, - NV40TCL_VIEWPORT_TRANSLATE_X, 8); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(0.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(1.0)); - so_data (so, fui(0.0)); - /* Not entirely certain what this is yet. The DDX uses this - * value also as it fixes rendering when you pass - * pre-transformed vertices to the GPU. My best gusss is that - * this bypasses some culling/clipping stage. Might be worth - * noting that points/lines are uneffected by whatever this - * value fixes, only filled polygons are effected. - */ - so_method(so, nv40->screen->curie, 0x1d78, 1); - so_data (so, 0x110); - } - - so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_viewport = { - .validate = nv40_state_viewport_validate, - .dirty = { - .pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST, - .hw = NV40_STATE_VIEWPORT - } -}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c deleted file mode 100644 index 9cbe7da6db1..00000000000 --- a/src/gallium/drivers/nv40/nv40_state_zsa.c +++ /dev/null @@ -1,41 +0,0 @@ -#include "nv40_context.h" - -static boolean -nv40_state_zsa_validate(struct nv40_context *nv40) -{ - so_ref(nv40->zsa->so, - &nv40->state.hw[NV40_STATE_ZSA]); - return TRUE; -} - -struct nv40_state_entry nv40_state_zsa = { - .validate = nv40_state_zsa_validate, - .dirty = { - .pipe = NV40_NEW_ZSA, - .hw = NV40_STATE_ZSA - } -}; - -static boolean -nv40_state_sr_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *so = so_new(2, 2, 0); - struct pipe_stencil_ref *sr = &nv40->stencil_ref; - - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, sr->ref_value[0]); - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, sr->ref_value[1]); - - so_ref(so, &nv40->state.hw[NV40_STATE_SR]); - so_ref(NULL, &so); - return TRUE; -} - -struct nv40_state_entry nv40_state_sr = { - .validate = nv40_state_sr_validate, - .dirty = { - .pipe = NV40_NEW_SR, - .hw = NV40_STATE_SR - } -}; diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c deleted file mode 100644 index 7812460d2ed..00000000000 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ /dev/null @@ -1,565 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" -#include "util/u_format.h" - -#include "nv40_context.h" -#include "nv40_state.h" - -#include "nouveau/nouveau_channel.h" -#include "nouveau/nouveau_pushbuf.h" -#include "nouveau/nouveau_util.h" - -#define FORCE_SWTNL 0 - -static INLINE int -nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) -{ - switch (pipe) { - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; - break; - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; - break; - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = NV40TCL_VTXFMT_TYPE_USHORT; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - switch (pipe) { - case PIPE_FORMAT_R8_UNORM: - case PIPE_FORMAT_R32_FLOAT: - case PIPE_FORMAT_R16_SSCALED: - *ncomp = 1; - break; - case PIPE_FORMAT_R8G8_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - case PIPE_FORMAT_R16G16_SSCALED: - *ncomp = 2; - break; - case PIPE_FORMAT_R8G8B8_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - case PIPE_FORMAT_R16G16B16_SSCALED: - *ncomp = 3; - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - case PIPE_FORMAT_R16G16B16A16_SSCALED: - *ncomp = 4; - break; - default: - NOUVEAU_ERR("Unknown format %s\n", util_format_name(pipe)); - return 1; - } - - return 0; -} - -static boolean -nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, - unsigned ib_size) -{ - struct pipe_screen *pscreen = &nv40->screen->base.base; - unsigned type; - - if (!ib) { - nv40->idxbuf = NULL; - nv40->idxbuf_format = 0xdeadbeef; - return FALSE; - } - - if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) - return FALSE; - - switch (ib_size) { - case 2: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; - break; - case 4: - type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; - break; - default: - return FALSE; - } - - if (ib != nv40->idxbuf || - type != nv40->idxbuf_format) { - nv40->dirty |= NV40_NEW_ARRAYS; - nv40->idxbuf = ib; - nv40->idxbuf_format = type; - } - - return TRUE; -} - -static boolean -nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, - int attrib, struct pipe_vertex_element *ve, - struct pipe_vertex_buffer *vb) -{ - struct pipe_screen *pscreen = nv40->pipe.screen; - struct nouveau_grobj *curie = nv40->screen->curie; - unsigned type, ncomp; - void *map; - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) - return FALSE; - - map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - map += vb->buffer_offset + ve->src_offset; - - switch (type) { - case NV40TCL_VTXFMT_TYPE_FLOAT: - { - float *v = map; - - switch (ncomp) { - case 4: - so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - so_data (so, fui(v[3])); - break; - case 3: - so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - so_data (so, fui(v[2])); - break; - case 2: - so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); - so_data (so, fui(v[0])); - so_data (so, fui(v[1])); - break; - case 1: - so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); - so_data (so, fui(v[0])); - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - } - break; - default: - pipe_buffer_unmap(pscreen, vb->buffer); - return FALSE; - } - - pipe_buffer_unmap(pscreen, vb->buffer); - - return TRUE; -} - -void -nv40_draw_arrays(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned restart; - - nv40_vbo_set_idxbuf(nv40, NULL, 0); - if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - while (count) { - unsigned vc, nr; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } - - pipe->flush(pipe, 0, NULL); -} - -static INLINE void -nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint8_t *elts = (uint8_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint16_t *elts = (uint16_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - if (vc & 1) { - BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1); - OUT_RING (chan, elts[0]); - elts++; vc--; - } - - while (vc) { - unsigned i; - - push = MIN2(vc, 2047 * 2); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1); - for (i = 0; i < push; i+=2) - OUT_RING(chan, (elts[i+1] << 16) | elts[i]); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static INLINE void -nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - - while (count) { - uint32_t *elts = (uint32_t *)ib + start; - unsigned vc, push, restart; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, - mode, start, count, &restart); - if (vc == 0) { - FIRE_RING(chan); - continue; - } - count -= vc; - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - while (vc) { - push = MIN2(vc, 2047); - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push); - OUT_RINGp (chan, elts, push); - - vc -= push; - elts += push; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - start = restart; - } -} - -static void -nv40_draw_elements_inline(struct pipe_context *pipe, - struct pipe_buffer *ib, unsigned ib_size, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct pipe_screen *pscreen = pipe->screen; - void *map; - - map = pipe_buffer_map(pscreen, ib, PIPE_BUFFER_USAGE_CPU_READ); - if (!ib) { - NOUVEAU_ERR("failed mapping ib\n"); - return; - } - - switch (ib_size) { - case 1: - nv40_draw_elements_u08(nv40, map, mode, start, count); - break; - case 2: - nv40_draw_elements_u16(nv40, map, mode, start, count); - break; - case 4: - nv40_draw_elements_u32(nv40, map, mode, start, count); - break; - default: - NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); - break; - } - - pipe_buffer_unmap(pscreen, ib); -} - -static void -nv40_draw_elements_vbo(struct pipe_context *pipe, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - struct nv40_screen *screen = nv40->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - unsigned restart; - - while (count) { - unsigned nr, vc; - - nv40_state_emit(nv40); - - vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, - mode, start, count, &restart); - if (!vc) { - FIRE_RING(chan); - continue; - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, nvgl_primitive(mode)); - - nr = (vc & 0xff); - if (nr) { - BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1); - OUT_RING (chan, ((nr - 1) << 24) | start); - start += nr; - } - - nr = vc >> 8; - while (nr) { - unsigned push = nr > 2047 ? 2047 : nr; - - nr -= push; - - BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push); - while (push--) { - OUT_RING(chan, ((0x100 - 1) << 24) | start); - start += 0x100; - } - } - - BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1); - OUT_RING (chan, 0); - - count -= vc; - start = restart; - } -} - -void -nv40_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, unsigned indexSize, - unsigned mode, unsigned start, unsigned count) -{ - struct nv40_context *nv40 = nv40_context(pipe); - boolean idxbuf; - - idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv40_state_validate(nv40)) { - nv40_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count); - return; - } - - if (idxbuf) { - nv40_draw_elements_vbo(pipe, mode, start, count); - } else { - nv40_draw_elements_inline(pipe, indexBuffer, indexSize, - mode, start, count); - } - - pipe->flush(pipe, 0, NULL); -} - -static boolean -nv40_vbo_validate(struct nv40_context *nv40) -{ - struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *curie = nv40->screen->curie; - struct pipe_buffer *ib = nv40->idxbuf; - unsigned ib_format = nv40->idxbuf_format; - unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - int hw; - - vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); - vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); - - for (hw = 0; hw < nv40->vtxelt_nr; hw++) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned type, ncomp; - - ve = &nv40->vtxelt[hw]; - vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - - if (!vb->stride) { - if (!sattr) - sattr = so_new(16, 16 * 4, 0); - - if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { - so_data(vtxbuf, 0); - so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); - continue; - } - } - - if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - nv40->fallback_swtnl |= NV40_NEW_ARRAYS; - so_ref(NULL, &vtxbuf); - so_ref(NULL, &vtxfmt); - return FALSE; - } - - so_reloc(vtxbuf, nouveau_bo(vb->buffer), - vb->buffer_offset + ve->src_offset, - vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, - 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | - (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); - } - - if (ib) { - struct nouveau_bo *bo = nouveau_bo(ib); - - so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); - so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); - so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, - 0, NV40TCL_IDXBUF_FORMAT_DMA1); - } - - so_method(vtxbuf, curie, 0x1710, 1); - so_data (vtxbuf, 0); - - so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); - so_ref(NULL, &vtxbuf); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); - so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); - so_ref(NULL, &vtxfmt); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); - so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); - so_ref(NULL, &sattr); - nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); - return FALSE; -} - -struct nv40_state_entry nv40_state_vbo = { - .validate = nv40_vbo_validate, - .dirty = { - .pipe = NV40_NEW_ARRAYS, - .hw = 0, - } -}; - diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile index 612aea28a34..5d622e1c13c 100644 --- a/src/gallium/drivers/nv50/Makefile +++ b/src/gallium/drivers/nv50/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ nv50_surface.c \ nv50_tex.c \ nv50_transfer.c \ - nv50_vbo.c + nv50_vbo.c \ + nv50_push.c include ../../Makefile.template diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index e0b2d2880b0..8afc95c9fc6 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -36,7 +36,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned mode = 0, i; - if (!nv50_state_validate(nv50)) + if (!nv50_state_validate(nv50, 64)) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 7be12fcdef4..aa14e17872d 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -46,43 +46,13 @@ static void nv50_destroy(struct pipe_context *pipe) { struct nv50_context *nv50 = nv50_context(pipe); + int i; - if (nv50->state.fb) - so_ref(NULL, &nv50->state.fb); - if (nv50->state.blend) - so_ref(NULL, &nv50->state.blend); - if (nv50->state.blend_colour) - so_ref(NULL, &nv50->state.blend_colour); - if (nv50->state.zsa) - so_ref(NULL, &nv50->state.zsa); - if (nv50->state.rast) - so_ref(NULL, &nv50->state.rast); - if (nv50->state.stipple) - so_ref(NULL, &nv50->state.stipple); - if (nv50->state.scissor) - so_ref(NULL, &nv50->state.scissor); - if (nv50->state.viewport) - so_ref(NULL, &nv50->state.viewport); - if (nv50->state.tsc_upload) - so_ref(NULL, &nv50->state.tsc_upload); - if (nv50->state.tic_upload) - so_ref(NULL, &nv50->state.tic_upload); - if (nv50->state.vertprog) - so_ref(NULL, &nv50->state.vertprog); - if (nv50->state.fragprog) - so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.geomprog) - so_ref(NULL, &nv50->state.geomprog); - if (nv50->state.fp_linkage) - so_ref(NULL, &nv50->state.fp_linkage); - if (nv50->state.gp_linkage) - so_ref(NULL, &nv50->state.gp_linkage); - if (nv50->state.vtxfmt) - so_ref(NULL, &nv50->state.vtxfmt); - if (nv50->state.vtxbuf) - so_ref(NULL, &nv50->state.vtxbuf); - if (nv50->state.vtxattr) - so_ref(NULL, &nv50->state.vtxattr); + for (i = 0; i < 64; i++) { + if (!nv50->state.hw[i]) + continue; + so_ref(NULL, &nv50->state.hw[i]); + } draw_destroy(nv50->draw); @@ -123,11 +93,11 @@ nv50_create(struct pipe_screen *pscreen, void *priv) nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; screen->base.channel->user_private = nv50; - screen->base.channel->flush_notify = nv50_state_flush_notify; nv50_init_surface_functions(nv50); nv50_init_state_functions(nv50); nv50_init_query_functions(nv50); + nv50_init_transfer_functions(nv50); nv50->draw = draw_create(); assert(nv50->draw); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index b4de3e2ba57..1743f6fb394 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -72,6 +72,12 @@ struct nv50_sampler_stateobj { unsigned tsc[8]; }; +struct nv50_vtxelt_stateobj { + struct pipe_vertex_element pipe[16]; + unsigned num_elements; + uint32_t hw[16]; +}; + static INLINE unsigned get_tile_height(uint32_t tile_mode) { @@ -90,10 +96,12 @@ struct nv50_miptree_level { unsigned tile_mode; }; +#define NV50_MAX_TEXTURE_LEVELS 16 + struct nv50_miptree { struct nouveau_miptree base; - struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; + struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS]; int image_nr; int total_size; }; @@ -115,31 +123,12 @@ nv50_surface(struct pipe_surface *pt) } struct nv50_state { - unsigned dirty; + struct nouveau_stateobj *hw[64]; + uint64_t hw_dirty; - struct nouveau_stateobj *fb; - struct nouveau_stateobj *blend; - struct nouveau_stateobj *blend_colour; - struct nouveau_stateobj *zsa; - struct nouveau_stateobj *stencil_ref; - struct nouveau_stateobj *rast; - struct nouveau_stateobj *stipple; - struct nouveau_stateobj *scissor; - unsigned scissor_enabled; - struct nouveau_stateobj *viewport; - unsigned viewport_bypass; - struct nouveau_stateobj *tsc_upload; - struct nouveau_stateobj *tic_upload; unsigned miptree_nr[PIPE_SHADER_TYPES]; - struct nouveau_stateobj *vertprog; - struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *geomprog; - struct nouveau_stateobj *fp_linkage; - struct nouveau_stateobj *gp_linkage; - struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; - struct nouveau_stateobj *instbuf; unsigned vtxelt_nr; }; @@ -168,14 +157,13 @@ struct nv50_context { struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; - struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; - unsigned vtxelt_nr; + struct nv50_vtxelt_stateobj *vtxelt; struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; unsigned sampler_nr[PIPE_SHADER_TYPES]; struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; unsigned miptree_nr[PIPE_SHADER_TYPES]; - uint16_t vbo_fifo; + unsigned vbo_fifo; }; static INLINE struct nv50_context * @@ -187,6 +175,7 @@ nv50_context(struct pipe_context *pipe) extern void nv50_init_surface_functions(struct nv50_context *nv50); extern void nv50_init_state_functions(struct nv50_context *nv50); extern void nv50_init_query_functions(struct nv50_context *nv50); +extern void nv50_init_transfer_functions(struct nv50_context *nv50); extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); @@ -217,24 +206,36 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe, unsigned count, unsigned startInstance, unsigned instanceCount); -extern void nv50_vbo_validate(struct nv50_context *nv50); +extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso); +extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50); + +/* nv50_push.c */ +extern void +nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *, + unsigned idxsize, unsigned mode, unsigned start, + unsigned count, unsigned i_start, + unsigned i_count); /* nv50_clear.c */ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil); /* nv50_program.c */ -extern void nv50_vertprog_validate(struct nv50_context *nv50); -extern void nv50_fragprog_validate(struct nv50_context *nv50); -extern void nv50_geomprog_validate(struct nv50_context *nv50); -extern void nv50_fp_linkage_validate(struct nv50_context *nv50); -extern void nv50_gp_linkage_validate(struct nv50_context *nv50); +extern struct nouveau_stateobj * +nv50_vertprog_validate(struct nv50_context *nv50); +extern struct nouveau_stateobj * +nv50_fragprog_validate(struct nv50_context *nv50); +extern struct nouveau_stateobj * +nv50_geomprog_validate(struct nv50_context *nv50); +extern struct nouveau_stateobj * +nv50_fp_linkage_validate(struct nv50_context *nv50); +extern struct nouveau_stateobj * +nv50_gp_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); /* nv50_state_validate.c */ -extern boolean nv50_state_validate(struct nv50_context *nv50); -extern void nv50_state_flush_notify(struct nouveau_channel *chan); +extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords); extern void nv50_so_init_sifc(struct nv50_context *nv50, struct nouveau_stateobj *so, @@ -242,7 +243,8 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50, unsigned offset, unsigned size); /* nv50_tex.c */ -extern void nv50_tex_validate(struct nv50_context *); +extern void nv50_tex_relocs(struct nv50_context *); +extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *); /* nv50_transfer.c */ extern void @@ -256,4 +258,35 @@ nv50_upload_sifc(struct nv50_context *nv50, struct pipe_context * nv50_create(struct pipe_screen *pscreen, void *priv); +static INLINE unsigned +nv50_prim(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; + case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; + case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; + case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; + case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; + default: + break; + } + + NOUVEAU_ERR("invalid primitive type %d\n", mode); + return NV50TCL_VERTEX_BEGIN_POINTS; +} + #endif diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 7297c74a83c..e091cae6024 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -89,14 +89,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z32_FLOAT: tile_flags = 0x4800; break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x1800; break; case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x6c00; break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x2800; break; case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -104,7 +104,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) tile_flags = 0x7400; break; default: - if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_SCANOUT) && util_format_get_blocksizebits(pt->format) == 32) tile_flags = 0x7a00; else @@ -255,9 +255,10 @@ void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) { pscreen->texture_create = nv50_miptree_create; - pscreen->texture_blanket = nv50_miptree_blanket; pscreen->texture_destroy = nv50_miptree_destroy; pscreen->get_tex_surface = nv50_miptree_surface_new; pscreen->tex_surface_destroy = nv50_miptree_surface_del; + + nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket; } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 2372cbbef69..c857816b31a 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) FREE(up); } -void +struct nouveau_stateobj * nv50_vertprog_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); + if (!(nv50->dirty & NV50_NEW_VERTPROG)) + return NULL; + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | @@ -4301,11 +4304,10 @@ nv50_vertprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ - so_ref(so, &nv50->state.vertprog); - so_ref(NULL, &so); + return so; } -void +struct nouveau_stateobj * nv50_fragprog_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -4321,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); + if (!(nv50->dirty & NV50_NEW_FRAGPROG)) + return NULL; + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | @@ -4337,11 +4342,10 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.regs[3]); so_method(so, tesla, NV50TCL_FP_START_ID, 1); so_data (so, 0); /* program start offset */ - so_ref(so, &nv50->state.fragprog); - so_ref(NULL, &so); + return so; } -void +struct nouveau_stateobj * nv50_geomprog_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -4357,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); + if (!(nv50->dirty & NV50_NEW_GEOMPROG)) + return NULL; + so = so_new(6, 7, 2); so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | @@ -4373,8 +4380,7 @@ nv50_geomprog_validate(struct nv50_context *nv50) so_data (so, p->cfg.vert_count); so_method(so, tesla, NV50TCL_GP_START_ID, 1); so_data (so, 0); - so_ref(so, &nv50->state.geomprog); - so_ref(NULL, &so); + return so; } static uint32_t @@ -4454,7 +4460,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], return mid; } -void +struct nouveau_stateobj * nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -4580,8 +4586,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_GP_ENABLE, 1); so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0); - so_ref(so, &nv50->state.fp_linkage); - so_ref(NULL, &so); + return so; } static int @@ -4615,7 +4620,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m, return m; } -void +struct nouveau_stateobj * nv50_gp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -4625,10 +4630,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50) uint32_t map[16]; int m = 0; - if (!gp) { - so_ref(NULL, &nv50->state.gp_linkage); - return; - } + if (!gp) + return NULL; memset(map, 0, sizeof(map)); m = construct_vp_gp_mapping(map, m, vp, gp); @@ -4646,8 +4649,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50) so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); so_datap (so, map, m); - so_ref(so, &nv50->state.gp_linkage); - so_ref(NULL, &so); + return so; } void diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c new file mode 100644 index 00000000000..96a1f32d304 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -0,0 +1,326 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "nouveau/nouveau_util.h" +#include "nv50_context.h" + +struct push_context { + struct nv50_context *nv50; + + unsigned vtx_size; + + void *idxbuf; + unsigned idxsize; + + float edgeflag; + int edgeflag_attr; + + struct { + void *map; + unsigned stride; + unsigned divisor; + unsigned step; + void (*push)(struct nouveau_channel *, void *); + } attr[16]; + unsigned attr_nr; +}; + +static void +emit_b32_1(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b32_2(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); +} + +static void +emit_b32_3(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); +} + +static void +emit_b32_4(struct nouveau_channel *chan, void *data) +{ + uint32_t *v = data; + + OUT_RING(chan, v[0]); + OUT_RING(chan, v[1]); + OUT_RING(chan, v[2]); + OUT_RING(chan, v[3]); +} + +static void +emit_b16_1(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b16_3(struct nouveau_channel *chan, void *data) +{ + uint16_t *v = data; + + OUT_RING(chan, (v[1] << 16) | v[0]); + OUT_RING(chan, v[2]); +} + +static void +emit_b08_1(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, v[0]); +} + +static void +emit_b08_3(struct nouveau_channel *chan, void *data) +{ + uint8_t *v = data; + + OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); +} + +static INLINE void +emit_vertex(struct push_context *ctx, unsigned n) +{ + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + int i; + + if (ctx->edgeflag_attr < 16) { + float *edgeflag = ctx->attr[ctx->edgeflag_attr].map + + ctx->attr[ctx->edgeflag_attr].stride * n; + + if (*edgeflag != ctx->edgeflag) { + BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, *edgeflag ? 1 : 0); + ctx->edgeflag = *edgeflag; + } + } + + BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size); + for (i = 0; i < ctx->attr_nr; i++) + ctx->attr[i].push(chan, ctx->attr[i].map + ctx->attr[i].stride * n); +} + +static void +emit_edgeflag(void *priv, boolean enabled) +{ + struct push_context *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + + BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, enabled ? 1 : 0); +} + +static void +emit_elt08(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint8_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt16(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint16_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_elt32(void *priv, unsigned start, unsigned count) +{ + struct push_context *ctx = priv; + uint32_t *idxbuf = ctx->idxbuf; + + while (count--) + emit_vertex(ctx, idxbuf[start++]); +} + +static void +emit_verts(void *priv, unsigned start, unsigned count) +{ + while (count--) + emit_vertex(priv, start++); +} + +void +nv50_push_elements_instanced(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxsize, + unsigned mode, unsigned start, unsigned count, + unsigned i_start, unsigned i_count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + struct push_context ctx; + const unsigned p_overhead = 4 + /* begin/end */ + 4; /* potential edgeflag enable/disable */ + const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */ + 2; /* potential edgeflag modification */ + struct u_split_prim s; + unsigned vtx_size; + boolean nzi = FALSE; + int i; + + ctx.nv50 = nv50; + ctx.attr_nr = 0; + ctx.idxbuf = NULL; + ctx.vtx_size = 0; + ctx.edgeflag = 0.5f; + ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in; + + /* map vertex buffers, determine vertex size */ + for (i = 0; i < nv50->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; + struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index]; + struct nouveau_bo *bo = nouveau_bo(vb->buffer); + unsigned size, nr_components, n; + + if (!(nv50->vbo_fifo & (1 << i))) + continue; + n = ctx.attr_nr++; + + if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { + assert(bo->map); + return; + } + ctx.attr[n].map = bo->map + vb->buffer_offset + ve->src_offset; + nouveau_bo_unmap(bo); + + ctx.attr[n].stride = vb->stride; + ctx.attr[n].divisor = ve->instance_divisor; + if (ctx.attr[n].divisor) { + ctx.attr[n].step = i_start % ve->instance_divisor; + ctx.attr[n].map += i_start * vb->stride; + } + + size = util_format_get_component_bits(ve->src_format, + UTIL_FORMAT_COLORSPACE_RGB, 0); + nr_components = util_format_get_nr_components(ve->src_format); + switch (size) { + case 8: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b08_1; break; + case 2: ctx.attr[n].push = emit_b16_1; break; + case 3: ctx.attr[n].push = emit_b08_3; break; + case 4: ctx.attr[n].push = emit_b32_1; break; + } + ctx.vtx_size++; + break; + case 16: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b16_1; break; + case 2: ctx.attr[n].push = emit_b32_1; break; + case 3: ctx.attr[n].push = emit_b16_3; break; + case 4: ctx.attr[n].push = emit_b32_2; break; + } + ctx.vtx_size += (nr_components + 1) >> 1; + break; + case 32: + switch (nr_components) { + case 1: ctx.attr[n].push = emit_b32_1; break; + case 2: ctx.attr[n].push = emit_b32_2; break; + case 3: ctx.attr[n].push = emit_b32_3; break; + case 4: ctx.attr[n].push = emit_b32_4; break; + } + ctx.vtx_size += nr_components; + break; + default: + assert(0); + return; + } + } + vtx_size = ctx.vtx_size + v_overhead; + + /* map index buffer, if present */ + if (idxbuf) { + struct nouveau_bo *bo = nouveau_bo(idxbuf); + + if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) { + assert(bo->map); + return; + } + ctx.idxbuf = bo->map; + ctx.idxsize = idxsize; + nouveau_bo_unmap(bo); + } + + s.priv = &ctx; + s.edge = emit_edgeflag; + if (idxbuf) { + if (idxsize == 1) + s.emit = emit_elt08; + else + if (idxsize == 2) + s.emit = emit_elt16; + else + s.emit = emit_elt32; + } else + s.emit = emit_verts; + + /* per-instance loop */ + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, i_start); + while (i_count--) { + unsigned max_verts; + boolean done; + + for (i = 0; i < ctx.attr_nr; i++) { + if (!ctx.attr[i].divisor || + ctx.attr[i].divisor != ++ctx.attr[i].step) + continue; + ctx.attr[i].step = 0; + ctx.attr[i].map += ctx.attr[i].stride; + } + + u_split_prim_init(&s, mode, start, count); + do { + if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) { + FIRE_RING(chan); + if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) { + assert(0); + return; + } + } + + max_verts = AVAIL_RING(chan); + max_verts -= p_overhead; + max_verts /= vtx_size; + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0)); + done = u_split_prim_next(&s, max_verts); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } while (!done); + + nzi = TRUE; + } +} diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 2232461b9b6..adf0d3b3741 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -35,9 +35,9 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, { if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { switch (format) { - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R32G32B32A32_FLOAT: @@ -51,32 +51,32 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { switch (format) { case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: return TRUE; default: break; } } else { switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_X8R8G8B8_SRGB: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_A8_UNORM: case PIPE_FORMAT_I8_UNORM: - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: - case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R16G16B16A16_SNORM: case PIPE_FORMAT_R16G16B16A16_UNORM: @@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, static int nv50_screen_get_param(struct pipe_screen *pscreen, int param) { + struct nv50_screen *screen = nv50_screen(pscreen); + switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return 32; @@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case NOUVEAU_CAP_HW_VTXBUF: - return 1; + return screen->force_push ? 0 : 1; case NOUVEAU_CAP_HW_IDXBUF: - return 1; + return screen->force_push ? 0 : 1; case PIPE_CAP_INDEP_BLEND_ENABLE: return 1; case PIPE_CAP_INDEP_BLEND_FUNC: @@ -202,28 +204,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen) FREE(screen); } -static int -nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb, - unsigned usage) -{ - struct nv50_screen *screen = nv50_screen(pscreen); - struct nv50_context *ctx = screen->cur_ctx; - - if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX)) - return 0; - - /* Our vtxbuf got mapped, it can no longer be considered part of current - * state, remove it to avoid emitting reloc markers. - */ - if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf, - nouveau_bo(pb))) { - so_ref(NULL, &ctx->state.vtxbuf); - ctx->dirty |= NV50_NEW_ARRAYS; - } - - return 0; -} - struct pipe_screen * nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) { @@ -252,10 +232,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) pscreen->get_paramf = nv50_screen_get_paramf; pscreen->is_format_supported = nv50_screen_is_format_supported; pscreen->context_create = nv50_create; - screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map; nv50_screen_init_miptree_functions(pscreen); - nv50_transfer_init_screen_functions(pscreen); /* DMA engine object */ ret = nouveau_grobj_alloc(chan, 0xbeef5039, @@ -508,10 +486,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1); so_data (so, 1); - /* activate first scissor rectangle */ - so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1); - so_data (so, 1); - so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1); so_data (so, 1); /* default edgeflag to TRUE */ @@ -520,6 +494,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref (NULL, &so); nouveau_pushbuf_flush(chan, 0); + screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE); return pscreen; } diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index 2687b721277..ec19ea655b1 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -28,6 +28,8 @@ struct nv50_screen { struct nouveau_bo *tsc; struct nouveau_stateobj *static_init; + + boolean force_push; }; static INLINE struct nv50_screen * @@ -36,6 +38,4 @@ nv50_screen(struct pipe_screen *screen) return (struct nv50_screen *)screen; } -void nv50_transfer_init_screen_functions(struct pipe_screen *); - #endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 7d304907b65..b0e5552eff4 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -302,7 +302,7 @@ static void * nv50_rasterizer_state_create(struct pipe_context *pipe, const struct pipe_rasterizer_state *cso) { - struct nouveau_stateobj *so = so_new(15, 21, 0); + struct nouveau_stateobj *so = so_new(16, 22, 0); struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; struct nv50_rasterizer_stateobj *rso = CALLOC_STRUCT(nv50_rasterizer_stateobj); @@ -314,6 +314,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe, * - point_sprite / sprite_coord_mode */ + so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1); + so_data (so, cso->scissor); + so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : NV50TCL_SHADE_MODEL_SMOOTH); @@ -720,15 +723,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, nv50->dirty |= NV50_NEW_ARRAYS; } +static void * +nv50_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj); + + assert(num_elements < 16); /* not doing fallbacks yet */ + cso->num_elements = num_elements; + memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); + + nv50_vtxelt_construct(cso); + + return (void *)cso; +} + static void -nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, - const struct pipe_vertex_element *ve) +nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nv50_context *nv50 = nv50_context(pipe); + FREE(hwcso); +} - memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); - nv50->vtxelt_nr = count; +static void +nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + nv50->vtxelt = hwcso; nv50->dirty |= NV50_NEW_ARRAYS; } @@ -778,7 +800,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.set_scissor_state = nv50_set_scissor_state; nv50->pipe.set_viewport_state = nv50_set_viewport_state; + nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create; + nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete; + nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind; + nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; - nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index efab94cab74..2c8e7ca7982 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -25,14 +25,8 @@ #include "nv50_context.h" #include "nouveau/nouveau_stateobj.h" -#define NV50_CBUF_FORMAT_CASE(n) \ - case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break - -#define NV50_ZETA_FORMAT_CASE(n) \ - case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break - -static void -nv50_state_validate_fb(struct nv50_context *nv50) +static struct nouveau_stateobj * +validate_fb(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nouveau_stateobj *so = so_new(32, 79, 18); @@ -71,14 +65,30 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->cbufs[i]->format) { - NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM); - NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM); - NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM); - NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM); - NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM); - NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT); - NV50_CBUF_FORMAT_CASE(R16G16_SNORM); - NV50_CBUF_FORMAT_CASE(R16G16_UNORM); + case PIPE_FORMAT_B8G8R8A8_UNORM: + so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM); + break; + case PIPE_FORMAT_B8G8R8X8_UNORM: + so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM); + break; + case PIPE_FORMAT_B5G6R5_UNORM: + so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_SNORM); + break; + case PIPE_FORMAT_R16G16B16A16_UNORM: + so_data(so, NV50TCL_RT_FORMAT_R16G16B16A16_UNORM); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so_data(so, NV50TCL_RT_FORMAT_R32G32B32A32_FLOAT); + break; + case PIPE_FORMAT_R16G16_SNORM: + so_data(so, NV50TCL_RT_FORMAT_R16G16_SNORM); + break; + case PIPE_FORMAT_R16G16_UNORM: + so_data(so, NV50TCL_RT_FORMAT_R16G16_UNORM); + break; default: NOUVEAU_ERR("AIIII unknown format %s\n", util_format_name(fb->cbufs[i]->format)); @@ -112,10 +122,18 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0); switch (fb->zsbuf->format) { - NV50_ZETA_FORMAT_CASE(S8Z24_UNORM); - NV50_ZETA_FORMAT_CASE(X8Z24_UNORM); - NV50_ZETA_FORMAT_CASE(Z24S8_UNORM); - NV50_ZETA_FORMAT_CASE(Z32_FLOAT); + case PIPE_FORMAT_Z24S8_UNORM: + so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM); + break; + case PIPE_FORMAT_Z24X8_UNORM: + so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM); + break; + case PIPE_FORMAT_S8Z24_UNORM: + so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM); + break; + case PIPE_FORMAT_Z32_FLOAT: + so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT); + break; default: NOUVEAU_ERR("AIIII unknown format %s\n", util_format_name(fb->zsbuf->format)); @@ -149,12 +167,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, w << 16); so_data (so, h << 16); - /* we set scissors to framebuffer size when they're 'turned off' */ - nv50->dirty |= NV50_NEW_SCISSOR; - so_ref(NULL, &nv50->state.scissor); - - so_ref(so, &nv50->state.fb); - so_ref(NULL, &so); + return so; } static void @@ -181,281 +194,256 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so, } } -static void -nv50_state_emit(struct nv50_context *nv50) +static struct nouveau_stateobj * +validate_blend(struct nv50_context *nv50) +{ + struct nouveau_stateobj *so = NULL; + so_ref(nv50->blend->so, &so); + return so; +} + +static struct nouveau_stateobj * +validate_zsa(struct nv50_context *nv50) { - struct nv50_screen *screen = nv50->screen; - struct nouveau_channel *chan = screen->base.channel; + struct nouveau_stateobj *so = NULL; + so_ref(nv50->zsa->so, &so); + return so; +} - /* XXX: this is racy for multiple contexts active on separate - * threads. - */ - if (screen->cur_ctx != nv50) { - if (nv50->state.fb) - nv50->state.dirty |= NV50_NEW_FRAMEBUFFER; - if (nv50->state.blend) - nv50->state.dirty |= NV50_NEW_BLEND; - if (nv50->state.zsa) - nv50->state.dirty |= NV50_NEW_ZSA; - if (nv50->state.vertprog) - nv50->state.dirty |= NV50_NEW_VERTPROG; - if (nv50->state.fragprog) - nv50->state.dirty |= NV50_NEW_FRAGPROG; - if (nv50->state.geomprog) - nv50->state.dirty |= NV50_NEW_GEOMPROG; - if (nv50->state.rast) - nv50->state.dirty |= NV50_NEW_RASTERIZER; - if (nv50->state.blend_colour) - nv50->state.dirty |= NV50_NEW_BLEND_COLOUR; - if (nv50->state.stencil_ref) - nv50->state.dirty |= NV50_NEW_STENCIL_REF; - if (nv50->state.stipple) - nv50->state.dirty |= NV50_NEW_STIPPLE; - if (nv50->state.scissor) - nv50->state.dirty |= NV50_NEW_SCISSOR; - if (nv50->state.viewport) - nv50->state.dirty |= NV50_NEW_VIEWPORT; - if (nv50->state.tsc_upload) - nv50->state.dirty |= NV50_NEW_SAMPLER; - if (nv50->state.tic_upload) - nv50->state.dirty |= NV50_NEW_TEXTURE; - if (nv50->state.vtxfmt && nv50->state.vtxbuf) - nv50->state.dirty |= NV50_NEW_ARRAYS; - screen->cur_ctx = nv50; - } +static struct nouveau_stateobj * +validate_rast(struct nv50_context *nv50) +{ + struct nouveau_stateobj *so = NULL; + so_ref(nv50->rasterizer->so, &so); + return so; +} - if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) - so_emit(chan, nv50->state.fb); - if (nv50->state.dirty & NV50_NEW_BLEND) - so_emit(chan, nv50->state.blend); - if (nv50->state.dirty & NV50_NEW_ZSA) - so_emit(chan, nv50->state.zsa); - if (nv50->state.dirty & NV50_NEW_VERTPROG) - so_emit(chan, nv50->state.vertprog); - if (nv50->state.dirty & NV50_NEW_FRAGPROG) - so_emit(chan, nv50->state.fragprog); - if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) - so_emit(chan, nv50->state.geomprog); - if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.fp_linkage); - if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG)) - && nv50->state.gp_linkage) - so_emit(chan, nv50->state.gp_linkage); - if (nv50->state.dirty & NV50_NEW_RASTERIZER) - so_emit(chan, nv50->state.rast); - if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) - so_emit(chan, nv50->state.blend_colour); - if (nv50->state.dirty & NV50_NEW_STENCIL_REF) - so_emit(chan, nv50->state.stencil_ref); - if (nv50->state.dirty & NV50_NEW_STIPPLE) - so_emit(chan, nv50->state.stipple); - if (nv50->state.dirty & NV50_NEW_SCISSOR) - so_emit(chan, nv50->state.scissor); - if (nv50->state.dirty & NV50_NEW_VIEWPORT) - so_emit(chan, nv50->state.viewport); - if (nv50->state.dirty & NV50_NEW_SAMPLER) - so_emit(chan, nv50->state.tsc_upload); - if (nv50->state.dirty & NV50_NEW_TEXTURE) - so_emit(chan, nv50->state.tic_upload); - if (nv50->state.dirty & NV50_NEW_ARRAYS) { - so_emit(chan, nv50->state.vtxfmt); - so_emit(chan, nv50->state.vtxbuf); - if (nv50->state.vtxattr) - so_emit(chan, nv50->state.vtxattr); - } - nv50->state.dirty = 0; +static struct nouveau_stateobj * +validate_blend_colour(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(1, 4, 0); + + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); + so_data (so, fui(nv50->blend_colour.color[0])); + so_data (so, fui(nv50->blend_colour.color[1])); + so_data (so, fui(nv50->blend_colour.color[2])); + so_data (so, fui(nv50->blend_colour.color[3])); + return so; } -void -nv50_state_flush_notify(struct nouveau_channel *chan) +static struct nouveau_stateobj * +validate_stencil_ref(struct nv50_context *nv50) { - struct nv50_context *nv50 = chan->user_private; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so = so_new(2, 2, 0); - if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE)) - so_emit(chan, nv50->state.tic_upload); + so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1); + so_data (so, nv50->stencil_ref.ref_value[0]); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1); + so_data (so, nv50->stencil_ref.ref_value[1]); + return so; +} - so_emit_reloc_markers(chan, nv50->state.fb); - so_emit_reloc_markers(chan, nv50->state.vertprog); - so_emit_reloc_markers(chan, nv50->state.fragprog); - so_emit_reloc_markers(chan, nv50->state.vtxbuf); - so_emit_reloc_markers(chan, nv50->screen->static_init); +static struct nouveau_stateobj * +validate_stipple(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(1, 32, 0); + int i; - if (nv50->state.instbuf) - so_emit_reloc_markers(chan, nv50->state.instbuf); + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, util_bswap32(nv50->stipple.stipple[i])); + return so; } -boolean -nv50_state_validate(struct nv50_context *nv50) +static struct nouveau_stateobj * +validate_scissor(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_scissor_state *s = &nv50->scissor; struct nouveau_stateobj *so; - unsigned i; - if (nv50->dirty & NV50_NEW_FRAMEBUFFER) - nv50_state_validate_fb(nv50); + so = so_new(1, 2, 0); + so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); + so_data (so, (s->maxx << 16) | s->minx); + so_data (so, (s->maxy << 16) | s->miny); + return so; +} + +static struct nouveau_stateobj * +validate_viewport(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(5, 9, 0); + + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + + so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); + so_data (so, 1); + /* 0x0000 = remove whole primitive only (xyz) + * 0x1018 = remove whole primitive only (xy), clamp z + * 0x1080 = clip primitive (xyz) + * 0x1098 = clip primitive (xy), clamp z + */ + so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); + so_data (so, 0x1080); + /* no idea what 0f90 does */ + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); + + return so; +} - if (nv50->dirty & NV50_NEW_BLEND) - so_ref(nv50->blend->so, &nv50->state.blend); +static struct nouveau_stateobj * +validate_sampler(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + unsigned nr = 0, i; - if (nv50->dirty & NV50_NEW_ZSA) - so_ref(nv50->zsa->so, &nv50->state.zsa); + for (i = 0; i < PIPE_SHADER_TYPES; ++i) + nr += nv50->sampler_nr[i]; - if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) - nv50_vertprog_validate(nv50); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); - if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) - nv50_fragprog_validate(nv50); + nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); + nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); - if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) - nv50_geomprog_validate(nv50); + so_method(so, tesla, 0x1334, 1); /* flush TSC */ + so_data (so, 0); - if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) - nv50_fp_linkage_validate(nv50); + return so; +} - if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) - nv50_gp_linkage_validate(nv50); +static struct nouveau_stateobj * +validate_vtxbuf(struct nv50_context *nv50) +{ + struct nouveau_stateobj *so = NULL; + so_ref(nv50->state.vtxbuf, &so); + return so; +} - if (nv50->dirty & NV50_NEW_RASTERIZER) - so_ref(nv50->rasterizer->so, &nv50->state.rast); +static struct nouveau_stateobj * +validate_vtxattr(struct nv50_context *nv50) +{ + struct nouveau_stateobj *so = NULL; + so_ref(nv50->state.vtxattr, &so); + return so; +} - if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { - so = so_new(1, 4, 0); - so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); - so_data (so, fui(nv50->blend_colour.color[0])); - so_data (so, fui(nv50->blend_colour.color[1])); - so_data (so, fui(nv50->blend_colour.color[2])); - so_data (so, fui(nv50->blend_colour.color[3])); - so_ref(so, &nv50->state.blend_colour); - so_ref(NULL, &so); - } +struct state_validate { + struct nouveau_stateobj *(*func)(struct nv50_context *nv50); + unsigned states; +} validate_list[] = { + { validate_fb , NV50_NEW_FRAMEBUFFER }, + { validate_blend , NV50_NEW_BLEND }, + { validate_zsa , NV50_NEW_ZSA }, + { nv50_vertprog_validate , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB }, + { nv50_fragprog_validate , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB }, + { nv50_geomprog_validate , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB }, + { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG | + NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER }, + { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG }, + { validate_rast , NV50_NEW_RASTERIZER }, + { validate_blend_colour , NV50_NEW_BLEND_COLOUR }, + { validate_stencil_ref , NV50_NEW_STENCIL_REF }, + { validate_stipple , NV50_NEW_STIPPLE }, + { validate_scissor , NV50_NEW_SCISSOR }, + { validate_viewport , NV50_NEW_VIEWPORT }, + { validate_sampler , NV50_NEW_SAMPLER }, + { nv50_tex_validate , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER }, + { nv50_vbo_validate , NV50_NEW_ARRAYS }, + { validate_vtxbuf , NV50_NEW_ARRAYS }, + { validate_vtxattr , NV50_NEW_ARRAYS }, + {} +}; +#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) - if (nv50->dirty & NV50_NEW_STENCIL_REF) { - so = so_new(2, 2, 0); - so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1); - so_data (so, nv50->stencil_ref.ref_value[0]); - so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1); - so_data (so, nv50->stencil_ref.ref_value[1]); - so_ref(so, &nv50->state.stencil_ref); - so_ref(NULL, &so); - } +boolean +nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords) +{ + struct nouveau_channel *chan = nv50->screen->base.channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4; + int ret, i; - if (nv50->dirty & NV50_NEW_STIPPLE) { - so = so_new(1, 32, 0); - so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); - for (i = 0; i < 32; i++) - so_data(so, util_bswap32(nv50->stipple.stipple[i])); - so_ref(so, &nv50->state.stipple); - so_ref(NULL, &so); - } + for (i = 0; i < validate_list_len; i++) { + struct state_validate *validate = &validate_list[i]; + struct nouveau_stateobj *so; - if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { - struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; - struct pipe_scissor_state *s = &nv50->scissor; + if (!(nv50->dirty & validate->states)) + continue; - if (nv50->state.scissor && - (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) - goto scissor_uptodate; - nv50->state.scissor_enabled = rast->scissor; + so = validate->func(nv50); + if (!so) + continue; - so = so_new(1, 2, 0); - so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2); - if (nv50->state.scissor_enabled) { - so_data(so, (s->maxx << 16) | s->minx); - so_data(so, (s->maxy << 16) | s->miny); - } else { - so_data(so, (nv50->framebuffer.width << 16)); - so_data(so, (nv50->framebuffer.height << 16)); - } - so_ref(so, &nv50->state.scissor); - so_ref(NULL, &so); - nv50->state.dirty |= NV50_NEW_SCISSOR; - } -scissor_uptodate: - - if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) { - unsigned bypass; - - if (!nv50->rasterizer->pipe.bypass_vs_clip_and_viewport) - bypass = 0; - else - bypass = 1; - - if (nv50->state.viewport && - (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && - nv50->state.viewport_bypass == bypass) - goto viewport_uptodate; - nv50->state.viewport_bypass = bypass; - - so = so_new(5, 9, 0); - if (!bypass) { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3); - so_data (so, fui(nv50->viewport.translate[0])); - so_data (so, fui(nv50->viewport.translate[1])); - so_data (so, fui(nv50->viewport.translate[2])); - so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3); - so_data (so, fui(nv50->viewport.scale[0])); - so_data (so, fui(nv50->viewport.scale[1])); - so_data (so, fui(nv50->viewport.scale[2])); - - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - so_data (so, 1); - /* 0x0000 = remove whole primitive only (xyz) - * 0x1018 = remove whole primitive only (xy), clamp z - * 0x1080 = clip primitive (xyz) - * 0x1098 = clip primitive (xy), clamp z - */ - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, 0x1080); - /* no idea what 0f90 does */ - so_method(so, tesla, 0x0f90, 1); - so_data (so, 0); - } else { - so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1); - so_data (so, 0); - so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1); - so_data (so, 0x0000); - so_method(so, tesla, 0x0f90, 1); - so_data (so, 1); - } + nr_dwords += (so->total + so->cur); + nr_relocs += so->cur_reloc; - so_ref(so, &nv50->state.viewport); + so_ref(so, &nv50->state.hw[i]); so_ref(NULL, &so); - nv50->state.dirty |= NV50_NEW_VIEWPORT; + nv50->state.hw_dirty |= (1 << i); } -viewport_uptodate: - - if (nv50->dirty & NV50_NEW_SAMPLER) { - unsigned nr = 0; - - for (i = 0; i < PIPE_SHADER_TYPES; ++i) - nr += nv50->sampler_nr[i]; + nv50->dirty = 0; - so = so_new(1 + 5 * PIPE_SHADER_TYPES, - 1 + 19 * PIPE_SHADER_TYPES + nr * 8, - PIPE_SHADER_TYPES * 2); + if (nv50->screen->cur_ctx != nv50) { + for (i = 0; i < validate_list_len; i++) { + if (!nv50->state.hw[i] || + (nv50->state.hw_dirty & (1 << i))) + continue; - nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); - nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); + nr_dwords += (nv50->state.hw[i]->total + + nv50->state.hw[i]->cur); + nr_relocs += nv50->state.hw[i]->cur_reloc; + nv50->state.hw_dirty |= (1 << i); + } - so_method(so, tesla, 0x1334, 1); /* flush TSC */ - so_data (so, 0); + nv50->screen->cur_ctx = nv50; + } - so_ref(so, &nv50->state.tsc_upload); - so_ref(NULL, &so); + ret = MARK_RING(chan, nr_dwords, nr_relocs); + if (ret) { + debug_printf("MARK_RING(%d, %d) failed: %d\n", + nr_dwords, nr_relocs, ret); + return FALSE; } - if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER)) - nv50_tex_validate(nv50); + while (nv50->state.hw_dirty) { + i = ffs(nv50->state.hw_dirty) - 1; + nv50->state.hw_dirty &= ~(1 << i); - if (nv50->dirty & NV50_NEW_ARRAYS) - nv50_vbo_validate(nv50); + so_emit(chan, nv50->state.hw[i]); + } - nv50->state.dirty |= nv50->dirty; - nv50->dirty = 0; - nv50_state_emit(nv50); + /* Yes, really, we need to do this. If a buffer that is referenced + * on the hardware isn't part of changed state above, without doing + * this the kernel is given no clue that the buffer is being used + * still. This can cause all sorts of fun issues. + */ + nv50_tex_relocs(nv50); + so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */ + so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */ + so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */ + so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */ + so_emit_reloc_markers(chan, nv50->screen->static_init); + /* No idea.. */ + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); return TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 7405b67414a..cabd148bc5b 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -33,11 +33,11 @@ static INLINE int nv50_format(enum pipe_format format) { switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM; - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return NV50_2D_DST_FORMAT_R5G6B5_UNORM; case PIPE_FORMAT_A8_UNORM: return NV50_2D_DST_FORMAT_R8_UNORM; diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index 9f1a1713032..4c48b12cd87 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -24,6 +24,7 @@ #include "nv50_texture.h" #include "nouveau/nouveau_stateobj.h" +#include "nouveau/nouveau_reloc.h" #include "util/u_format.h" @@ -49,28 +50,28 @@ struct nv50_texture_format { static const struct nv50_texture_format nv50_tex_format_list[] = { - _(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), - _(A8R8G8B8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8), - _(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), - _(X8R8G8B8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8), - _(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), - _(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), + _(B8G8R8A8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(B8G8R8A8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8), + _(B8G8R8X8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(B8G8R8X8_SRGB, UNORM, C2, C1, C0, ONE, 8_8_8_8), + _(B5G5R5A1_UNORM, UNORM, C2, C1, C0, C3, 1_5_5_5), + _(B4G4R4A4_UNORM, UNORM, C2, C1, C0, C3, 4_4_4_4), - _(R5G6B5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5), + _(B5G6R5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5), _(L8_UNORM, UNORM, C0, C0, C0, ONE, 8), _(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8), _(I8_UNORM, UNORM, C0, C0, C0, C0, 8), - _(A8L8_UNORM, UNORM, C0, C0, C0, C1, 8_8), + _(L8A8_UNORM, UNORM, C0, C0, C0, C1, 8_8), _(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1), _(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1), _(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3), _(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5), - _MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), - _MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24), + _MIXED(S8Z24_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8), + _MIXED(Z24S8_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24), _(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16), _(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16), @@ -195,6 +196,35 @@ nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, } void +nv50_tex_relocs(struct nv50_context *nv50) +{ + struct nouveau_channel *chan = nv50->screen->tesla->channel; + int p, unit; + + p = PIPE_SHADER_FRAGMENT; + for (unit = 0; unit < nv50->miptree_nr[p]; unit++) { + if (!nv50->miptree[p][unit]) + continue; + nouveau_reloc_emit(chan, nv50->screen->tic, + ((p * 32) + unit) * 32, NULL, + nv50->miptree[p][unit]->base.bo, 0, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RD, 0, 0); + } + + p = PIPE_SHADER_VERTEX; + for (unit = 0; unit < nv50->miptree_nr[p]; unit++) { + if (!nv50->miptree[p][unit]) + continue; + nouveau_reloc_emit(chan, nv50->screen->tic, + ((p * 32) + unit) * 32, NULL, + nv50->miptree[p][unit]->base.bo, 0, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RD, 0, 0); + } +} + +struct nouveau_stateobj * nv50_tex_validate(struct nv50_context *nv50) { struct nouveau_stateobj *so; @@ -217,12 +247,11 @@ nv50_tex_validate(struct nv50_context *nv50) so_ref(NULL, &so); NOUVEAU_ERR("failed tex validate\n"); - return; + return NULL; } so_method(so, tesla, 0x1330, 1); /* flush TIC */ so_data (so, 0); - so_ref(so, &nv50->state.tic_upload); - so_ref(NULL, &so); + return so; } diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c index 7c360e9e73a..9eb223eca65 100644 --- a/src/gallium/drivers/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -121,11 +121,12 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, } static struct pipe_transfer * -nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nv50_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, unsigned w, unsigned h) { + struct pipe_screen *pscreen = pcontext->screen; struct nouveau_device *dev = nouveau_screen(pscreen)->device; struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; @@ -186,7 +187,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } static void -nv50_transfer_del(struct pipe_transfer *ptx) +nv50_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; struct nv50_miptree *mt = nv50_miptree(ptx->texture); @@ -196,7 +197,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height); if (ptx->usage & PIPE_TRANSFER_WRITE) { - struct pipe_screen *pscreen = pt->screen; + struct pipe_screen *pscreen = pcontext->screen; nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride, tx->bo->tile_mode, @@ -218,7 +219,7 @@ nv50_transfer_del(struct pipe_transfer *ptx) } static void * -nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nv50_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; unsigned flags = 0; @@ -236,7 +237,7 @@ nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } static void -nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nv50_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) { struct nv50_transfer *tx = (struct nv50_transfer *)ptx; @@ -244,12 +245,12 @@ nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } void -nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) +nv50_init_transfer_functions(struct nv50_context *nv50) { - pscreen->get_tex_transfer = nv50_transfer_new; - pscreen->tex_transfer_destroy = nv50_transfer_del; - pscreen->transfer_map = nv50_transfer_map; - pscreen->transfer_unmap = nv50_transfer_unmap; + nv50->pipe.get_tex_transfer = nv50_transfer_new; + nv50->pipe.tex_transfer_destroy = nv50_transfer_del; + nv50->pipe.transfer_map = nv50_transfer_map; + nv50->pipe.transfer_unmap = nv50_transfer_unmap; } void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 1c8ee0b9adf..50472868063 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -25,53 +25,9 @@ #include "util/u_inlines.h" #include "util/u_format.h" +#include "nouveau/nouveau_util.h" #include "nv50_context.h" -static boolean -nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned); - -static boolean -nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned); - -static boolean -nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned); - -static boolean -nv50_push_arrays(struct nv50_context *, unsigned, unsigned); - -#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16) - -static INLINE unsigned -nv50_prim(unsigned mode) -{ - switch (mode) { - case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; - case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; - case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; - case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; - case PIPE_PRIM_TRIANGLE_STRIP: - return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; - case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; - case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; - case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; - case PIPE_PRIM_LINES_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; - default: - break; - } - - NOUVEAU_ERR("invalid primitive type %d\n", mode); - return NV50TCL_VERTEX_BEGIN_POINTS; -} - static INLINE uint32_t nv50_vbo_type_to_hw(enum pipe_format format) { @@ -139,15 +95,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) uint32_t hw_type, hw_size; enum pipe_format pf = ve->src_format; const struct util_format_description *desc; - unsigned size; + unsigned size, nr_components; desc = util_format_description(pf); assert(desc); size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0); + nr_components = util_format_get_nr_components(pf); hw_type = nv50_vbo_type_to_hw(pf); - hw_size = nv50_vbo_size_to_hw(size, ve->nr_components); + hw_size = nv50_vbo_size_to_hw(size, nr_components); if (!hw_type || !hw_size) { NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf)); @@ -161,250 +118,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve) return (hw_type | hw_size); } -/* For instanced drawing from user buffers, hitting the FIFO repeatedly - * with the same vertex data is probably worse than uploading all data. - */ -static boolean -nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i) -{ - struct nv50_screen *nscreen = nv50->screen; - struct pipe_screen *pscreen = &nscreen->base.base; - struct pipe_buffer *buf = nscreen->strm_vbuf[i]; - struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - uint8_t *src; - unsigned size = align(vb->buffer->size, 4096); - - if (buf && buf->size < size) - pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL); - - if (!nscreen->strm_vbuf[i]) { - nscreen->strm_vbuf[i] = pipe_buffer_create( - pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size); - buf = nscreen->strm_vbuf[i]; - } - - src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); - if (!src) - return FALSE; - src += vb->buffer_offset; - - size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */ - if (vb->buffer_offset + size > vb->buffer->size) - size = vb->buffer->size - vb->buffer_offset; - - pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src); - pipe_buffer_unmap(pscreen, vb->buffer); - - vb->buffer = buf; /* don't pipe_reference, this is a private copy */ - return TRUE; -} - -static void -nv50_upload_user_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - if (nv50->vbo_fifo) - nv50->dirty |= NV50_NEW_ARRAYS; - if (!(nv50->dirty & NV50_NEW_ARRAYS)) - return; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) { - if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX) - continue; - nv50_upload_vtxbuf(nv50, i); - } -} - -static void -nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - float v[4]; - - util_format_read_4f(nv50->vtxelt[i].src_format, - v, 0, data, 0, 0, 0, 1, 1); - - switch (nv50->vtxelt[i].nr_components) { - case 4: - BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4); - OUT_RINGf (chan, v[0]); - OUT_RINGf (chan, v[1]); - OUT_RINGf (chan, v[2]); - OUT_RINGf (chan, v[3]); - break; - case 3: - BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3); - OUT_RINGf (chan, v[0]); - OUT_RINGf (chan, v[1]); - OUT_RINGf (chan, v[2]); - break; - case 2: - BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2); - OUT_RINGf (chan, v[0]); - OUT_RINGf (chan, v[1]); - break; - case 1: - BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1); - OUT_RINGf (chan, v[0]); - break; - default: - assert(0); - break; - } -} - -static unsigned -init_per_instance_arrays_immd(struct nv50_context *nv50, - unsigned startInstance, - unsigned pos[16], unsigned step[16]) -{ - struct nouveau_bo *bo; - unsigned i, b, count = 0; - - for (i = 0; i < nv50->vtxelt_nr; ++i) { - if (!nv50->vtxelt[i].instance_divisor) - continue; - ++count; - b = nv50->vtxelt[i].vertex_buffer_index; - - pos[i] = nv50->vtxelt[i].src_offset + - nv50->vtxbuf[b].buffer_offset + - startInstance * nv50->vtxbuf[b].stride; - step[i] = startInstance % nv50->vtxelt[i].instance_divisor; - - bo = nouveau_bo(nv50->vtxbuf[b].buffer); - if (!bo->map) - nouveau_bo_map(bo, NOUVEAU_BO_RD); - - nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); - } - - return count; -} - -static unsigned -init_per_instance_arrays(struct nv50_context *nv50, - unsigned startInstance, - unsigned pos[16], unsigned step[16]) -{ - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; +struct instance { struct nouveau_bo *bo; - struct nouveau_stateobj *so; - unsigned i, b, count = 0; - const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - if (nv50->vbo_fifo) - return init_per_instance_arrays_immd(nv50, startInstance, - pos, step); - - so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); - - for (i = 0; i < nv50->vtxelt_nr; ++i) { - if (!nv50->vtxelt[i].instance_divisor) - continue; - ++count; - b = nv50->vtxelt[i].vertex_buffer_index; - - pos[i] = nv50->vtxelt[i].src_offset + - nv50->vtxbuf[b].buffer_offset + - startInstance * nv50->vtxbuf[b].stride; - - if (!startInstance) { - step[i] = 0; - continue; - } - step[i] = startInstance % nv50->vtxelt[i].instance_divisor; - - bo = nouveau_bo(nv50->vtxbuf[b].buffer); - - so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); - } - - if (count && startInstance) { - so_ref (so, &nv50->state.instbuf); /* for flush notify */ - so_emit(chan, nv50->state.instbuf); - } - so_ref (NULL, &so); - - return count; -} + unsigned delta; + unsigned stride; + unsigned step; + unsigned divisor; +}; static void -step_per_instance_arrays_immd(struct nv50_context *nv50, - unsigned pos[16], unsigned step[16]) +instance_init(struct nv50_context *nv50, struct instance *a, unsigned first) { - struct nouveau_bo *bo; - unsigned i, b; + int i; - for (i = 0; i < nv50->vtxelt_nr; ++i) { - if (!nv50->vtxelt[i].instance_divisor) - continue; - if (++step[i] != nv50->vtxelt[i].instance_divisor) - continue; - b = nv50->vtxelt[i].vertex_buffer_index; - bo = nouveau_bo(nv50->vtxbuf[b].buffer); + for (i = 0; i < nv50->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; + struct pipe_vertex_buffer *vb; - step[i] = 0; - pos[i] += nv50->vtxbuf[b].stride; + a[i].divisor = ve->instance_divisor; + if (a[i].divisor) { + vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]); + a[i].bo = nouveau_bo(vb->buffer); + a[i].stride = vb->stride; + a[i].step = first % a[i].divisor; + a[i].delta = vb->buffer_offset + ve->src_offset + + (first * a[i].stride); + } } } static void -step_per_instance_arrays(struct nv50_context *nv50, - unsigned pos[16], unsigned step[16]) +instance_step(struct nv50_context *nv50, struct instance *a) { + struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - struct nouveau_bo *bo; - struct nouveau_stateobj *so; - unsigned i, b; - const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; - - if (nv50->vbo_fifo) { - step_per_instance_arrays_immd(nv50, pos, step); - return; - } - - so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2); + int i; - for (i = 0; i < nv50->vtxelt_nr; ++i) { - if (!nv50->vtxelt[i].instance_divisor) + for (i = 0; i < nv50->vtxelt->num_elements; i++) { + if (!a[i].divisor) continue; - b = nv50->vtxelt[i].vertex_buffer_index; - if (++step[i] == nv50->vtxelt[i].instance_divisor) { - step[i] = 0; - pos[i] += nv50->vtxbuf[b].stride; + BEGIN_RING(chan, tesla, + NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); + OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD | + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); + OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD | + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); + if (++a[i].step == a[i].divisor) { + a[i].step = 0; + a[i].delta += a[i].stride; } - - bo = nouveau_bo(nv50->vtxbuf[b].buffer); - - so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0); } - - so_ref (so, &nv50->state.instbuf); /* for flush notify */ - so_ref (NULL, &so); - - so_emit(chan, nv50->state.instbuf); -} - -static INLINE void -nv50_unmap_vbufs(struct nv50_context *nv50) -{ - unsigned i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) - if (nouveau_bo(nv50->vtxbuf[i].buffer)->map) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); } void @@ -415,198 +180,207 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe, struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; - unsigned i, nz_divisors; - unsigned step[16], pos[16]; - - if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) - nv50_upload_user_vbufs(nv50); + struct instance a[16]; + unsigned prim = nv50_prim(mode); - nv50_state_validate(nv50); + instance_init(nv50, a, startInstance); + if (!nv50_state_validate(nv50, 10 + 16*3)) + return; - nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + if (nv50->vbo_fifo) { + nv50_push_elements_instanced(pipe, NULL, 0, mode, start, + count, startInstance, + instanceCount); + return; + } BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); OUT_RING (chan, NV50_CB_AUX | (24 << 8)); OUT_RING (chan, startInstance); + while (instanceCount--) { + if (AVAIL_RING(chan) < (7 + 16*3)) { + FIRE_RING(chan); + if (!nv50_state_validate(nv50, 7 + 16*3)) { + assert(0); + return; + } + } + instance_step(nv50, a); - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode)); - - if (nv50->vbo_fifo) - nv50_push_arrays(nv50, start, count); - else { + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, prim); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); OUT_RING (chan, start); OUT_RING (chan, count); - } - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - - for (i = 1; i < instanceCount; i++) { - if (nz_divisors) /* any non-zero array divisors ? */ - step_per_instance_arrays(nv50, pos, step); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode) | (1 << 28)); - - if (nv50->vbo_fifo) - nv50_push_arrays(nv50, start, count); - else { - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); - } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - } - nv50_unmap_vbufs(nv50); - so_ref(NULL, &nv50->state.instbuf); + prim |= (1 << 28); + } } void nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - boolean ret; - - nv50_state_validate(nv50); - - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode)); - - if (nv50->vbo_fifo) - ret = nv50_push_arrays(nv50, start, count); - else { - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); - OUT_RING (chan, start); - OUT_RING (chan, count); - ret = TRUE; - } - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - - nv50_unmap_vbufs(nv50); - - /* XXX: not sure what to do if ret != TRUE: flush and retry? - */ - assert(ret); + nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1); } -static INLINE boolean -nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, - unsigned start, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - - map += start; +struct inline_ctx { + struct nv50_context *nv50; + void *map; +}; - if (nv50->vbo_fifo) - return nv50_push_elements_u08(nv50, map, count); +static void +inline_elt08(void *priv, unsigned start, unsigned count) +{ + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + uint8_t *map = (uint8_t *)ctx->map + start; if (count & 1) { BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); map++; - count--; + count &= ~1; } - while (count) { - unsigned nr = count > 2046 ? 2046 : count; - int i; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); - for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[i + 1] << 16) | map[i]); + count >>= 1; + if (!count) + return; - count -= nr; - map += nr; + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); + while (count--) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; } - return TRUE; } -static INLINE boolean -nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, - unsigned start, unsigned count) +static void +inline_elt16(void *priv, unsigned start, unsigned count) { - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - - map += start; - - if (nv50->vbo_fifo) - return nv50_push_elements_u16(nv50, map, count); + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + uint16_t *map = (uint16_t *)ctx->map + start; if (count & 1) { BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, map[0]); + count &= ~1; map++; - count--; } - while (count) { - unsigned nr = count > 2046 ? 2046 : count; - int i; - - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1); - for (i = 0; i < nr; i += 2) - OUT_RING (chan, (map[i + 1] << 16) | map[i]); + count >>= 1; + if (!count) + return; - count -= nr; - map += nr; + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count); + while (count--) { + OUT_RING(chan, (map[1] << 16) | map[0]); + map += 2; } - return TRUE; } -static INLINE boolean -nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map, - unsigned start, unsigned count) +static void +inline_elt32(void *priv, unsigned start, unsigned count) +{ + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + + BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count); + OUT_RINGp (chan, (uint32_t *)ctx->map + start, count); +} + +static void +inline_edgeflag(void *priv, boolean enabled) { + struct inline_ctx *ctx = priv; + struct nouveau_grobj *tesla = ctx->nv50->screen->tesla; + struct nouveau_channel *chan = tesla->channel; + + BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (chan, enabled ? 1 : 0); +} + +static void +nv50_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count, + unsigned startInstance, unsigned instanceCount) +{ + struct pipe_screen *pscreen = pipe->screen; + struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; + struct instance a[16]; + struct inline_ctx ctx; + struct u_split_prim s; + boolean nzi = FALSE; + unsigned overhead; + + overhead = 16*3; /* potential instance adjustments */ + overhead += 4; /* Begin()/End() */ + overhead += 4; /* potential edgeflag disable/reenable */ + overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */ + + s.priv = &ctx; + if (indexSize == 1) + s.emit = inline_elt08; + else + if (indexSize == 2) + s.emit = inline_elt16; + else + s.emit = inline_elt32; + s.edge = inline_edgeflag; + + ctx.nv50 = nv50; + ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + assert(ctx.map); + if (!ctx.map) + return; - map += start; + instance_init(nv50, a, startInstance); + if (!nv50_state_validate(nv50, overhead + 6 + 3)) + return; - if (nv50->vbo_fifo) - return nv50_push_elements_u32(nv50, map, count); + BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); + OUT_RING (chan, NV50_CB_AUX | (24 << 8)); + OUT_RING (chan, startInstance); + while (instanceCount--) { + unsigned max_verts; + boolean done; + + u_split_prim_init(&s, mode, start, count); + do { + if (AVAIL_RING(chan) < (overhead + 6)) { + FIRE_RING(chan); + if (!nv50_state_validate(nv50, (overhead + 6))) { + assert(0); + return; + } + } - while (count) { - unsigned nr = count > 2047 ? 2047 : count; + max_verts = AVAIL_RING(chan) - overhead; + if (max_verts > 2047) + max_verts = 2047; + if (indexSize != 4) + max_verts <<= 1; + instance_step(nv50, a); - BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr); - OUT_RINGp (chan, map, nr); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0)); + done = u_split_prim_next(&s, max_verts); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + } while (!done); - count -= nr; - map += nr; + nzi = TRUE; } - return TRUE; -} -static INLINE void -nv50_draw_elements_inline(struct nv50_context *nv50, - void *map, unsigned indexSize, - unsigned start, unsigned count) -{ - switch (indexSize) { - case 1: - nv50_draw_elements_inline_u08(nv50, map, start, count); - break; - case 2: - nv50_draw_elements_inline_u16(nv50, map, start, count); - break; - case 4: - nv50_draw_elements_inline_u32(nv50, map, start, count); - break; - } + pipe_buffer_unmap(pscreen, indexBuffer); } void @@ -617,49 +391,68 @@ nv50_draw_elements_instanced(struct pipe_context *pipe, unsigned startInstance, unsigned instanceCount) { struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->tesla->channel; struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nouveau_channel *chan = tesla->channel; - struct pipe_screen *pscreen = pipe->screen; - void *map; - unsigned i, nz_divisors; - unsigned step[16], pos[16]; + struct instance a[16]; + unsigned prim = nv50_prim(mode); - map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - - if (!NV50_USING_LOATHED_EDGEFLAG(nv50)) - nv50_upload_user_vbufs(nv50); - - nv50_state_validate(nv50); + instance_init(nv50, a, startInstance); + if (!nv50_state_validate(nv50, 13 + 16*3)) + return; - nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step); + if (nv50->vbo_fifo) { + nv50_push_elements_instanced(pipe, indexBuffer, indexSize, + mode, start, count, startInstance, + instanceCount); + return; + } else + if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) { + nv50_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count, startInstance, + instanceCount); + return; + } BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2); OUT_RING (chan, NV50_CB_AUX | (24 << 8)); OUT_RING (chan, startInstance); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode)); - - nv50_draw_elements_inline(nv50, map, indexSize, start, count); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); - - for (i = 1; i < instanceCount; ++i) { - if (nz_divisors) /* any non-zero array divisors ? */ - step_per_instance_arrays(nv50, pos, step); + while (instanceCount--) { + if (AVAIL_RING(chan) < (7 + 16*3)) { + FIRE_RING(chan); + if (!nv50_state_validate(nv50, 10 + 16*3)) { + assert(0); + return; + } + } + instance_step(nv50, a); BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode) | (1 << 28)); - - nv50_draw_elements_inline(nv50, map, indexSize, start, count); - + OUT_RING (chan, prim); + if (indexSize == 4) { + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0); + OUT_RING (chan, count); + nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer), + start << 2, count << 2); + } else + if (indexSize == 2) { + unsigned vb_start = (start & ~1); + unsigned vb_end = (start + count + 1) & ~1; + unsigned dwords = (vb_end - vb_start) >> 1; + + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); + OUT_RING (chan, ((start & 1) << 31) | count); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0); + OUT_RING (chan, dwords); + nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer), + vb_start << 1, dwords << 2); + BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); + OUT_RING (chan, 0); + } BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); OUT_RING (chan, 0); - } - nv50_unmap_vbufs(nv50); - so_ref(NULL, &nv50->state.instbuf); + prim |= (1 << 28); + } } void @@ -667,51 +460,8 @@ nv50_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - struct nv50_context *nv50 = nv50_context(pipe); - struct nouveau_channel *chan = nv50->screen->tesla->channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct pipe_screen *pscreen = pipe->screen; - void *map; - - nv50_state_validate(nv50); - - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - BEGIN_RING(chan, tesla, 0x142c, 1); - OUT_RING (chan, 0); - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); - OUT_RING (chan, nv50_prim(mode)); - - if (!nv50->vbo_fifo && indexSize == 4) { - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0); - OUT_RING (chan, count); - nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer), - start << 2, count << 2); - } else - if (!nv50->vbo_fifo && indexSize == 2) { - unsigned vb_start = (start & ~1); - unsigned vb_end = (start + count + 1) & ~1; - unsigned dwords = (vb_end - vb_start) >> 1; - - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); - OUT_RING (chan, ((start & 1) << 31) | count); - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0); - OUT_RING (chan, dwords); - nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer), - vb_start << 1, dwords << 2); - BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1); - OUT_RING (chan, 0); - } else { - map = pipe_buffer_map(pscreen, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); - nv50_draw_elements_inline(nv50, map, indexSize, start, count); - nv50_unmap_vbufs(nv50); - pipe_buffer_unmap(pscreen, indexBuffer); - } - - BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); - OUT_RING (chan, 0); + nv50_draw_elements_instanced(pipe, indexBuffer, indexSize, + mode, start, count, 0, 1); } static INLINE boolean @@ -726,6 +476,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, struct nouveau_bo *bo = nouveau_bo(vb->buffer); float v[4]; int ret; + unsigned nr_components = util_format_get_nr_components(ve->src_format); ret = nouveau_bo_map(bo, NOUVEAU_BO_RD); if (ret) @@ -736,9 +487,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, 0, 0, 1, 1); so = *pso; if (!so) - *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0); + *pso = so = so_new(nv50->vtxelt->num_elements, + nv50->vtxelt->num_elements * 4, 0); - switch (ve->nr_components) { + switch (nr_components) { case 4: so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4); so_data (so, fui(v[0])); @@ -775,6 +527,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib, } void +nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso) +{ + unsigned i; + + for (i = 0; i < cso->num_elements; ++i) { + struct pipe_vertex_element *ve = &cso->pipe[i]; + + cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve); + } +} + +struct nouveau_stateobj * nv50_vbo_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; @@ -783,30 +547,32 @@ nv50_vbo_validate(struct nv50_context *nv50) /* don't validate if Gallium took away our buffers */ if (nv50->vtxbuf_nr == 0) - return; + return NULL; + nv50->vbo_fifo = 0; + if (nv50->screen->force_push || + nv50->vertprog->cfg.edgeflag_in < 16) + nv50->vbo_fifo = 0xffff; - for (i = 0; i < nv50->vtxbuf_nr; ++i) + for (i = 0; i < nv50->vtxbuf_nr; i++) { if (nv50->vtxbuf[i].stride && !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)) nv50->vbo_fifo = 0xffff; + } - if (NV50_USING_LOATHED_EDGEFLAG(nv50)) - nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */ - - n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr); + n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr); vtxattr = NULL; - vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4); + vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4); vtxfmt = so_new(1, n_ve, 0); so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve); - for (i = 0; i < nv50->vtxelt_nr; i++) { - struct pipe_vertex_element *ve = &nv50->vtxelt[i]; + for (i = 0; i < nv50->vtxelt->num_elements; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i]; struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index]; struct nouveau_bo *bo = nouveau_bo(vb->buffer); - uint32_t hw = nv50_vbo_vtxelt_to_hw(ve); + uint32_t hw = nv50->vtxelt->hw[i]; if (!vb->stride && nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) { @@ -821,13 +587,13 @@ nv50_vbo_validate(struct nv50_context *nv50) } if (nv50->vbo_fifo) { - so_data (vtxfmt, hw | - (ve->instance_divisor ? (1 << 4) : i)); + so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i)); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); continue; } + so_data(vtxfmt, hw | i); so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3); @@ -855,355 +621,13 @@ nv50_vbo_validate(struct nv50_context *nv50) so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1); so_data (vtxbuf, 0); } - nv50->state.vtxelt_nr = nv50->vtxelt_nr; + nv50->state.vtxelt_nr = nv50->vtxelt->num_elements; - so_ref (vtxfmt, &nv50->state.vtxfmt); so_ref (vtxbuf, &nv50->state.vtxbuf); so_ref (vtxattr, &nv50->state.vtxattr); so_ref (NULL, &vtxbuf); - so_ref (NULL, &vtxfmt); so_ref (NULL, &vtxattr); + return vtxfmt; } -typedef void (*pfn_push)(struct nouveau_channel *, void *); - -struct nv50_vbo_emitctx -{ - pfn_push push[16]; - uint8_t *map[16]; - unsigned stride[16]; - unsigned nr_ve; - unsigned vtx_dwords; - unsigned vtx_max; - - float edgeflag; - unsigned ve_edgeflag; -}; - -static INLINE void -emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit) -{ - unsigned i; - - for (i = 0; i < emit->nr_ve; ++i) { - emit->push[i](chan, emit->map[i]); - emit->map[i] += emit->stride[i]; - } -} - -static INLINE void -emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit, - uint32_t vi) -{ - unsigned i; - - for (i = 0; i < emit->nr_ve; ++i) - emit->push[i](chan, emit->map[i] + emit->stride[i] * vi); -} - -static INLINE boolean -nv50_map_vbufs(struct nv50_context *nv50) -{ - int i; - - for (i = 0; i < nv50->vtxbuf_nr; ++i) { - struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i]; - unsigned size = vb->stride * (vb->max_index + 1) + 16; - - if (nouveau_bo(vb->buffer)->map) - continue; - - size = vb->stride * (vb->max_index + 1) + 16; - size = MIN2(size, vb->buffer->size); - if (!size) - size = vb->buffer->size; - - if (nouveau_bo_map_range(nouveau_bo(vb->buffer), - 0, size, NOUVEAU_BO_RD)) - break; - } - - if (i == nv50->vtxbuf_nr) - return TRUE; - for (; i >= 0; --i) - nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer)); - return FALSE; -} - -static void -emit_b32_1(struct nouveau_channel *chan, void *data) -{ - uint32_t *v = data; - - OUT_RING(chan, v[0]); -} - -static void -emit_b32_2(struct nouveau_channel *chan, void *data) -{ - uint32_t *v = data; - - OUT_RING(chan, v[0]); - OUT_RING(chan, v[1]); -} - -static void -emit_b32_3(struct nouveau_channel *chan, void *data) -{ - uint32_t *v = data; - - OUT_RING(chan, v[0]); - OUT_RING(chan, v[1]); - OUT_RING(chan, v[2]); -} - -static void -emit_b32_4(struct nouveau_channel *chan, void *data) -{ - uint32_t *v = data; - - OUT_RING(chan, v[0]); - OUT_RING(chan, v[1]); - OUT_RING(chan, v[2]); - OUT_RING(chan, v[3]); -} - -static void -emit_b16_1(struct nouveau_channel *chan, void *data) -{ - uint16_t *v = data; - - OUT_RING(chan, v[0]); -} - -static void -emit_b16_3(struct nouveau_channel *chan, void *data) -{ - uint16_t *v = data; - - OUT_RING(chan, (v[1] << 16) | v[0]); - OUT_RING(chan, v[2]); -} - -static void -emit_b08_1(struct nouveau_channel *chan, void *data) -{ - uint8_t *v = data; - - OUT_RING(chan, v[0]); -} - -static void -emit_b08_3(struct nouveau_channel *chan, void *data) -{ - uint8_t *v = data; - - OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]); -} - -static boolean -emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit, - unsigned start) -{ - unsigned i; - - if (nv50_map_vbufs(nv50) == FALSE) - return FALSE; - - emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in; - - emit->edgeflag = 0.5f; - emit->nr_ve = 0; - emit->vtx_dwords = 0; - - for (i = 0; i < nv50->vtxelt_nr; ++i) { - struct pipe_vertex_element *ve; - struct pipe_vertex_buffer *vb; - unsigned n, size; - const struct util_format_description *desc; - - ve = &nv50->vtxelt[i]; - vb = &nv50->vtxbuf[ve->vertex_buffer_index]; - if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor) - continue; - n = emit->nr_ve++; - - emit->stride[n] = vb->stride; - emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map + - vb->buffer_offset + - (start * vb->stride + ve->src_offset); - - desc = util_format_description(ve->src_format); - assert(desc); - - size = util_format_get_component_bits( - ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0); - - assert(ve->nr_components > 0 && ve->nr_components <= 4); - - /* It shouldn't be necessary to push the implicit 1s - * for case 3 and size 8 cases 1, 2, 3. - */ - switch (size) { - default: - NOUVEAU_ERR("unsupported vtxelt size: %u\n", size); - return FALSE; - case 32: - switch (ve->nr_components) { - case 1: emit->push[n] = emit_b32_1; break; - case 2: emit->push[n] = emit_b32_2; break; - case 3: emit->push[n] = emit_b32_3; break; - case 4: emit->push[n] = emit_b32_4; break; - } - emit->vtx_dwords += ve->nr_components; - break; - case 16: - switch (ve->nr_components) { - case 1: emit->push[n] = emit_b16_1; break; - case 2: emit->push[n] = emit_b32_1; break; - case 3: emit->push[n] = emit_b16_3; break; - case 4: emit->push[n] = emit_b32_2; break; - } - emit->vtx_dwords += (ve->nr_components + 1) >> 1; - break; - case 8: - switch (ve->nr_components) { - case 1: emit->push[n] = emit_b08_1; break; - case 2: emit->push[n] = emit_b16_1; break; - case 3: emit->push[n] = emit_b08_3; break; - case 4: emit->push[n] = emit_b32_1; break; - } - emit->vtx_dwords += 1; - break; - } - } - - emit->vtx_max = 512 / emit->vtx_dwords; - if (emit->ve_edgeflag < 16) - emit->vtx_max = 1; - - return TRUE; -} - -static INLINE void -set_edgeflag(struct nouveau_channel *chan, - struct nouveau_grobj *tesla, - struct nv50_vbo_emitctx *emit, uint32_t index) -{ - unsigned i = emit->ve_edgeflag; - - if (i < 16) { - float f = *((float *)(emit->map[i] + index * emit->stride[i])); - - if (emit->edgeflag != f) { - emit->edgeflag = f; - - BEGIN_RING(chan, tesla, 0x15e4, 1); - OUT_RING (chan, f ? 1 : 0); - } - } -} - -static boolean -nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_vbo_emitctx emit; - if (emit_prepare(nv50, &emit, start) == FALSE) - return FALSE; - - while (count) { - unsigned i, dw, nr = MIN2(count, emit.vtx_max); - dw = nr * emit.vtx_dwords; - - set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */ - - BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); - for (i = 0; i < nr; ++i) - emit_vtx_next(chan, &emit); - - count -= nr; - } - - return TRUE; -} - -static boolean -nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_vbo_emitctx emit; - - if (emit_prepare(nv50, &emit, 0) == FALSE) - return FALSE; - - while (count) { - unsigned i, dw, nr = MIN2(count, emit.vtx_max); - dw = nr * emit.vtx_dwords; - - set_edgeflag(chan, tesla, &emit, *map); - - BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); - for (i = 0; i < nr; ++i) - emit_vtx(chan, &emit, *map++); - - count -= nr; - } - - return TRUE; -} - -static boolean -nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_vbo_emitctx emit; - - if (emit_prepare(nv50, &emit, 0) == FALSE) - return FALSE; - - while (count) { - unsigned i, dw, nr = MIN2(count, emit.vtx_max); - dw = nr * emit.vtx_dwords; - - set_edgeflag(chan, tesla, &emit, *map); - - BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); - for (i = 0; i < nr; ++i) - emit_vtx(chan, &emit, *map++); - - count -= nr; - } - - return TRUE; -} - -static boolean -nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count) -{ - struct nouveau_channel *chan = nv50->screen->base.channel; - struct nouveau_grobj *tesla = nv50->screen->tesla; - struct nv50_vbo_emitctx emit; - - if (emit_prepare(nv50, &emit, 0) == FALSE) - return FALSE; - - while (count) { - unsigned i, dw, nr = MIN2(count, emit.vtx_max); - dw = nr * emit.vtx_dwords; - - set_edgeflag(chan, tesla, &emit, *map); - - BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw); - for (i = 0; i < nr; ++i) - emit_vtx(chan, &emit, *map++); - - count -= nr; - } - - return TRUE; -} diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile new file mode 100644 index 00000000000..dfe97e6ed5f --- /dev/null +++ b/src/gallium/drivers/nvfx/Makefile @@ -0,0 +1,32 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nvfx + +C_SOURCES = \ + nv04_surface_2d.c \ + nvfx_context.c \ + nvfx_clear.c \ + nvfx_draw.c \ + nvfx_fragprog.c \ + nvfx_fragtex.c \ + nv30_fragtex.c \ + nv40_fragtex.c \ + nvfx_miptree.c \ + nvfx_query.c \ + nvfx_screen.c \ + nvfx_state.c \ + nvfx_state_blend.c \ + nvfx_state_emit.c \ + nvfx_state_fb.c \ + nvfx_state_rasterizer.c \ + nvfx_state_scissor.c \ + nvfx_state_stipple.c \ + nvfx_state_viewport.c \ + nvfx_state_zsa.c \ + nvfx_surface.c \ + nvfx_transfer.c \ + nvfx_vbo.c \ + nvfx_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.c b/src/gallium/drivers/nvfx/nv04_surface_2d.c index 42c2ca932d8..ed18c9f24dc 100644 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.c +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.c @@ -18,15 +18,15 @@ nv04_surface_format(enum pipe_format format) case PIPE_FORMAT_I8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; default: return -1; @@ -39,14 +39,14 @@ nv04_rect_format(enum pipe_format format) switch (format) { case PIPE_FORMAT_A8_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8A8_UNORM: case PIPE_FORMAT_Z16_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; default: return -1; @@ -61,15 +61,15 @@ nv04_scaled_image_format(enum pipe_format format) case PIPE_FORMAT_L8_UNORM: case PIPE_FORMAT_I8_UNORM: return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; default: return -1; @@ -518,7 +518,6 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ; } - struct nv40_screen* screen = (struct nv40_screen*)pscreen; ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE; struct pipe_texture templ; @@ -544,4 +543,3 @@ nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d return temp_ns; } - diff --git a/src/gallium/drivers/nouveau/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h index ce696a11a39..ce696a11a39 100644 --- a/src/gallium/drivers/nouveau/nv04_surface_2d.h +++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c index 9f4a104f673..2b56f454921 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nvfx/nv30_fragtex.c @@ -1,7 +1,37 @@ #include "util/u_format.h" -#include "nv30_context.h" +#include "nvfx_context.h" #include "nouveau/nouveau_util.h" +#include "nvfx_tex.h" + +void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 8) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ { \ @@ -23,17 +53,17 @@ struct nv30_texture_format { static struct nv30_texture_format nv30_texture_formats[] = { - _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), - _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W), + _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), + _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), - _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), + _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), _(Z16_UNORM , R5G6B5 , S1, S1, S1, ONE, X, X, X, X), - _(Z24S8_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), + _(S8Z24_UNORM , A8R8G8B8, S1, S1, S1, ONE, X, X, X, X), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), @@ -57,11 +87,11 @@ nv30_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * -nv30_fragtex_build(struct nv30_context *nv30, int unit) +struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit) { - struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; - struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit]; struct pipe_texture *pt = &nv30mt->base; struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer); struct nv30_texture_format *tf; @@ -101,7 +131,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) txs = tf->swizzle; so = so_new(1, 8, 2); - so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); @@ -115,47 +145,3 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit) return so; } - -static boolean -nv30_fragtex_validate(struct nv30_context *nv30) -{ - struct nv30_fragment_program *fp = nv30->fragprog; - struct nv30_state *state = &nv30->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); - } - - samplers = nv30->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv30_fragtex_build(nv30, unit); - so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); - } - - nv30->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nv30_state_entry nv30_state_fragtex = { - .validate = nv30_fragtex_validate, - .dirty = { - .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, - .hw = 0 - } -}; diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h new file mode 100644 index 00000000000..ec0444c07f8 --- /dev/null +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -0,0 +1,169 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c index 7a28d577b14..5889b5e40d5 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nvfx/nv40_fragtex.c @@ -1,18 +1,63 @@ #include "util/u_format.h" +#include "nvfx_context.h" +#include "nvfx_tex.h" -#include "nv40_context.h" +void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 2) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ TRUE, \ PIPE_FORMAT_##m, \ NV40TCL_TEX_FORMAT_FORMAT_##tf, \ - (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ - NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ - NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ - NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ - ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ - (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w), \ + ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) | \ + (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw)) \ } struct nv40_texture_format { @@ -25,18 +70,18 @@ struct nv40_texture_format { static struct nv40_texture_format nv40_texture_formats[] = { - _(X8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), - _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), - _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(B8G8R8X8_UNORM, A8R8G8B8, S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(B8G8R8A8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B5G5R5A1_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B4G4R4A4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(B5G6R5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), - _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(L8A8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), - _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(S8Z24_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), @@ -60,11 +105,11 @@ nv40_fragtex_format(uint pipe_format) } -static struct nouveau_stateobj * -nv40_fragtex_build(struct nv40_context *nv40, int unit) +struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit) { - struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; - struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit]; + struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit]; struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer); struct pipe_texture *pt = &nv40mt->base; struct nv40_texture_format *tf; @@ -81,20 +126,20 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); if (1) /* XXX */ - txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + txf |= NV34TCL_TX_FORMAT_NO_BORDER; switch (pt->target) { case PIPE_TEXTURE_CUBE: - txf |= NV40TCL_TEX_FORMAT_CUBIC; + txf |= NV34TCL_TX_FORMAT_CUBIC; /* fall-through */ case PIPE_TEXTURE_2D: - txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + txf |= NV34TCL_TX_FORMAT_DIMS_2D; break; case PIPE_TEXTURE_3D: - txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + txf |= NV34TCL_TX_FORMAT_DIMS_3D; break; case PIPE_TEXTURE_1D: - txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + txf |= NV34TCL_TX_FORMAT_DIMS_1D; break; default: NOUVEAU_ERR("Unknown target %d\n", pt->target); @@ -111,63 +156,19 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit) txs = tf->swizzle; so = so_new(2, 9, 2); - so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8); so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR, - NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); so_data (so, ps->wrap); so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); so_data (so, txs); so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); - so_data (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | + so_data (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pt->height0); so_data (so, ps->bcol); - so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1); so_data (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); return so; } - -static boolean -nv40_fragtex_validate(struct nv40_context *nv40) -{ - struct nv40_fragment_program *fp = nv40->fragprog; - struct nv40_state *state = &nv40->state; - struct nouveau_stateobj *so; - unsigned samplers, unit; - - samplers = state->fp_samplers & ~fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = so_new(1, 1, 0); - so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); - so_data (so, 0); - so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); - } - - samplers = nv40->dirty_samplers & fp->samplers; - while (samplers) { - unit = ffs(samplers) - 1; - samplers &= ~(1 << unit); - - so = nv40_fragtex_build(nv40, unit); - so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); - so_ref(NULL, &so); - state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); - } - - nv40->state.fp_samplers = fp->samplers; - return FALSE; -} - -struct nv40_state_entry nv40_state_fragtex = { - .validate = nv40_fragtex_validate, - .dirty = { - .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, - .hw = 0 - } -}; - diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h new file mode 100644 index 00000000000..7337293babc --- /dev/null +++ b/src/gallium/drivers/nvfx/nv40_vertprog.h @@ -0,0 +1,177 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + +#include "nvfx_shader.h" + +#endif diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nvfx/nvfx_clear.c index ddf13addf3b..2be70fcee40 100644 --- a/src/gallium/drivers/nv40/nv40_clear.c +++ b/src/gallium/drivers/nvfx/nvfx_clear.c @@ -3,12 +3,12 @@ #include "pipe/p_state.h" #include "util/u_clear.h" -#include "nv40_context.h" +#include "nvfx_context.h" void -nv40_clear(struct pipe_context *pipe, unsigned buffers, +nvfx_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba, double depth, unsigned stencil) { - util_clear(pipe, &nv40_context(pipe)->framebuffer, buffers, rgba, depth, + util_clear(pipe, &nvfx_context(pipe)->framebuffer, buffers, rgba, depth, stencil); } diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c new file mode 100644 index 00000000000..fc3cbdb558f --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -0,0 +1,90 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" + +#include "nvfx_context.h" +#include "nvfx_screen.h" + +static void +nvfx_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, 0x1fd8, 1); + OUT_RING (chan, 1); + } + + FIRE_RING(chan); + if (fence) + *fence = NULL; +} + +static void +nvfx_destroy(struct pipe_context *pipe) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned i; + + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (nvfx->state.hw[i]) + so_ref(NULL, &nvfx->state.hw[i]); + } + + if (nvfx->draw) + draw_destroy(nvfx->draw); + FREE(nvfx); +} + +struct pipe_context * +nvfx_create(struct pipe_screen *pscreen, void *priv) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nvfx_context *nvfx; + struct nouveau_winsys *nvws = screen->nvws; + + nvfx = CALLOC(1, sizeof(struct nvfx_context)); + if (!nvfx) + return NULL; + nvfx->screen = screen; + + nvfx->nvws = nvws; + + nvfx->pipe.winsys = ws; + nvfx->pipe.screen = pscreen; + nvfx->pipe.priv = priv; + nvfx->pipe.destroy = nvfx_destroy; + nvfx->pipe.draw_arrays = nvfx_draw_arrays; + nvfx->pipe.draw_elements = nvfx_draw_elements; + nvfx->pipe.clear = nvfx_clear; + nvfx->pipe.flush = nvfx_flush; + + nvfx->pipe.is_texture_referenced = nouveau_is_texture_referenced; + nvfx->pipe.is_buffer_referenced = nouveau_is_buffer_referenced; + + screen->base.channel->user_private = nvfx; + screen->base.channel->flush_notify = nvfx_state_flush_notify; + + nvfx->is_nv4x = screen->is_nv4x; + + nvfx_init_query_functions(nvfx); + nvfx_init_surface_functions(nvfx); + nvfx_init_state_functions(nvfx); + nvfx_init_transfer_functions(nvfx); + + /* Create, configure, and install fallback swtnl path */ + nvfx->draw = draw_create(); + draw_wide_point_threshold(nvfx->draw, 9999999.0); + draw_wide_line_threshold(nvfx->draw, 9999999.0); + draw_enable_line_stipple(nvfx->draw, FALSE); + draw_enable_point_sprites(nvfx->draw, FALSE); + draw_set_rasterize_stage(nvfx->draw, nvfx_draw_render_stage(nvfx)); + + return &nvfx->pipe; +} diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h new file mode 100644 index 00000000000..5eed8a560e5 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -0,0 +1,264 @@ +#ifndef __NVFX_CONTEXT_H__ +#define __NVFX_CONTEXT_H__ + +#include <stdio.h> + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_inlines.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_context.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nvfx_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nvfx_state_index { + NVFX_STATE_FB = 0, + NVFX_STATE_VIEWPORT = 1, + NVFX_STATE_BLEND = 2, + NVFX_STATE_RAST = 3, + NVFX_STATE_ZSA = 4, + NVFX_STATE_BCOL = 5, + NVFX_STATE_CLIP = 6, + NVFX_STATE_SCISSOR = 7, + NVFX_STATE_STIPPLE = 8, + NVFX_STATE_FRAGPROG = 9, + NVFX_STATE_VERTPROG = 10, + NVFX_STATE_FRAGTEX0 = 11, + NVFX_STATE_FRAGTEX1 = 12, + NVFX_STATE_FRAGTEX2 = 13, + NVFX_STATE_FRAGTEX3 = 14, + NVFX_STATE_FRAGTEX4 = 15, + NVFX_STATE_FRAGTEX5 = 16, + NVFX_STATE_FRAGTEX6 = 17, + NVFX_STATE_FRAGTEX7 = 18, + NVFX_STATE_FRAGTEX8 = 19, + NVFX_STATE_FRAGTEX9 = 20, + NVFX_STATE_FRAGTEX10 = 21, + NVFX_STATE_FRAGTEX11 = 22, + NVFX_STATE_FRAGTEX12 = 23, + NVFX_STATE_FRAGTEX13 = 24, + NVFX_STATE_FRAGTEX14 = 25, + NVFX_STATE_FRAGTEX15 = 26, + NVFX_STATE_VERTTEX0 = 27, + NVFX_STATE_VERTTEX1 = 28, + NVFX_STATE_VERTTEX2 = 29, + NVFX_STATE_VERTTEX3 = 30, + NVFX_STATE_VTXBUF = 31, + NVFX_STATE_VTXFMT = 32, + NVFX_STATE_VTXATTR = 33, + NVFX_STATE_SR = 34, + NVFX_STATE_MAX = 35 +}; + +#include "nvfx_screen.h" + +#define NVFX_NEW_BLEND (1 << 0) +#define NVFX_NEW_RAST (1 << 1) +#define NVFX_NEW_ZSA (1 << 2) +#define NVFX_NEW_SAMPLER (1 << 3) +#define NVFX_NEW_FB (1 << 4) +#define NVFX_NEW_STIPPLE (1 << 5) +#define NVFX_NEW_SCISSOR (1 << 6) +#define NVFX_NEW_VIEWPORT (1 << 7) +#define NVFX_NEW_BCOL (1 << 8) +#define NVFX_NEW_VERTPROG (1 << 9) +#define NVFX_NEW_FRAGPROG (1 << 10) +#define NVFX_NEW_ARRAYS (1 << 11) +#define NVFX_NEW_UCP (1 << 12) +#define NVFX_NEW_SR (1 << 13) + +struct nvfx_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nvfx_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nvfx_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NVFX_STATE_MAX]; +}; + +struct nvfx_vtxelt_state { + struct pipe_vertex_element pipe[16]; + unsigned num_elements; +}; + +struct nvfx_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nvfx_screen *screen; + + unsigned is_nv4x; /* either 0 or ~0 */ + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nvfx_state state; + struct { + struct nvfx_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nvfx_vertex_program *vertprog; + struct nvfx_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nvfx_rasterizer_state *rasterizer; + struct nvfx_zsa_state *zsa; + struct nvfx_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_stencil_ref stencil_ref; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nvfx_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nvfx_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct nvfx_vtxelt_state *vtxelt; +}; + +static INLINE struct nvfx_context * +nvfx_context(struct pipe_context *pipe) +{ + return (struct nvfx_context *)pipe; +} + +struct nvfx_state_entry { + boolean (*validate)(struct nvfx_context *nvfx); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern struct nvfx_state_entry nvfx_state_blend; +extern struct nvfx_state_entry nvfx_state_blend_colour; +extern struct nvfx_state_entry nvfx_state_fragprog; +extern struct nvfx_state_entry nvfx_state_fragtex; +extern struct nvfx_state_entry nvfx_state_framebuffer; +extern struct nvfx_state_entry nvfx_state_rasterizer; +extern struct nvfx_state_entry nvfx_state_scissor; +extern struct nvfx_state_entry nvfx_state_sr; +extern struct nvfx_state_entry nvfx_state_stipple; +extern struct nvfx_state_entry nvfx_state_vbo; +extern struct nvfx_state_entry nvfx_state_vertprog; +extern struct nvfx_state_entry nvfx_state_viewport; +extern struct nvfx_state_entry nvfx_state_vtxfmt; +extern struct nvfx_state_entry nvfx_state_zsa; + +extern void nvfx_init_query_functions(struct nvfx_context *nvfx); +extern void nvfx_init_surface_functions(struct nvfx_context *nvfx); + +/* nvfx_context.c */ +struct pipe_context * +nvfx_create(struct pipe_screen *pscreen, void *priv); + +/* nvfx_clear.c */ +extern void nvfx_clear(struct pipe_context *pipe, unsigned buffers, + const float *rgba, double depth, unsigned stencil); + +/* nvfx_draw.c */ +extern struct draw_stage *nvfx_draw_render_stage(struct nvfx_context *nvfx); +extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + +/* nvfx_fragprog.c */ +extern void nvfx_fragprog_destroy(struct nvfx_context *, + struct nvfx_fragment_program *); + +/* nv30_fragtex.c */ +extern void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); +extern void nv30_fragtex_bind(struct nvfx_context *); +extern struct nouveau_stateobj * +nv30_fragtex_build(struct nvfx_context *nvfx, int unit); + +/* nv40_fragtex.c */ +extern void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); +extern void nv40_fragtex_bind(struct nvfx_context *); +extern struct nouveau_stateobj * +nv40_fragtex_build(struct nvfx_context *nvfx, int unit); + +/* nvfx_state.c */ +extern void nvfx_init_state_functions(struct nvfx_context *nvfx); + +/* nvfx_state_emit.c */ +extern void nvfx_state_flush_notify(struct nouveau_channel *chan); +extern boolean nvfx_state_validate(struct nvfx_context *nvfx); +extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx); +extern void nvfx_state_emit(struct nvfx_context *nvfx); + +/* nvfx_transfer.c */ +extern void nvfx_init_transfer_functions(struct nvfx_context *nvfx); + +/* nvfx_vbo.c */ +extern void nvfx_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern void nvfx_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nvfx_vertprog.c */ +extern void nvfx_vertprog_destroy(struct nvfx_context *, + struct nvfx_vertex_program *); + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c new file mode 100644 index 00000000000..5379b29efd1 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -0,0 +1,350 @@ +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "tgsi/tgsi_ureg.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nvfx_context.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. + */ + +struct nvfx_render_stage { + struct draw_stage stage; + struct nvfx_context *nvfx; + unsigned prim; +}; + +static INLINE struct nvfx_render_stage * +nvfx_render_stage(struct draw_stage *stage) +{ + return (struct nvfx_render_stage *)stage; +} + +static INLINE void +nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +{ + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { + unsigned idx = nvfx->swtnl.draw[i]; + unsigned hw = nvfx->swtnl.hw[i]; + + switch (nvfx->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (chan, fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0])); + OUT_RING (chan, fui(v->data[idx][1])); + OUT_RING (chan, fui(v->data[idx][2])); + OUT_RING (chan, fui(v->data[idx][3])); + break; + case 0xff: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3])); + OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3])); + OUT_RING (chan, fui(1.0f / v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); + } + FIRE_RING(chan); + nvfx_state_emit(nvfx); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nvfx_render_vertex(nvfx, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (AVAIL_RING(chan) < ((count * 20) + 6)) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_POINTS, 1); +} + +static void +nvfx_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_LINES, 2); +} + +static void +nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); +} + +static void +nvfx_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nvfx_render_stage *rs = nvfx_render_stage(draw); + struct nvfx_context *nvfx = rs->nvfx; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + } +} + +static void +nvfx_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nvfx_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static struct nvfx_vertex_program * +nvfx_create_drawvp(struct nvfx_context *nvfx) +{ + struct ureg_program *ureg; + uint i; + + ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + if (ureg == NULL) + return NULL; + + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); + ureg_MOV(ureg, + ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), + ureg_DECL_vs_input(ureg, 5)); + for (i = 0; i < 8; ++i) + ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); + + ureg_END( ureg ); + + return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); +} + +struct draw_stage * +nvfx_draw_render_stage(struct nvfx_context *nvfx) +{ + struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); + + if (!nvfx->swtnl.vertprog) + nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); + + render->nvfx = nvfx; + render->stage.draw = nvfx->draw; + render->stage.point = nvfx_render_point; + render->stage.line = nvfx_render_line; + render->stage.tri = nvfx_render_tri; + render->stage.flush = nvfx_render_flush; + render->stage.reset_stipple_counter = nvfx_render_reset_stipple_counter; + render->stage.destroy = nvfx_render_destroy; + + return &render->stage; +} + +void +nvfx_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct pipe_screen *pscreen = pipe->screen; + unsigned i; + void *map; + + if (!nvfx_state_validate_swtnl(nvfx)) + return; + nvfx->state.dirty &= ~(1ULL << NVFX_STATE_VTXBUF); + nvfx_state_emit(nvfx); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) { + map = pipe_buffer_map(pscreen, nvfx->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nvfx->draw, i, map); + } + + if (idxbuf) { + map = pipe_buffer_map(pscreen, idxbuf, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nvfx->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nvfx->draw, 0, NULL); + } + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = pipe_buffer_map(pscreen, + nvfx->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, + map, nr); + } + + draw_arrays(nvfx->draw, mode, start, count); + + for (i = 0; i < nvfx->vtxbuf_nr; i++) + pipe_buffer_unmap(pscreen, nvfx->vtxbuf[i].buffer); + + if (idxbuf) + pipe_buffer_unmap(pscreen, idxbuf); + + if (nvfx->constbuf[PIPE_SHADER_VERTEX]) + pipe_buffer_unmap(pscreen, nvfx->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nvfx->draw); + pipe->flush(pipe, 0, NULL); +} + +static INLINE void +emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); + unsigned a = nvfx->swtnl.nr_attribs++; + + nvfx->swtnl.hw[a] = hw; + nvfx->swtnl.emit[a] = emit; + nvfx->swtnl.draw[a] = draw_out; +} + +static boolean +nvfx_state_vtxfmt_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nvfx->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nvfx_state_entry nvfx_state_vtxfmt = { + .validate = nvfx_state_vtxfmt_validate, + .dirty = { + .pipe = NVFX_NEW_ARRAYS | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index dc24f9b08a5..76351430f44 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -7,37 +7,20 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" -#include "nv40_context.h" - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 1 -#define MASK_Y 2 -#define MASK_Z 4 -#define MASK_W 8 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV40_FP_OP_COND_TR -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) -#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) +#include "nvfx_context.h" +#include "nvfx_shader.h" #define MAX_CONSTS 128 #define MAX_IMM 32 -struct nv40_fpc { - struct nv40_fragment_program *fp; +struct nvfx_fpc { + struct nvfx_fragment_program *fp; uint attrib_map[PIPE_MAX_SHADER_INPUTS]; unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_temp; int num_regs; @@ -50,35 +33,35 @@ struct nv40_fpc { } consts[MAX_CONSTS]; int nr_consts; - struct nv40_sreg imm[MAX_IMM]; + struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; }; -static INLINE struct nv40_sreg -temp(struct nv40_fpc *fpc) +static INLINE struct nvfx_sreg +temp(struct nvfx_fpc *fpc) { int idx = ffs(~fpc->r_temps) - 1; if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } fpc->r_temps |= (1 << idx); fpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void -release_temps(struct nv40_fpc *fpc) +release_temps(struct nvfx_fpc *fpc) { fpc->r_temps &= ~fpc->r_temps_discard; fpc->r_temps_discard = 0; } -static INLINE struct nv40_sreg -constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +static INLINE struct nvfx_sreg +constant(struct nvfx_fpc *fpc, int pipe, float vals[4]) { int idx; @@ -89,45 +72,45 @@ constant(struct nv40_fpc *fpc, int pipe, float vals[4]) fpc->consts[idx].pipe = pipe; if (pipe == -1) memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ (d), (m), (s0), (s1), (s2)) #define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ (d), (m), (s0), none, none) static void -grow_insns(struct nv40_fpc *fpc, int size) +grow_insns(struct nvfx_fpc *fpc, int size) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; fp->insn_len += size; fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); } static void -emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; switch (src.type) { - case NV40SR_INPUT: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; - case NV40SR_OUTPUT: - sr |= NV40_FP_REG_SRC_HALF; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ - case NV40SR_TEMP: - sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; - case NV40SR_CONST: + case NVFXSR_CONST: if (!fpc->have_const) { grow_insns(fpc, 4); fpc->have_const = 1; @@ -135,7 +118,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) hw = &fp->insn[fpc->inst_offset]; if (fpc->consts[src.index].pipe >= 0) { - struct nv40_fragment_program_data *fpd; + struct nvfx_fragment_program_data *fpd; fp->consts = realloc(fp->consts, ++fp->nr_consts * sizeof(*fpd)); @@ -149,63 +132,63 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; - case NV40SR_NONE: - sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); break; default: assert(0); } if (src.negate) - sr |= NV40_FP_REG_NEGATE; + sr |= NVFX_FP_REG_NEGATE; if (src.abs) hw[1] |= (1 << (29 + pos)); - sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); hw[pos + 1] |= sr; } static void -emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; switch (dst.type) { - case NV40SR_TEMP: + case NVFXSR_TEMP: if (fpc->num_regs < (dst.index + 1)) fpc->num_regs = dst.index + 1; break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: if (dst.index == 1) { fp->fp_control |= 0xe; } else { - hw[0] |= NV40_FP_OP_OUT_REG_HALF; + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; } break; - case NV40SR_NONE: + case NVFXSR_NONE: hw[0] |= (1 << 30); break; default: assert(0); } - hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); } static void -nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; fpc->inst_offset = fp->insn_len; @@ -214,22 +197,22 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NV40_FP_OP_OPCODE_KIL) - fp->fp_control |= NV40TCL_FP_CONTROL_KIL; - hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + if (op == NVFX_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); if (sat) - hw[0] |= NV40_FP_OP_OUT_SAT; + hw[0] |= NVFX_FP_OP_OUT_SAT; if (dst.cc_update) - hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); emit_dst(fpc, dst); emit_src(fpc, 0, s0); @@ -238,26 +221,26 @@ nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, } static void -nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv40_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; - nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); fp->samplers |= (1 << unit); } -static INLINE struct nv40_sreg -tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, + src = nvfx_sr(NVFXSR_INPUT, fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: @@ -288,18 +271,18 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) return src; } -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) { switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: return fpc->r_result[fdst->Register.Index]; case TGSI_FILE_TEMPORARY: return fpc->r_temp[fdst->Register.Index]; case TGSI_FILE_NULL: - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); default: NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); - return nv40_sr(NV40SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); } } @@ -308,52 +291,19 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W; return mask; } static boolean -src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= (1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(fpc); - - if (mask) - arith(fpc, 0, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, +nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg src[3], dst, tmp; + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; int mask, sat, unit; int ai = -1, ci = -1, ii = -1; int i; @@ -377,23 +327,12 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(fpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -404,7 +343,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -415,7 +354,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); - arith(fpc, 0, MOV, src[i], MASK_ALL, + arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL, tgsi_src(fpc, fsrc), none, none); } break; @@ -445,25 +384,25 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = nv40_sr(NV40SR_NONE, 0); + tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV40_VP_INST_COND_GE; + dst.cc_test = NVFX_COND_GE; arith(fpc, sat, MOV, dst, mask, src[2], none, none); - dst.cc_test = NV40_VP_INST_COND_LT; + dst.cc_test = NVFX_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: arith(fpc, sat, COS, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DDX: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -471,13 +410,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } break; case TGSI_OPCODE_DDY: - if (mask & (MASK_Z | MASK_W)) { + if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) { tmp = temp(fpc); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, swz(src[0], Z, W, Z, W), none, none); - arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W, swz(tmp, X, Y, X, Y), none, none); - arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0], none, none); arith(fpc, 0, MOV, dst, mask, tmp, none, none); } else { @@ -492,7 +431,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; case TGSI_OPCODE_DPH: tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none); arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), swz(src[1], W, W, W, W), none); break; @@ -512,10 +451,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, 0, KIL, none, 0, none, none, none); break; case TGSI_OPCODE_KIL: - dst = nv40_sr(NV40SR_NONE, 0); + dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; - arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NVFX_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -523,9 +462,13 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, break; // case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: - tmp = temp(fpc); - arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); - arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + if(!nvfx->is_nv4x) + arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); + else { + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + } break; case TGSI_OPCODE_MAD: arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -543,13 +486,17 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: - tmp = temp(fpc); - arith(fpc, 0, LG2, tmp, MASK_X, - swz(src[0], X, X, X, X), none, none); - arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(fpc, sat, EX2, dst, mask, - swz(tmp, X, X, X, X), none, none); + if(!nvfx->is_nv4x) + arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + } break; case TGSI_OPCODE_RCP: arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -558,42 +505,50 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, assert(0); break; case TGSI_OPCODE_RFL: - tmp = temp(fpc); - arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); - arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); - arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, - swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); - arith(fpc, sat, MAD, dst, mask, - swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + if(!nvfx->is_nv4x) + arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); + else { + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + } break; case TGSI_OPCODE_RSQ: - tmp = temp(fpc); - arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, - abs(swz(src[0], X, X, X, X)), none, none); - arith(fpc, sat, EX2, dst, mask, - neg(swz(tmp, X, X, X, X)), none, none); + if(!nvfx->is_nv4x) + arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + else { + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + } break; case TGSI_OPCODE_SCS: /* avoid overwriting the source */ - if(src[0].swz[SWZ_X] != SWZ_X) + if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X) { - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } } else { - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, + if (mask & NVFX_FP_MASK_Y) { + arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y, swz(src[0], X, X, X, X), none, none); } - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, + if (mask & NVFX_FP_MASK_X) { + arith(fpc, sat, COS, dst, NVFX_FP_MASK_X, swz(src[0], X, X, X, X), none, none); } } @@ -641,7 +596,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, tmp = temp(fpc); arith(fpc, 0, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -655,32 +610,32 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, } static boolean -nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, +nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { int hw; switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV40_FP_OP_INPUT_SRC_POSITION; + hw = NVFX_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_FP_OP_INPUT_SRC_COL0; + hw = NVFX_FP_OP_INPUT_SRC_COL0; } else if (fdec->Semantic.Index == 1) { - hw = NV40_FP_OP_INPUT_SRC_COL1; + hw = NVFX_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV40_FP_OP_INPUT_SRC_FOGC; + hw = NVFX_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); @@ -697,7 +652,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, } static boolean -nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, +nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc, const struct tgsi_full_declaration *fdec) { unsigned idx = fdec->Range.First; @@ -708,12 +663,14 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, hw = 1; break; case TGSI_SEMANTIC_COLOR: + hw = ~0; switch (fdec->Semantic.Index) { case 0: hw = 0; break; case 1: hw = 2; break; case 2: hw = 3; break; case 3: hw = 4; break; - default: + } + if(hw > ((nvfx->is_nv4x) ? 4 : 2)) { NOUVEAU_ERR("bad rcol index\n"); return FALSE; } @@ -723,13 +680,13 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, return FALSE; } - fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); fpc->r_temps |= (1 << hw); return TRUE; } static boolean -nv40_fragprog_prepare(struct nv40_fpc *fpc) +nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc) { struct tgsi_parse_context p; int high_temp = -1, i; @@ -746,11 +703,11 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) fdec = &p.FullToken.FullDeclaration; switch (fdec->Declaration.File) { case TGSI_FILE_INPUT: - if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec)) goto out_err; break; case TGSI_FILE_OUTPUT: - if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec)) goto out_err; break; case TGSI_FILE_TEMPORARY: @@ -787,7 +744,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc) tgsi_parse_free(&p); if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); fpc->r_temps_discard = 0; @@ -803,19 +760,19 @@ out_err: } static void -nv40_fragprog_translate(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_translate(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { struct tgsi_parse_context parse; - struct nv40_fpc *fpc = NULL; + struct nvfx_fpc *fpc = NULL; - fpc = CALLOC(1, sizeof(struct nv40_fpc)); + fpc = CALLOC(1, sizeof(struct nvfx_fpc)); if (!fpc) return; fpc->fp = fp; fpc->num_regs = 2; - if (!nv40_fragprog_prepare(fpc)) { + if (!nvfx_fragprog_prepare(nvfx, fpc)) { FREE(fpc); return; } @@ -831,7 +788,7 @@ nv40_fragprog_translate(struct nv40_context *nv40, const struct tgsi_full_instruction *finst; finst = &parse.FullToken.FullInstruction; - if (!nv40_fragprog_parse_instruction(fpc, finst)) + if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst)) goto out_err; } break; @@ -840,7 +797,10 @@ nv40_fragprog_translate(struct nv40_context *nv40, } } - fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + if(!nvfx->is_nv4x) + fp->fp_control |= (fpc->num_regs-1)/2; + else + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; /* Terminate final instruction */ fp->insn[fpc->inst_offset] |= 0x00000001; @@ -862,10 +822,10 @@ out_err: } static void -nv40_fragprog_upload(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_upload(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { - struct pipe_screen *pscreen = nv40->pipe.screen; + struct pipe_screen *pscreen = nvfx->pipe.screen; const uint32_t le = 1; uint32_t *map; int i; @@ -896,12 +856,12 @@ nv40_fragprog_upload(struct nv40_context *nv40, } static boolean -nv40_fragprog_validate(struct nv40_context *nv40) +nvfx_fragprog_validate(struct nvfx_context *nvfx) { - struct nv40_fragment_program *fp = nv40->fragprog; + struct nvfx_fragment_program *fp = nvfx->fragprog; struct pipe_buffer *constbuf = - nv40->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nv40->pipe.screen; + nvfx->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_screen *pscreen = nvfx->pipe.screen; struct nouveau_stateobj *so; boolean new_consts = FALSE; int i; @@ -909,24 +869,31 @@ nv40_fragprog_validate(struct nv40_context *nv40) if (fp->translated) goto update_constants; - nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; - nv40_fragprog_translate(nv40, fp); + nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG; + nvfx_fragprog_translate(nvfx, fp); if (!fp->translated) { - nv40->fallback_swrast |= NV40_NEW_FRAGPROG; + nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG; return FALSE; } fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv40_fragprog_upload(nv40, fp); + nvfx_fragprog_upload(nvfx, fp); - so = so_new(2, 2, 1); - so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so = so_new(4, 4, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, NV40TCL_FP_ADDRESS_DMA0, - NV40TCL_FP_ADDRESS_DMA1); - so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, + NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); + if(!nvfx->is_nv4x) { + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, (1<<16)|0x4); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); + } + so_ref(so, &fp->so); so_ref(NULL, &so); @@ -937,7 +904,7 @@ update_constants: map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { - struct nv40_fragment_program_data *fpd = &fp->consts[i]; + struct nvfx_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; @@ -949,11 +916,11 @@ update_constants: pipe_buffer_unmap(pscreen, constbuf); if (new_consts) - nv40_fragprog_upload(nv40, fp); + nvfx_fragprog_upload(nvfx, fp); } - if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { - so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { + so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); return TRUE; } @@ -961,8 +928,8 @@ update_constants: } void -nv40_fragprog_destroy(struct nv40_context *nv40, - struct nv40_fragment_program *fp) +nvfx_fragprog_destroy(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { if (fp->buffer) pipe_buffer_reference(&fp->buffer, NULL); @@ -974,11 +941,10 @@ nv40_fragprog_destroy(struct nv40_context *nv40, FREE(fp->insn); } -struct nv40_state_entry nv40_state_fragprog = { - .validate = nv40_fragprog_validate, +struct nvfx_state_entry nvfx_state_fragprog = { + .validate = nvfx_fragprog_validate, .dirty = { - .pipe = NV40_NEW_FRAGPROG, - .hw = NV40_STATE_FRAGPROG + .pipe = NVFX_NEW_FRAGPROG, + .hw = NVFX_STATE_FRAGPROG } }; - diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c new file mode 100644 index 00000000000..84e4eb10042 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -0,0 +1,49 @@ +#include "nvfx_context.h" + +static boolean +nvfx_fragtex_validate(struct nvfx_context *nvfx) +{ + struct nvfx_fragment_program *fp = nvfx->fragprog; + struct nvfx_state *state = &nvfx->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(1, 1, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + samplers = nvfx->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + if(!nvfx->is_nv4x) + so = nv30_fragtex_build(nvfx, unit); + else + so = nv40_fragtex_build(nvfx, unit); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FRAGTEX0 + unit]); + so_ref(NULL, &so); + state->dirty |= (1ULL << (NVFX_STATE_FRAGTEX0 + unit)); + } + + nvfx->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nvfx_state_entry nvfx_state_fragtex = { + .validate = nvfx_fragtex_validate, + .dirty = { + .pipe = NVFX_NEW_SAMPLER | NVFX_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index ad1a9a51952..0f5ed61aab7 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -4,13 +4,13 @@ #include "util/u_format.h" #include "util/u_math.h" -#include "nv40_context.h" -#include "../nouveau/nv04_surface_2d.h" +#include "nvfx_context.h" +#include "nv04_surface_2d.h" static void -nv40_miptree_layout(struct nv40_miptree *mt) +nvfx_miptree_layout(struct nvfx_miptree *mt) { struct pipe_texture *pt = &mt->base; uint width = pt->width0; @@ -20,7 +20,7 @@ nv40_miptree_layout(struct nv40_miptree *mt) PIPE_TEXTURE_USAGE_DEPTH_STENCIL | PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY); + PIPE_TEXTURE_USAGE_SCANOUT); if (pt->target == PIPE_TEXTURE_CUBE) { nr_faces = 6; @@ -62,13 +62,13 @@ nv40_miptree_layout(struct nv40_miptree *mt) } static struct pipe_texture * -nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) { - struct nv40_miptree *mt; + struct nvfx_miptree *mt; unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | NOUVEAU_BUFFER_USAGE_TEXTURE; - mt = MALLOC(sizeof(struct nv40_miptree)); + mt = MALLOC(sizeof(struct nvfx_miptree)); if (!mt) return NULL; mt->base = *pt; @@ -80,7 +80,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) pt->height0 & (pt->height0 - 1)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else - if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_SCANOUT | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; @@ -89,9 +89,21 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; else { switch (pt->format) { + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + /* TODO: we can actually swizzle these formats on nv40, we + are just preserving the pre-unification behavior. + The whole 2D code is going to be rewritten anyway. */ + if(nvfx_screen(pscreen)->is_nv4x) { + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } /* TODO: Figure out which formats can be swizzled */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_R16_SNORM: { if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) @@ -112,7 +124,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64) mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; - nv40_miptree_layout(mt); + nvfx_miptree_layout(mt); mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size); if (!mt->buffer) { @@ -124,17 +136,17 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) } static struct pipe_texture * -nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +nvfx_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, const unsigned *stride, struct pipe_buffer *pb) { - struct nv40_miptree *mt; + struct nvfx_miptree *mt; /* Only supports 2D, non-mipmapped textures for the moment */ if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || pt->depth0 != 1) return NULL; - mt = CALLOC_STRUCT(nv40_miptree); + mt = CALLOC_STRUCT(nvfx_miptree); if (!mt) return NULL; @@ -153,9 +165,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, } static void -nv40_miptree_destroy(struct pipe_texture *pt) +nvfx_miptree_destroy(struct pipe_texture *pt) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; int l; pipe_buffer_reference(&mt->buffer, NULL); @@ -168,11 +180,11 @@ nv40_miptree_destroy(struct pipe_texture *pt) } static struct pipe_surface * -nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; struct nv04_surface *ns; ns = CALLOC_STRUCT(nv04_surface); @@ -202,21 +214,21 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/ if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE) - return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base; + return &nv04_surface_wrap_for_render(pscreen, ((struct nvfx_screen*)pscreen)->eng2d, ns)->base; return &ns->base; } static void -nv40_miptree_surface_del(struct pipe_surface *ps) +nvfx_miptree_surface_del(struct pipe_surface *ps) { struct nv04_surface* ns = (struct nv04_surface*)ps; if(ns->backing) { - struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen; + struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen; if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE) screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height); - nv40_miptree_surface_del(&ns->backing->base); + nvfx_miptree_surface_del(&ns->backing->base); } pipe_texture_reference(&ps->texture, NULL); @@ -224,12 +236,12 @@ nv40_miptree_surface_del(struct pipe_surface *ps) } void -nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +nvfx_screen_init_miptree_functions(struct pipe_screen *pscreen) { - pscreen->texture_create = nv40_miptree_create; - pscreen->texture_blanket = nv40_miptree_blanket; - pscreen->texture_destroy = nv40_miptree_destroy; - pscreen->get_tex_surface = nv40_miptree_surface_new; - pscreen->tex_surface_destroy = nv40_miptree_surface_del; -} + pscreen->texture_create = nvfx_miptree_create; + pscreen->texture_destroy = nvfx_miptree_destroy; + pscreen->get_tex_surface = nvfx_miptree_surface_new; + pscreen->tex_surface_destroy = nvfx_miptree_surface_del; + nouveau_screen(pscreen)->texture_blanket = nvfx_miptree_blanket; +} diff --git a/src/gallium/drivers/nvfx/nvfx_query.c b/src/gallium/drivers/nvfx/nvfx_query.c new file mode 100644 index 00000000000..acbaf75a236 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_query.c @@ -0,0 +1,127 @@ +#include "pipe/p_context.h" + +#include "nvfx_context.h" + +struct nvfx_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nvfx_query * +nvfx_query(struct pipe_query *pipe) +{ + return (struct nvfx_query *)pipe; +} + +static struct pipe_query * +nvfx_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nvfx_query *q; + + q = CALLOC(1, sizeof(struct nvfx_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nvfx_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_query *q = nvfx_query(pq); + + if (q->object) + nouveau_resource_free(&q->object); + FREE(q); +} + +static void +nvfx_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nouveau_resource_alloc(nvfx->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nouveau_notifier_reset(nvfx->screen->query, q->object->start); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_RESET, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (chan, 1); + + q->ready = FALSE; +} + +static void +nvfx_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_query *q = nvfx_query(pq); + + BEGIN_RING(chan, eng3d, NV34TCL_QUERY_GET, 1); + OUT_RING (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(chan); +} + +static boolean +nvfx_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_query *q = nvfx_query(pq); + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nouveau_notifier_status(nvfx->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + + nouveau_notifier_wait_status(nvfx->screen->query, + q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, 0); + } + + q->result = nouveau_notifier_return_val(nvfx->screen->query, + q->object->start); + q->ready = TRUE; + nouveau_resource_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nvfx_init_query_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_query = nvfx_query_create; + nvfx->pipe.destroy_query = nvfx_query_destroy; + nvfx->pipe.begin_query = nvfx_query_begin; + nvfx->pipe.end_query = nvfx_query_end; + nvfx->pipe.get_query_result = nvfx_query_result; +} diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c new file mode 100644 index 00000000000..8138715cc7d --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -0,0 +1,433 @@ +#include "pipe/p_screen.h" +#include "pipe/p_state.h" + +#include "nouveau/nouveau_screen.h" + +#include "nvfx_context.h" +#include "nvfx_screen.h" + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h +* to get the pointer to the context front buffer, so I copied nouveau_winsys here. +* nv30_screen_surface_format_supported() can then use it to enforce creating fbo +* with same number of bits everywhere. +*/ +struct nouveau_winsys { + struct pipe_winsys base; + + struct pipe_screen *pscreen; + + struct pipe_surface *front; +}; +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static int +nvfx_screen_get_param(struct pipe_screen *pscreen, int param) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + /* TODO: check this */ + return screen->is_nv4x ? 16 : 8; + case PIPE_CAP_NPOT_TEXTURES: + return !!screen->is_nv4x; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return screen->is_nv4x ? 4 : 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return !!screen->is_nv4x; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; /* We have 4 on nv40 - but unsupported currently */ + case PIPE_CAP_TGSI_CONT_SUPPORTED: + return 0; + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return !!screen->is_nv4x; + case NOUVEAU_CAP_HW_VTXBUF: + /* TODO: this is almost surely wrong */ + return !!screen->is_nv4x; + case NOUVEAU_CAP_HW_IDXBUF: + /* TODO: this is also almost surely wrong */ + return screen->is_nv4x && screen->eng3d->grclass == NV40TCL; + case PIPE_CAP_MAX_COMBINED_SAMPLERS: + return 16; + case PIPE_CAP_INDEP_BLEND_ENABLE: + /* TODO: on nv40 we have separate color masks */ + /* TODO: nv40 mrt blending is probably broken */ + return 0; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 0; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nvfx_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return screen->is_nv4x ? 16.0 : 8.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return screen->is_nv4x ? 16.0 : 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nvfx_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front; + + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + return TRUE; + default: + break; + } + } else + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + switch (format) { + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + return TRUE; + case PIPE_FORMAT_Z16_UNORM: + /* TODO: this nv30 limitation probably does not exist */ + if (!screen->is_nv4x && front) + return (front->format == PIPE_FORMAT_B5G6R5_UNORM); + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + /* TODO: does nv30 support this? */ + case PIPE_FORMAT_R16_SNORM: + return !!screen->is_nv4x; + default: + break; + } + } + + return FALSE; +} + +static struct pipe_buffer * +nvfx_surface_buffer(struct pipe_surface *surf) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; + + return mt->buffer; +} + +static void +nvfx_screen_destroy(struct pipe_screen *pscreen) +{ + struct nvfx_screen *screen = nvfx_screen(pscreen); + unsigned i; + + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (screen->state[i]) + so_ref(NULL, &screen->state[i]); + } + + nouveau_resource_destroy(&screen->vp_exec_heap); + nouveau_resource_destroy(&screen->vp_data_heap); + nouveau_resource_destroy(&screen->query_heap); + nouveau_notifier_free(&screen->query); + nouveau_notifier_free(&screen->sync); + nouveau_grobj_free(&screen->eng3d); + nv04_surface_2d_takedown(&screen->eng2d); + + nouveau_screen_fini(&screen->base); + + FREE(pscreen); +} + +static void nv30_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + int i; + + /* TODO: perhaps we should do some of this on nv40 too? */ + for (i=1; i<8; i++) { + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->eng3d, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->eng3d, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->eng3d, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->eng3d, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->eng3d, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->eng3d, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->eng3d, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->eng3d, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->eng3d, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->eng3d, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->eng3d, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->eng3d, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->eng3d, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x13); +} + +static void nv40_screen_init(struct nvfx_screen *screen, struct nouveau_stateobj* so) +{ + so_method(so, screen->eng3d, NV40TCL_DMA_COLOR2, 2); + so_data (so, screen->base.channel->vram->handle); + so_data (so, screen->base.channel->vram->handle); + + so_method(so, screen->eng3d, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->eng3d, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->eng3d, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->eng3d, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->eng3d, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->eng3d, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->eng3d, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->eng3d, 0x1e94, 1); + so_data (so, 0x00000001); +} + +struct pipe_screen * +nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) +{ + struct nvfx_screen *screen = CALLOC_STRUCT(nvfx_screen); + struct nouveau_channel *chan; + struct pipe_screen *pscreen; + struct nouveau_stateobj *so; + unsigned eng3d_class = 0; + int ret; + + if (!screen) + return NULL; + + pscreen = &screen->base.base; + + ret = nouveau_screen_init(&screen->base, dev); + if (ret) { + nvfx_screen_destroy(pscreen); + return NULL; + } + chan = screen->base.channel; + + pscreen->winsys = ws; + pscreen->destroy = nvfx_screen_destroy; + pscreen->get_param = nvfx_screen_get_param; + pscreen->get_paramf = nvfx_screen_get_paramf; + pscreen->is_format_supported = nvfx_screen_surface_format_supported; + pscreen->context_create = nvfx_create; + + switch (dev->chipset & 0xf0) { + case 0x30: + if (NV30TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0397; + else if (NV34TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0697; + else if (NV35TCL_CHIPSET_3X_MASK & (1 << (dev->chipset & 0x0f))) + eng3d_class = 0x0497; + break; + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV40TCL; + else if (NV4X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (dev->chipset & 0x0f))) + eng3d_class = NV44TCL; + screen->is_nv4x = ~0; + break; + } + + if (!eng3d_class) { + NOUVEAU_ERR("Unknown nv3x/nv4x chipset: nv%02x\n", dev->chipset); + return NULL; + } + + nvfx_screen_init_miptree_functions(pscreen); + + ret = nouveau_grobj_alloc(chan, 0xbeef3097, eng3d_class, &screen->eng3d); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(&screen->base); + screen->eng2d->buf = nvfx_surface_buffer; + + /* Notifier for sync purposes */ + ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Query objects */ + ret = nouveau_notifier_alloc(chan, 0xbeef0302, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + ret = nouveau_resource_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Vtxprog resources */ + if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || + nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { + nvfx_screen_destroy(pscreen); + return NULL; + } + + /* Static eng3d initialisation */ + /* make the so big and don't worry about exact values + since we it will be thrown away immediately after use */ + so = so_new(256, 256, 0); + so_method(so, screen->eng3d, NV34TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_TEXTURE0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->gart->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR1, 1); + so_data (so, chan->vram->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_COLOR0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->vram->handle); + so_method(so, screen->eng3d, NV34TCL_DMA_VTXBUF0, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->gart->handle); + + so_method(so, screen->eng3d, NV34TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + + so_method(so, screen->eng3d, NV34TCL_DMA_IN_MEMORY7, 2); + so_data (so, chan->vram->handle); + so_data (so, chan->vram->handle); + + if(!screen->is_nv4x) + nv30_screen_init(screen, so); + else + nv40_screen_init(screen, so); + + so_emit(chan, so); + so_ref(NULL, &so); + nouveau_pushbuf_flush(chan, 0); + + return pscreen; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h index 9437aa050d4..c0b4b9899dd 100644 --- a/src/gallium/drivers/nv40/nv40_screen.h +++ b/src/gallium/drivers/nvfx/nvfx_screen.h @@ -1,19 +1,21 @@ -#ifndef __NV40_SCREEN_H__ -#define __NV40_SCREEN_H__ +#ifndef __NVFX_SCREEN_H__ +#define __NVFX_SCREEN_H__ #include "nouveau/nouveau_screen.h" -#include "nouveau/nv04_surface_2d.h" +#include "nv04_surface_2d.h" -struct nv40_screen { +struct nvfx_screen { struct nouveau_screen base; struct nouveau_winsys *nvws; - struct nv40_context *cur_ctx; + struct nvfx_context *cur_ctx; + + unsigned is_nv4x; /* either 0 or ~0 */ /* HW graphics objects */ struct nv04_surface_2d *eng2d; - struct nouveau_grobj *curie; + struct nouveau_grobj *eng3d; struct nouveau_notifier *sync; /* Query object resources */ @@ -25,16 +27,13 @@ struct nv40_screen { struct nouveau_resource *vp_data_heap; /* Current 3D state of channel */ - struct nouveau_stateobj *state[NV40_STATE_MAX]; + struct nouveau_stateobj *state[NVFX_STATE_MAX]; }; -static INLINE struct nv40_screen * -nv40_screen(struct pipe_screen *screen) +static INLINE struct nvfx_screen * +nvfx_screen(struct pipe_screen *screen) { - return (struct nv40_screen *)screen; + return (struct nvfx_screen *)screen; } -void -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen); - #endif diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h new file mode 100644 index 00000000000..0b2f044f7fe --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -0,0 +1,429 @@ +#ifndef __NVFX_SHADER_H__ +#define __NVFX_SHADER_H__ + +/* this will resolve to either the NV30 or the NV40 version + * depending on the current hardware */ +/* unusual, but very fast and compact method */ +#define NVFX_VP(c) ((NV30_VP_##c) + (nvfx->is_nv4x & ((NV40_VP_##c) - (NV30_VP_##c)))) + +#define NVFX_VP_INST_SLOT_VEC 0 +#define NVFX_VP_INST_SLOT_SCA 1 + +#define NVFX_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +#define NVFX_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +#define NVFX_VP_INST_IN_NORMAL 2 +#define NVFX_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +#define NVFX_VP_INST_IN_COL1 4 +#define NVFX_VP_INST_IN_FOGC 5 +#define NVFX_VP_INST_IN_TC0 8 +#define NVFX_VP_INST_IN_TC(n) (8+n) + +#define NVFX_VP_INST_SCA_OP_NOP 0x00 +#define NVFX_VP_INST_SCA_OP_MOV 0x01 +#define NVFX_VP_INST_SCA_OP_RCP 0x02 +#define NVFX_VP_INST_SCA_OP_RCC 0x03 +#define NVFX_VP_INST_SCA_OP_RSQ 0x04 +#define NVFX_VP_INST_SCA_OP_EXP 0x05 +#define NVFX_VP_INST_SCA_OP_LOG 0x06 +#define NVFX_VP_INST_SCA_OP_LIT 0x07 +#define NVFX_VP_INST_SCA_OP_BRA 0x09 +#define NVFX_VP_INST_SCA_OP_CAL 0x0B +#define NVFX_VP_INST_SCA_OP_RET 0x0C +#define NVFX_VP_INST_SCA_OP_LG2 0x0D +#define NVFX_VP_INST_SCA_OP_EX2 0x0E +#define NVFX_VP_INST_SCA_OP_SIN 0x0F +#define NVFX_VP_INST_SCA_OP_COS 0x10 + +#define NV40_VP_INST_SCA_OP_PUSHA 0x13 +#define NV40_VP_INST_SCA_OP_POPA 0x14 + +#define NVFX_VP_INST_VEC_OP_NOP 0x00 +#define NVFX_VP_INST_VEC_OP_MOV 0x01 +#define NVFX_VP_INST_VEC_OP_MUL 0x02 +#define NVFX_VP_INST_VEC_OP_ADD 0x03 +#define NVFX_VP_INST_VEC_OP_MAD 0x04 +#define NVFX_VP_INST_VEC_OP_DP3 0x05 +#define NVFX_VP_INST_VEC_OP_DPH 0x06 +#define NVFX_VP_INST_VEC_OP_DP4 0x07 +#define NVFX_VP_INST_VEC_OP_DST 0x08 +#define NVFX_VP_INST_VEC_OP_MIN 0x09 +#define NVFX_VP_INST_VEC_OP_MAX 0x0A +#define NVFX_VP_INST_VEC_OP_SLT 0x0B +#define NVFX_VP_INST_VEC_OP_SGE 0x0C +#define NVFX_VP_INST_VEC_OP_ARL 0x0D +#define NVFX_VP_INST_VEC_OP_FRC 0x0E +#define NVFX_VP_INST_VEC_OP_FLR 0x0F +#define NVFX_VP_INST_VEC_OP_SEQ 0x10 +#define NVFX_VP_INST_VEC_OP_SFL 0x11 +#define NVFX_VP_INST_VEC_OP_SGT 0x12 +#define NVFX_VP_INST_VEC_OP_SLE 0x13 +#define NVFX_VP_INST_VEC_OP_SNE 0x14 +#define NVFX_VP_INST_VEC_OP_STR 0x15 +#define NVFX_VP_INST_VEC_OP_SSG 0x16 +#define NVFX_VP_INST_VEC_OP_ARR 0x17 +#define NVFX_VP_INST_VEC_OP_ARA 0x18 + +#define NV40_VP_INST_VEC_OP_TXL 0x19 + +/* DWORD 3 */ +#define NVFX_VP_INST_LAST (1 << 0) + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * + * NV40 Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + */ + +//== Opcode / Destination selection == +#define NVFX_FP_OP_PROGRAM_END (1 << 0) +#define NVFX_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +#define NV40_FP_OP_OUT_REG_MASK (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NVFX_FP_OP_OUT_REG_HALF (1 << 7) +#define NVFX_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NVFX_FP_OP_OUTMASK_SHIFT 9 +#define NVFX_FP_OP_OUTMASK_MASK (0xF << 9) +# define NVFX_FP_OP_OUT_X (1<<9) +# define NVFX_FP_OP_OUT_Y (1<<10) +# define NVFX_FP_OP_OUT_Z (1<<11) +# define NVFX_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NVFX_FP_OP_INPUT_SRC_SHIFT 13 +#define NVFX_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NVFX_FP_OP_INPUT_SRC_POSITION 0x0 +# define NVFX_FP_OP_INPUT_SRC_COL0 0x1 +# define NVFX_FP_OP_INPUT_SRC_COL1 0x2 +# define NVFX_FP_OP_INPUT_SRC_FOGC 0x3 +# define NVFX_FP_OP_INPUT_SRC_TC0 0x4 +# define NVFX_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NVFX_FP_OP_TEX_UNIT_SHIFT 17 +#define NVFX_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NVFX_FP_OP_PRECISION_SHIFT 22 +#define NVFX_FP_OP_PRECISION_MASK (3 << 22) +# define NVFX_FP_PRECISION_FP32 0 +# define NVFX_FP_PRECISION_FP16 1 +# define NVFX_FP_PRECISION_FX12 2 +#define NVFX_FP_OP_OPCODE_SHIFT 24 +#define NVFX_FP_OP_OPCODE_MASK (0x3F << 24) +/* NV30/NV40 fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_NOP 0x00 +#define NVFX_FP_OP_OPCODE_MOV 0x01 +#define NVFX_FP_OP_OPCODE_MUL 0x02 +#define NVFX_FP_OP_OPCODE_ADD 0x03 +#define NVFX_FP_OP_OPCODE_MAD 0x04 +#define NVFX_FP_OP_OPCODE_DP3 0x05 +#define NVFX_FP_OP_OPCODE_DP4 0x06 +#define NVFX_FP_OP_OPCODE_DST 0x07 +#define NVFX_FP_OP_OPCODE_MIN 0x08 +#define NVFX_FP_OP_OPCODE_MAX 0x09 +#define NVFX_FP_OP_OPCODE_SLT 0x0A +#define NVFX_FP_OP_OPCODE_SGE 0x0B +#define NVFX_FP_OP_OPCODE_SLE 0x0C +#define NVFX_FP_OP_OPCODE_SGT 0x0D +#define NVFX_FP_OP_OPCODE_SNE 0x0E +#define NVFX_FP_OP_OPCODE_SEQ 0x0F +#define NVFX_FP_OP_OPCODE_FRC 0x10 +#define NVFX_FP_OP_OPCODE_FLR 0x11 +#define NVFX_FP_OP_OPCODE_KIL 0x12 +#define NVFX_FP_OP_OPCODE_PK4B 0x13 +#define NVFX_FP_OP_OPCODE_UP4B 0x14 +#define NVFX_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +#define NVFX_FP_OP_OPCODE_TEX 0x17 +#define NVFX_FP_OP_OPCODE_TXP 0x18 +#define NVFX_FP_OP_OPCODE_TXD 0x19 +#define NVFX_FP_OP_OPCODE_RCP 0x1A +#define NVFX_FP_OP_OPCODE_EX2 0x1C +#define NVFX_FP_OP_OPCODE_LG2 0x1D +#define NVFX_FP_OP_OPCODE_STR 0x20 +#define NVFX_FP_OP_OPCODE_SFL 0x21 +#define NVFX_FP_OP_OPCODE_COS 0x22 +#define NVFX_FP_OP_OPCODE_SIN 0x23 +#define NVFX_FP_OP_OPCODE_PK2H 0x24 +#define NVFX_FP_OP_OPCODE_UP2H 0x25 +#define NVFX_FP_OP_OPCODE_PK4UB 0x27 +#define NVFX_FP_OP_OPCODE_UP4UB 0x28 +#define NVFX_FP_OP_OPCODE_PK2US 0x29 +#define NVFX_FP_OP_OPCODE_UP2US 0x2A +#define NVFX_FP_OP_OPCODE_DP2A 0x2E +#define NVFX_FP_OP_OPCODE_TXB 0x31 +#define NVFX_FP_OP_OPCODE_DIV 0x3A + +/* NV30 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_RSQ_NV30 0x1B +#define NVFX_FP_OP_OPCODE_LIT_NV30 0x1E +#define NVFX_FP_OP_OPCODE_LRP_NV30 0x1F +#define NVFX_FP_OP_OPCODE_POW_NV30 0x26 +#define NVFX_FP_OP_OPCODE_RFL_NV30 0x36 + +/* NV40 only fragment program opcodes */ +#define NVFX_FP_OP_OPCODE_TXL_NV40 0x31 +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +#define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +#define NV40_FP_OP_BRA_OPCODE_IF 0x2 +#define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +#define NV40_FP_OP_BRA_OPCODE_REP 0x4 +#define NV40_FP_OP_BRA_OPCODE_RET 0x5 + +#define NVFX_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NVFX_FP_OP_OUT_ABS (1 << 29) +#define NVFX_FP_OP_COND_SWZ_W_SHIFT 27 +#define NVFX_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NVFX_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NVFX_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NVFX_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NVFX_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NVFX_FP_OP_COND_SWZ_X_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NVFX_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NVFX_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NVFX_FP_OP_COND_SHIFT 18 +#define NVFX_FP_OP_COND_MASK (0x07 << 18) +# define NVFX_FP_OP_COND_FL 0 +# define NVFX_FP_OP_COND_LT 1 +# define NVFX_FP_OP_COND_EQ 2 +# define NVFX_FP_OP_COND_LE 3 +# define NVFX_FP_OP_COND_GT 4 +# define NVFX_FP_OP_COND_NE 5 +# define NVFX_FP_OP_COND_GE 6 +# define NVFX_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NVFX_FP_OP_DST_SCALE_SHIFT 28 +#define NVFX_FP_OP_DST_SCALE_MASK (3 << 28) +#define NVFX_FP_OP_DST_SCALE_1X 0 +#define NVFX_FP_OP_DST_SCALE_2X 1 +#define NVFX_FP_OP_DST_SCALE_4X 2 +#define NVFX_FP_OP_DST_SCALE_8X 3 +#define NVFX_FP_OP_DST_SCALE_INV_2X 5 +#define NVFX_FP_OP_DST_SCALE_INV_4X 6 +#define NVFX_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +/* high order bits of SRC2 */ +#define NVFX_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NVFX_FP_REG_TYPE_SHIFT 0 +#define NVFX_FP_REG_TYPE_MASK (3 << 0) +# define NVFX_FP_REG_TYPE_TEMP 0 +# define NVFX_FP_REG_TYPE_INPUT 1 +# define NVFX_FP_REG_TYPE_CONST 2 +#define NVFX_FP_REG_SRC_SHIFT 2 +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_SRC_MASK (63 << 2) +#define NVFX_FP_REG_SRC_HALF (1 << 8) +#define NVFX_FP_REG_SWZ_ALL_SHIFT 9 +#define NVFX_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NVFX_FP_REG_SWZ_X_SHIFT 9 +#define NVFX_FP_REG_SWZ_X_MASK (3 << 9) +#define NVFX_FP_REG_SWZ_Y_SHIFT 11 +#define NVFX_FP_REG_SWZ_Y_MASK (3 << 11) +#define NVFX_FP_REG_SWZ_Z_SHIFT 13 +#define NVFX_FP_REG_SWZ_Z_MASK (3 << 13) +#define NVFX_FP_REG_SWZ_W_SHIFT 15 +#define NVFX_FP_REG_SWZ_W_MASK (3 << 15) +# define NVFX_FP_SWIZZLE_X 0 +# define NVFX_FP_SWIZZLE_Y 1 +# define NVFX_FP_SWIZZLE_Z 2 +# define NVFX_FP_SWIZZLE_W 3 +#define NVFX_FP_REG_NEGATE (1 << 17) + +#define NVFXSR_NONE 0 +#define NVFXSR_OUTPUT 1 +#define NVFXSR_INPUT 2 +#define NVFXSR_TEMP 3 +#define NVFXSR_CONST 4 + +#define NVFX_COND_FL 0 +#define NVFX_COND_LT 1 +#define NVFX_COND_EQ 2 +#define NVFX_COND_LE 3 +#define NVFX_COND_GT 4 +#define NVFX_COND_NE 5 +#define NVFX_COND_GE 6 +#define NVFX_COND_TR 7 + +/* Yes, this are ordered differently... */ + +#define NVFX_VP_MASK_X 8 +#define NVFX_VP_MASK_Y 4 +#define NVFX_VP_MASK_Z 2 +#define NVFX_VP_MASK_W 1 +#define NVFX_VP_MASK_ALL 0xf + +#define NVFX_FP_MASK_X 1 +#define NVFX_FP_MASK_Y 2 +#define NVFX_FP_MASK_Z 4 +#define NVFX_FP_MASK_W 8 +#define NVFX_FP_MASK_ALL 0xf + +#define NVFX_SWZ_X 0 +#define NVFX_SWZ_Y 1 +#define NVFX_SWZ_Z 2 +#define NVFX_SWZ_W 3 + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), NVFX_SWZ_##x, NVFX_SWZ_##y, NVFX_SWZ_##z, NVFX_SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) + +struct nvfx_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nvfx_sreg +nvfx_sr(int type, int index) +{ + struct nvfx_sreg temp = { + .type = type, + .index = index, + .dst_scale = 0, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = NVFX_COND_TR, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nvfx_sreg +nvfx_sr_swz(struct nvfx_sreg src, int x, int y, int z, int w) +{ + struct nvfx_sreg dst = src; + + dst.swz[NVFX_SWZ_X] = src.swz[x]; + dst.swz[NVFX_SWZ_Y] = src.swz[y]; + dst.swz[NVFX_SWZ_Z] = src.swz[z]; + dst.swz[NVFX_SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nvfx_sreg +nvfx_sr_neg(struct nvfx_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_abs(struct nvfx_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nvfx_sreg +nvfx_sr_scale(struct nvfx_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c new file mode 100644 index 00000000000..88a9d01c509 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -0,0 +1,619 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" +#include "nvfx_tex.h" + +static void * +nvfx_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(5, 8, 0); + + if (cso->rt[0].blend_enable) { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + if(nvfx->screen->base.device->chipset < 0x40) { + so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + } else { + so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + nvgl_blend_eqn(cso->rt[0].rgb_func)); + } + } else { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + /* TODO: add NV40 MRT color mask */ + + if (cso->logicop_enable) { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + so_ref(NULL, &so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend = hwcso; + nvfx->dirty |= NVFX_NEW_BLEND; +} + +static void +nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static void * +nvfx_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_state *ps; + + ps = MALLOC(sizeof(struct nvfx_sampler_state)); + + /* on nv30, we use this as an internal flag */ + ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT; + ps->en = 0; + ps->filt = nvfx_tex_filter(cso); + ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) | + nvfx_tex_wrap_compare_mode(cso); + ps->bcol = nvfx_tex_border_color(cso->border_color); + + if(nvfx->is_nv4x) + nv40_sampler_state_init(pipe, ps, cso); + else + nv30_sampler_state_init(pipe, ps, cso); + + return (void *)ps; +} + +static void +nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nvfx->tex_sampler[unit] = sampler[unit]; + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_samplers; unit++) { + nvfx->tex_sampler[unit] = NULL; + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_samplers = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void +nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], miptree[unit]); + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], NULL); + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_textures = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void * +nvfx_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(9, 19, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); + if (cso->point_quad_rasterization) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if ((cso->sprite_coord_enable >> i) & 1) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + so_ref(NULL, &so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->rasterizer = hwcso; + nvfx->dirty |= NVFX_NEW_RAST; + nvfx->draw_dirty |= NVFX_NEW_RAST; +} + +static void +nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(6, 20, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + so_ref(NULL, &so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->zsa = hwcso; + nvfx->dirty |= NVFX_NEW_ZSA; +} + +static void +nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nvfx_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vertprog = hwcso; + nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->draw_dirty |= NVFX_NEW_VERTPROG; +} + +static void +nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nvfx->draw, vp->draw); + nvfx_vertprog_destroy(nvfx, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nvfx_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->fragprog = hwcso; + nvfx->dirty |= NVFX_NEW_FRAGPROG; +} + +static void +nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_fragment_program *fp = hwcso; + + nvfx_fragprog_destroy(nvfx, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nvfx_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend_colour = *bcol; + nvfx->dirty |= NVFX_NEW_BCOL; +} + +static void +nvfx_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->stencil_ref = *sr; + nvfx->dirty |= NVFX_NEW_SR; +} + +static void +nvfx_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->clip = *clip; + nvfx->dirty |= NVFX_NEW_UCP; + nvfx->draw_dirty |= NVFX_NEW_UCP; +} + +static void +nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_buffer *buf ) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->constbuf[shader] = buf; + nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nvfx->dirty |= NVFX_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nvfx->dirty |= NVFX_NEW_FRAGPROG; + } +} + +static void +nvfx_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->framebuffer = *fb; + nvfx->dirty |= NVFX_NEW_FB; +} + +static void +nvfx_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->stipple, stipple->stipple, 4 * 32); + nvfx->dirty |= NVFX_NEW_STIPPLE; +} + +static void +nvfx_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->scissor = *s; + nvfx->dirty |= NVFX_NEW_SCISSOR; +} + +static void +nvfx_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->viewport = *vpt; + nvfx->dirty |= NVFX_NEW_VIEWPORT; + nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; +} + +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); + nvfx->vtxbuf_nr = count; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + + assert(num_elements < 16); /* not doing fallbacks yet */ + cso->num_elements = num_elements; + memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); + +/* nvfx_vtxelt_construct(cso);*/ + + return (void *)cso; +} + +static void +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vtxelt = hwcso; + nvfx->dirty |= NVFX_NEW_ARRAYS; + /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ +} + +void +nvfx_init_state_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_blend_state = nvfx_blend_state_create; + nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; + nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; + + nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; + nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; + + nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; + nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind; + nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete; + + nvfx->pipe.create_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_create; + nvfx->pipe.bind_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_bind; + nvfx->pipe.delete_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_delete; + + nvfx->pipe.create_vs_state = nvfx_vp_state_create; + nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; + nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; + + nvfx->pipe.create_fs_state = nvfx_fp_state_create; + nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; + nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; + + nvfx->pipe.set_blend_color = nvfx_set_blend_color; + nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref; + nvfx->pipe.set_clip_state = nvfx_set_clip_state; + nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer; + nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state; + nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; + nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; + nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; + + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; +} diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 192074e7471..e585246879b 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -1,29 +1,21 @@ -#ifndef __NV40_STATE_H__ -#define __NV40_STATE_H__ +#ifndef __NVFX_STATE_H__ +#define __NVFX_STATE_H__ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -struct nv40_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - -struct nv40_vertex_program_exec { +struct nvfx_vertex_program_exec { uint32_t data[4]; boolean has_branch_offset; int const_index; }; -struct nv40_vertex_program_data { +struct nvfx_vertex_program_data { int index; /* immediates == -1 */ float value[4]; }; -struct nv40_vertex_program { +struct nvfx_vertex_program { struct pipe_shader_state pipe; struct draw_vertex_shader *draw; @@ -32,9 +24,9 @@ struct nv40_vertex_program { struct pipe_clip_state ucp; - struct nv40_vertex_program_exec *insns; + struct nvfx_vertex_program_exec *insns; unsigned nr_insns; - struct nv40_vertex_program_data *consts; + struct nvfx_vertex_program_data *consts; unsigned nr_consts; struct nouveau_resource *exec; @@ -49,12 +41,12 @@ struct nv40_vertex_program { struct nouveau_stateobj *so; }; -struct nv40_fragment_program_data { +struct nvfx_fragment_program_data { unsigned offset; unsigned index; }; -struct nv40_fragment_program { +struct nvfx_fragment_program { struct pipe_shader_state pipe; struct tgsi_shader_info info; @@ -64,7 +56,7 @@ struct nv40_fragment_program { uint32_t *insn; int insn_len; - struct nv40_fragment_program_data *consts; + struct nvfx_fragment_program_data *consts; unsigned nr_consts; struct pipe_buffer *buffer; @@ -73,7 +65,9 @@ struct nv40_fragment_program { struct nouveau_stateobj *so; }; -struct nv40_miptree { +#define NVFX_MAX_TEXTURE_LEVELS 16 + +struct nvfx_miptree { struct pipe_texture base; struct nouveau_bo *bo; @@ -83,7 +77,7 @@ struct nv40_miptree { struct { uint pitch; uint *image_offset; - } level[PIPE_MAX_TEXTURE_LEVELS]; + } level[NVFX_MAX_TEXTURE_LEVELS]; }; #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_blend.c b/src/gallium/drivers/nvfx/nvfx_state_blend.c new file mode 100644 index 00000000000..03b6ef8117d --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_blend.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_blend_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->blend->so, &nvfx->state.hw[NVFX_STATE_BLEND]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend = { + .validate = nvfx_state_blend_validate, + .dirty = { + .pipe = NVFX_NEW_BLEND, + .hw = NVFX_STATE_BLEND + } +}; + +static boolean +nvfx_state_blend_colour_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(1, 1, 0); + struct pipe_blend_color *bcol = &nvfx->blend_colour; + + so_method(so, nvfx->screen->eng3d, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_BCOL]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_blend_colour = { + .validate = nvfx_state_blend_colour_validate, + .dirty = { + .pipe = NVFX_NEW_BCOL, + .hw = NVFX_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c new file mode 100644 index 00000000000..72537388ea4 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -0,0 +1,179 @@ +#include "nvfx_context.h" +#include "nvfx_state.h" +#include "draw/draw_context.h" + +#define RENDER_STATES(name, vbo) \ +static struct nvfx_state_entry *name##render_states[] = { \ + &nvfx_state_framebuffer, \ + &nvfx_state_rasterizer, \ + &nvfx_state_scissor, \ + &nvfx_state_stipple, \ + &nvfx_state_fragprog, \ + &nvfx_state_fragtex, \ + &nvfx_state_vertprog, \ + &nvfx_state_blend, \ + &nvfx_state_blend_colour, \ + &nvfx_state_zsa, \ + &nvfx_state_sr, \ + &nvfx_state_viewport, \ + &nvfx_state_##vbo, \ + NULL \ +} + +RENDER_STATES(, vbo); +RENDER_STATES(swtnl_, vtxfmt); + +static void +nvfx_state_do_validate(struct nvfx_context *nvfx, + struct nvfx_state_entry **states) +{ + while (*states) { + struct nvfx_state_entry *e = *states; + + if (nvfx->dirty & e->dirty.pipe) { + if (e->validate(nvfx)) + nvfx->state.dirty |= (1ULL << e->dirty.hw); + } + + states++; + } + nvfx->dirty = 0; +} + +void +nvfx_state_emit(struct nvfx_context *nvfx) +{ + struct nvfx_state *state = &nvfx->state; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + unsigned i; + uint64_t states; + + /* XXX: race conditions + */ + if (nvfx != screen->cur_ctx) { + for (i = 0; i < NVFX_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_ctx = nvfx; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nvfx->screen->state[i]); + if (state->hw[i]) + so_emit(chan, nvfx->screen->state[i]); + states &= ~(1ULL << i); + } + + /* TODO: could nv30 need this or something similar too? */ + if(nvfx->is_nv4x) { + if (state->dirty & ((1ULL << NVFX_STATE_FRAGPROG) | + (1ULL << NVFX_STATE_FRAGTEX0))) { + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 2); + BEGIN_RING(chan, eng3d, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (chan, 1); + } + } + state->dirty = 0; +} + +void +nvfx_state_flush_notify(struct nouveau_channel *chan) +{ + struct nvfx_context *nvfx = chan->user_private; + struct nvfx_state *state = &nvfx->state; + unsigned i, samplers; + + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(chan, + state->hw[NVFX_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_FRAGPROG]); + if (state->hw[NVFX_STATE_VTXBUF] && nvfx->render_mode == HW) + so_emit_reloc_markers(chan, state->hw[NVFX_STATE_VTXBUF]); +} + +boolean +nvfx_state_validate(struct nvfx_context *nvfx) +{ + boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; + + if (nvfx->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nvfx->fallback_swtnl & nvfx->dirty) + != nvfx->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = HW; + } + + nvfx_state_do_validate(nvfx, render_states); + + if (nvfx->fallback_swtnl || nvfx->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nvfx_state_validate_swtnl(struct nvfx_context *nvfx) +{ + struct draw_context *draw = nvfx->draw; + + /* Setup for swtnl */ + if (nvfx->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + nvfx->pipe.flush(&nvfx->pipe, 0, NULL); + nvfx->dirty |= (NVFX_NEW_VIEWPORT | + NVFX_NEW_VERTPROG | + NVFX_NEW_ARRAYS); + nvfx->render_mode = SWTNL; + } + + if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nvfx->vertprog->draw); + + if (nvfx->draw_dirty & NVFX_NEW_RAST) + draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe); + + if (nvfx->draw_dirty & NVFX_NEW_UCP) + draw_set_clip_state(draw, &nvfx->clip); + + if (nvfx->draw_dirty & NVFX_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nvfx->viewport); + + if (nvfx->draw_dirty & NVFX_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, nvfx->vtxbuf_nr, nvfx->vtxbuf); + draw_set_vertex_elements(draw, nvfx->vtxelt->num_elements, nvfx->vtxelt->pipe); + } + + nvfx_state_do_validate(nvfx, swtnl_render_states); + + if (nvfx->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nvfx->fallback_swrast); + return FALSE; + } + + nvfx->draw_dirty = 0; + return TRUE; +} diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c new file mode 100644 index 00000000000..dd64ba4193c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -0,0 +1,234 @@ +#include "nvfx_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * +nvfx_do_surface_buffer(struct pipe_surface *surface) +{ + struct nvfx_miptree *mt = (struct nvfx_miptree *)surface->texture; + return mt->buffer; +} + +#define nvfx_surface_buffer(ps) nouveau_bo(nvfx_do_surface_buffer(ps)) + +static boolean +nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +{ + struct pipe_framebuffer_state *fb = &nvfx->framebuffer; + struct nouveau_channel *chan = nvfx->screen->base.channel; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nv04_surface *rt[4], *zeta = NULL; + uint32_t rt_enable = 0, rt_format = 0; + int i, colour_format = 0, zeta_format = 0; + int depth_only = 0; + struct nouveau_stateobj *so = so_new(18, 24, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + int colour_bits = 32, zeta_bits = 32; + + if(!nvfx->is_nv4x) + assert(fb->nr_cbufs <= 2); + else + assert(fb->nr_cbufs <= 4); + + for (i = 0; i < fb->nr_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = (struct nv04_surface *)fb->cbufs[i]; + } + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = (struct nv04_surface *)fb->zsbuf; + } + + if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | + NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { + /* Render to at least a colour buffer */ + if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else if (fb->zsbuf) { + depth_only = 1; + + /* Render to depth buffer only */ + if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + (log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | + (log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + } else { + return FALSE; + } + + switch (colour_format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + colour_bits = 16; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + zeta_bits = 16; + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { + /* TODO: does this limitation really exist? + TODO: can it be worked around somehow? */ + return FALSE; + } + + if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) + || ((!nvfx->is_nv4x) && depth_only)) { + struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]); + uint32_t pitch = rt0->pitch; + + if(!nvfx->is_nv4x) + { + if (zeta) { + pitch |= (zeta->pitch << 16); + } else { + pitch |= (pitch << 16); + } + } + + so_method(so, eng3d, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, nvfx_surface_buffer(&rt0->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, nvfx_surface_buffer(&rt[0]->base), + rt0->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + so_method(so, eng3d, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, nvfx_surface_buffer(&rt[1]->base), + rt[1]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_data (so, rt[1]->pitch); + } + + if(nvfx->is_nv4x) + { + if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { + so_method(so, eng3d, NV40TCL_DMA_COLOR2, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR2_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[2]->base), + rt[2]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR2_PITCH, 1); + so_data (so, rt[2]->pitch); + } + + if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { + so_method(so, eng3d, NV40TCL_DMA_COLOR3, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV40TCL_COLOR3_OFFSET, 1); + so_reloc (so, nvfx_surface_buffer(&rt[3]->base), + rt[3]->base.offset, rt_flags | NOUVEAU_BO_LOW, + 0, 0); + so_method(so, eng3d, NV40TCL_COLOR3_PITCH, 1); + so_data (so, rt[3]->pitch); + } + } + + if (zeta_format) { + so_method(so, eng3d, NV34TCL_DMA_ZETA, 1); + so_reloc (so, nvfx_surface_buffer(&zeta->base), 0, + rt_flags | NOUVEAU_BO_OR, + chan->vram->handle, chan->gart->handle); + so_method(so, eng3d, NV34TCL_ZETA_OFFSET, 1); + /* TODO: reverse engineer LMA */ + so_reloc (so, nvfx_surface_buffer(&zeta->base), + zeta->base.offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_ZETA_PITCH, 1); + so_data (so, zeta->pitch); + } + } + + so_method(so, eng3d, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, eng3d, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, eng3d, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, eng3d, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, eng3d, 0x1d88, 1); + so_data (so, (1 << 12) | h); + + if(!nvfx->is_nv4x) { + /* Wonder why this is needed, context should all be set to zero on init */ + /* TODO: we can most likely remove this, after putting it in context init */ + so_method(so, eng3d, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_FB]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_framebuffer = { + .validate = nvfx_state_framebuffer_validate, + .dirty = { + .pipe = NVFX_NEW_FB, + .hw = NVFX_STATE_FB + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c new file mode 100644 index 00000000000..0d35ecbf209 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_rasterizer_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->rasterizer->so, + &nvfx->state.hw[NVFX_STATE_RAST]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_rasterizer = { + .validate = nvfx_state_rasterizer_validate, + .dirty = { + .pipe = NVFX_NEW_RAST, + .hw = NVFX_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_scissor.c b/src/gallium/drivers/nvfx/nvfx_state_scissor.c new file mode 100644 index 00000000000..940d8cb5c0c --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_scissor.c @@ -0,0 +1,36 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_scissor_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct pipe_scissor_state *s = &nvfx->scissor; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_SCISSOR] && + (rast->scissor == 0 && nvfx->state.scissor_enabled == 0)) + return FALSE; + nvfx->state.scissor_enabled = rast->scissor; + + so = so_new(1, 2, 0); + so_method(so, nvfx->screen->eng3d, NV34TCL_SCISSOR_HORIZ, 2); + if (nvfx->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SCISSOR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_scissor = { + .validate = nvfx_state_scissor_validate, + .dirty = { + .pipe = NVFX_NEW_SCISSOR | NVFX_NEW_RAST, + .hw = NVFX_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_stipple.c b/src/gallium/drivers/nvfx/nvfx_state_stipple.c new file mode 100644 index 00000000000..57cd3c936a7 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_stipple.c @@ -0,0 +1,40 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_stipple_validate(struct nvfx_context *nvfx) +{ + struct pipe_rasterizer_state *rast = &nvfx->rasterizer->pipe; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nvfx->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(2, 33, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nvfx->stipple[i]); + } else { + so = so_new(1, 1, 0); + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_STIPPLE]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_stipple = { + .validate = nvfx_state_stipple_validate, + .dirty = { + .pipe = NVFX_NEW_STIPPLE | NVFX_NEW_RAST, + .hw = NVFX_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_viewport.c b/src/gallium/drivers/nvfx/nvfx_state_viewport.c new file mode 100644 index 00000000000..82e0e9220b0 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_viewport.c @@ -0,0 +1,51 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_viewport_validate(struct nvfx_context *nvfx) +{ + struct pipe_viewport_state *vpt = &nvfx->viewport; + struct nouveau_stateobj *so; + + if (nvfx->state.hw[NVFX_STATE_VIEWPORT] && + !(nvfx->dirty & NVFX_NEW_VIEWPORT)) + return FALSE; + + so = so_new(2, 9, 0); + so_method(so, nvfx->screen->eng3d, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + if(nvfx->render_mode == HW) { + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, 1); + } else { + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(0.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_data (so, fui(1.0f)); + so_method(so, nvfx->screen->eng3d, 0x1d78, 1); + so_data (so, nvfx->is_nv4x ? 0x110 : 1); + } + + so_ref(so, &nvfx->state.hw[NVFX_STATE_VIEWPORT]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_viewport = { + .validate = nvfx_state_viewport_validate, + .dirty = { + .pipe = NVFX_NEW_VIEWPORT, + .hw = NVFX_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nvfx/nvfx_state_zsa.c b/src/gallium/drivers/nvfx/nvfx_state_zsa.c new file mode 100644 index 00000000000..c84fd041c1e --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state_zsa.c @@ -0,0 +1,41 @@ +#include "nvfx_context.h" + +static boolean +nvfx_state_zsa_validate(struct nvfx_context *nvfx) +{ + so_ref(nvfx->zsa->so, + &nvfx->state.hw[NVFX_STATE_ZSA]); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_zsa = { + .validate = nvfx_state_zsa_validate, + .dirty = { + .pipe = NVFX_NEW_ZSA, + .hw = NVFX_STATE_ZSA + } +}; + +static boolean +nvfx_state_sr_validate(struct nvfx_context *nvfx) +{ + struct nouveau_stateobj *so = so_new(2, 2, 0); + struct pipe_stencil_ref *sr = &nvfx->stencil_ref; + + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_FRONT_FUNC_REF, 1); + so_data (so, sr->ref_value[0]); + so_method(so, nvfx->screen->eng3d, NV34TCL_STENCIL_BACK_FUNC_REF, 1); + so_data (so, sr->ref_value[1]); + + so_ref(so, &nvfx->state.hw[NVFX_STATE_SR]); + so_ref(NULL, &so); + return TRUE; +} + +struct nvfx_state_entry nvfx_state_sr = { + .validate = nvfx_state_sr_validate, + .dirty = { + .pipe = NVFX_NEW_SR, + .hw = NVFX_STATE_SR + } +}; diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index bc18e577eee..8a05ad0a571 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -1,9 +1,9 @@ /************************************************************************** - * + * * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -11,11 +11,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -23,40 +23,40 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -#include "nv30_context.h" +#include "nvfx_context.h" #include "pipe/p_defines.h" #include "util/u_inlines.h" #include "util/u_tile.h" static void -nv30_surface_copy(struct pipe_context *pipe, +nvfx_surface_copy(struct pipe_context *pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); } static void -nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, unsigned width, unsigned height, unsigned value) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nv04_surface_2d *eng2d = nvfx->screen->eng2d; eng2d->fill(eng2d, dest, destx, desty, width, height, value); } void -nv30_init_surface_functions(struct nv30_context *nv30) +nvfx_init_surface_functions(struct nvfx_context *nvfx) { - nv30->pipe.surface_copy = nv30_surface_copy; - nv30->pipe.surface_fill = nv30_surface_fill; + nvfx->pipe.surface_copy = nvfx_surface_copy; + nvfx->pipe.surface_fill = nvfx_surface_fill; } diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h new file mode 100644 index 00000000000..69187a79e79 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_tex.h @@ -0,0 +1,133 @@ +#ifndef NVFX_TEX_H_ +#define NVFX_TEX_H_ + +static inline unsigned +nvfx_tex_wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static inline unsigned +nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso) +{ + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + return NV34TCL_TX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER: + return NV34TCL_TX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL: + return NV34TCL_TX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL: + return NV34TCL_TX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS: + return NV34TCL_TX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: + return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: + return NV34TCL_TX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS: + return NV34TCL_TX_WRAP_RCOMP_ALWAYS; + default: + break; + } + } + return 0; +} + +static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) +{ + unsigned filter = 0; + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + return filter; +} + +static inline unsigned nvfx_tex_border_color(const float* border_color) +{ + return ((float_to_ubyte(border_color[3]) << 24) | + (float_to_ubyte(border_color[0]) << 16) | + (float_to_ubyte(border_color[1]) << 8) | + (float_to_ubyte(border_color[2]) << 0)); +} + +struct nvfx_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +#endif /* NVFX_TEX_H_ */ diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index ee266c6cfb1..409b354d582 100644 --- a/src/gallium/drivers/nv40/nv40_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -1,22 +1,22 @@ -#include <pipe/p_state.h> -#include <pipe/p_defines.h> -#include <util/u_inlines.h> -#include <util/u_format.h> -#include <util/u_memory.h> -#include <util/u_math.h> -#include <nouveau/nouveau_winsys.h> -#include "nv40_context.h" -#include "nv40_screen.h" -#include "nv40_state.h" - -struct nv40_transfer { +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_math.h" +#include "nouveau/nouveau_winsys.h" +#include "nvfx_context.h" +#include "nvfx_screen.h" +#include "nvfx_state.h" + +struct nvfx_transfer { struct pipe_transfer base; struct pipe_surface *surface; boolean direct; }; static void -nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, +nvfx_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height, struct pipe_texture *template) { memset(template, 0, sizeof(struct pipe_texture)); @@ -33,16 +33,17 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned h } static struct pipe_transfer * -nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +nvfx_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, unsigned x, unsigned y, unsigned w, unsigned h) { - struct nv40_miptree *mt = (struct nv40_miptree *)pt; - struct nv40_transfer *tx; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; + struct nvfx_transfer *tx; struct pipe_texture tx_tex_template, *tx_tex; - tx = CALLOC_STRUCT(nv40_transfer); + tx = CALLOC_STRUCT(nvfx_transfer); if (!tx) return NULL; @@ -71,7 +72,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, tx->direct = false; - nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template); + nvfx_compatible_transfer_tex(pt, w, h, &tx_tex_template); tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); if (!tx_tex) @@ -80,7 +81,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, return NULL; } - tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch; + tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch; tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, 0, 0, 0, @@ -96,7 +97,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } if (usage & PIPE_TRANSFER_READ) { - struct nv40_screen *nvscreen = nv40_screen(pscreen); + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); struct pipe_surface *src; src = pscreen->get_tex_surface(pscreen, pt, @@ -117,13 +118,14 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, } static void -nv40_transfer_del(struct pipe_transfer *ptx) +nvfx_transfer_del(struct pipe_context *pcontext, + struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) { - struct pipe_screen *pscreen = ptx->texture->screen; - struct nv40_screen *nvscreen = nv40_screen(pscreen); + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_screen *nvscreen = nvfx_screen(pscreen); struct pipe_surface *dst; dst = pscreen->get_tex_surface(pscreen, ptx->texture, @@ -145,11 +147,12 @@ nv40_transfer_del(struct pipe_transfer *ptx) } static void * -nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nvfx_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; struct nv04_surface *ns = (struct nv04_surface *)tx->surface; - struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; void *map = pipe_buffer_map(pscreen, mt->buffer, pipe_transfer_buffer_flags(ptx)); @@ -160,19 +163,20 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) } static void -nv40_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +nvfx_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx) { - struct nv40_transfer *tx = (struct nv40_transfer *)ptx; - struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; + struct pipe_screen *pscreen = pcontext->screen; + struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx; + struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture; pipe_buffer_unmap(pscreen, mt->buffer); } void -nv40_screen_init_transfer_functions(struct pipe_screen *pscreen) +nvfx_init_transfer_functions(struct nvfx_context *nvfx) { - pscreen->get_tex_transfer = nv40_transfer_new; - pscreen->tex_transfer_destroy = nv40_transfer_del; - pscreen->transfer_map = nv40_transfer_map; - pscreen->transfer_unmap = nv40_transfer_unmap; + nvfx->pipe.get_tex_transfer = nvfx_transfer_new; + nvfx->pipe.tex_transfer_destroy = nvfx_transfer_del; + nvfx->pipe.transfer_map = nvfx_transfer_map; + nvfx->pipe.transfer_unmap = nvfx_transfer_unmap; } diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index e48823a9138..257087f8f63 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -3,17 +3,24 @@ #include "util/u_inlines.h" #include "util/u_format.h" -#include "nv30_context.h" -#include "nv30_state.h" +#include "nvfx_context.h" +#include "nvfx_state.h" #include "nouveau/nouveau_channel.h" #include "nouveau/nouveau_pushbuf.h" #include "nouveau/nouveau_util.h" -#define FORCE_SWTNL 0 +static boolean +nvfx_force_swtnl(struct nvfx_context *nvfx) +{ + static int force_swtnl = -1; + if(force_swtnl < 0) + force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", 0); + return force_swtnl; +} static INLINE int -nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +nvfx_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) { switch (pipe) { case PIPE_FORMAT_R32_FLOAT: @@ -69,15 +76,15 @@ nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) } static boolean -nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, +nvfx_vbo_set_idxbuf(struct nvfx_context *nvfx, struct pipe_buffer *ib, unsigned ib_size) { - struct pipe_screen *pscreen = &nv30->screen->base.base; + struct pipe_screen *pscreen = &nvfx->screen->base.base; unsigned type; if (!ib) { - nv30->idxbuf = NULL; - nv30->idxbuf_format = 0xdeadbeef; + nvfx->idxbuf = NULL; + nvfx->idxbuf_format = 0xdeadbeef; return FALSE; } @@ -95,27 +102,27 @@ nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, return FALSE; } - if (ib != nv30->idxbuf || - type != nv30->idxbuf_format) { - nv30->dirty |= NV30_NEW_ARRAYS; - nv30->idxbuf = ib; - nv30->idxbuf_format = type; + if (ib != nvfx->idxbuf || + type != nvfx->idxbuf_format) { + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->idxbuf = ib; + nvfx->idxbuf_format = type; } return TRUE; } static boolean -nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, +nvfx_vbo_static_attrib(struct nvfx_context *nvfx, struct nouveau_stateobj *so, int attrib, struct pipe_vertex_element *ve, struct pipe_vertex_buffer *vb) { - struct pipe_screen *pscreen = nv30->pipe.screen; - struct nouveau_grobj *rankine = nv30->screen->rankine; + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; unsigned type, ncomp; void *map; - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; map = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); @@ -128,25 +135,25 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, switch (ncomp) { case 4: - so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_method(so, eng3d, NV34TCL_VTX_ATTR_4F_X(attrib), 4); so_data (so, fui(v[0])); so_data (so, fui(v[1])); so_data (so, fui(v[2])); so_data (so, fui(v[3])); break; case 3: - so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_method(so, eng3d, NV34TCL_VTX_ATTR_3F_X(attrib), 3); so_data (so, fui(v[0])); so_data (so, fui(v[1])); so_data (so, fui(v[2])); break; case 2: - so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_method(so, eng3d, NV34TCL_VTX_ATTR_2F_X(attrib), 2); so_data (so, fui(v[0])); so_data (so, fui(v[1])); break; case 1: - so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_method(so, eng3d, NV34TCL_VTX_ATTR_1F(attrib), 1); so_data (so, fui(v[0])); break; default: @@ -165,26 +172,26 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, } void -nv30_draw_arrays(struct pipe_context *pipe, +nvfx_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; - nv30_vbo_set_idxbuf(nv30, NULL, 0); - if (FORCE_SWTNL || !nv30_state_validate(nv30)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ - return; + nvfx_vbo_set_idxbuf(nvfx, NULL, 0); + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + return; } while (count) { unsigned vc, nr; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -193,12 +200,12 @@ nv30_draw_arrays(struct pipe_context *pipe, continue; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -209,14 +216,14 @@ nv30_draw_arrays(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_VERTEX_BATCH, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); count -= vc; @@ -227,18 +234,18 @@ nv30_draw_arrays(struct pipe_context *pipe, } static INLINE void -nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u08(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint8_t *elts = (uint8_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -248,11 +255,11 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -262,7 +269,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -270,7 +277,7 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -278,18 +285,18 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib, } static INLINE void -nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u16(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint16_t *elts = (uint16_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 2, mode, start, count, &restart); @@ -299,11 +306,11 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); if (vc & 1) { - BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_ELEMENT_U32, 1); OUT_RING (chan, elts[0]); elts++; vc--; } @@ -313,7 +320,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, push = MIN2(vc, 2047 * 2); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U16, push >> 1); for (i = 0; i < push; i+=2) OUT_RING(chan, (elts[i+1] << 16) | elts[i]); @@ -321,7 +328,7 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -329,18 +336,18 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib, } static INLINE void -nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, +nvfx_draw_elements_u32(struct nvfx_context *nvfx, void *ib, unsigned mode, unsigned start, unsigned count) { - struct nv30_screen *screen = nv30->screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; while (count) { uint32_t *elts = (uint32_t *)ib + start; unsigned vc, push, restart = 0; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 5, 1, mode, start, count, &restart); @@ -350,20 +357,20 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, } count -= vc; - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); while (vc) { push = MIN2(vc, 2047); - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_ELEMENT_U32, push); OUT_RINGp (chan, elts, push); vc -= push; elts += push; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); start = restart; @@ -371,11 +378,11 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, } static void -nv30_draw_elements_inline(struct pipe_context *pipe, +nvfx_draw_elements_inline(struct pipe_context *pipe, struct pipe_buffer *ib, unsigned ib_size, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); + struct nvfx_context *nvfx = nvfx_context(pipe); struct pipe_screen *pscreen = pipe->screen; void *map; @@ -387,13 +394,13 @@ nv30_draw_elements_inline(struct pipe_context *pipe, switch (ib_size) { case 1: - nv30_draw_elements_u08(nv30, map, mode, start, count); + nvfx_draw_elements_u08(nvfx, map, mode, start, count); break; case 2: - nv30_draw_elements_u16(nv30, map, mode, start, count); + nvfx_draw_elements_u16(nvfx, map, mode, start, count); break; case 4: - nv30_draw_elements_u32(nv30, map, mode, start, count); + nvfx_draw_elements_u32(nvfx, map, mode, start, count); break; default: NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); @@ -404,19 +411,19 @@ nv30_draw_elements_inline(struct pipe_context *pipe, } static void -nv30_draw_elements_vbo(struct pipe_context *pipe, +nvfx_draw_elements_vbo(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); - struct nv30_screen *screen = nv30->screen; + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *rankine = screen->rankine; + struct nouveau_grobj *eng3d = screen->eng3d; unsigned restart = 0; while (count) { unsigned nr, vc; - nv30_state_emit(nv30); + nvfx_state_emit(nvfx); vc = nouveau_vbuf_split(AVAIL_RING(chan), 6, 256, mode, start, count, &restart); @@ -425,12 +432,12 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, continue; } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, nvgl_primitive(mode)); nr = (vc & 0xff); if (nr) { - BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VB_INDEX_BATCH, 1); OUT_RING (chan, ((nr - 1) << 24) | start); start += nr; } @@ -441,14 +448,14 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, nr -= push; - BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push); + BEGIN_RING_NI(chan, eng3d, NV34TCL_VB_INDEX_BATCH, push); while (push--) { OUT_RING(chan, ((0x100 - 1) << 24) | start); start += 0x100; } } - BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); OUT_RING (chan, 0); count -= vc; @@ -457,24 +464,24 @@ nv30_draw_elements_vbo(struct pipe_context *pipe, } void -nv30_draw_elements(struct pipe_context *pipe, +nvfx_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - struct nv30_context *nv30 = nv30_context(pipe); + struct nvfx_context *nvfx = nvfx_context(pipe); boolean idxbuf; - idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); - if (FORCE_SWTNL || !nv30_state_validate(nv30)) { - /*return nv30_draw_elements_swtnl(pipe, NULL, 0, - mode, start, count);*/ + idxbuf = nvfx_vbo_set_idxbuf(nvfx, indexBuffer, indexSize); + if (nvfx_force_swtnl(nvfx) || !nvfx_state_validate(nvfx)) { + nvfx_draw_elements_swtnl(pipe, indexBuffer, indexSize, + mode, start, count); return; } if (idxbuf) { - nv30_draw_elements_vbo(pipe, mode, start, count); + nvfx_draw_elements_vbo(pipe, mode, start, count); } else { - nv30_draw_elements_inline(pipe, indexBuffer, indexSize, + nvfx_draw_elements_inline(pipe, indexBuffer, indexSize, mode, start, count); } @@ -482,49 +489,50 @@ nv30_draw_elements(struct pipe_context *pipe, } static boolean -nv30_vbo_validate(struct nv30_context *nv30) +nvfx_vbo_validate(struct nvfx_context *nvfx) { struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; - struct nouveau_grobj *rankine = nv30->screen->rankine; - struct pipe_buffer *ib = nv30->idxbuf; - unsigned ib_format = nv30->idxbuf_format; + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct pipe_buffer *ib = nvfx->idxbuf; + unsigned ib_format = nvfx->idxbuf_format; unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; int hw; vtxbuf = so_new(3, 17, 18); - so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); + so_method(vtxbuf, eng3d, NV34TCL_VTXBUF_ADDRESS(0), nvfx->vtxelt->num_elements); vtxfmt = so_new(1, 16, 0); - so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); + so_method(vtxfmt, eng3d, NV34TCL_VTXFMT(0), nvfx->vtxelt->num_elements); - for (hw = 0; hw < nv30->vtxelt_nr; hw++) { + for (hw = 0; hw < nvfx->vtxelt->num_elements; hw++) { struct pipe_vertex_element *ve; struct pipe_vertex_buffer *vb; unsigned type, ncomp; - ve = &nv30->vtxelt[hw]; - vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + ve = &nvfx->vtxelt->pipe[hw]; + vb = &nvfx->vtxbuf[ve->vertex_buffer_index]; if (!vb->stride) { if (!sattr) sattr = so_new(16, 16 * 4, 0); - if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { + if (nvfx_vbo_static_attrib(nvfx, sattr, hw, ve, vb)) { so_data(vtxbuf, 0); so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); continue; } } - if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { - /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + if (nvfx_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nvfx->fallback_swtnl |= NVFX_NEW_ARRAYS; so_ref(NULL, &vtxbuf); so_ref(NULL, &vtxfmt); return FALSE; } - so_reloc(vtxbuf, nouveau_bo(vb->buffer), vb->buffer_offset + - ve->src_offset, vb_flags | NOUVEAU_BO_LOW | - NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_reloc(vtxbuf, nouveau_bo(vb->buffer), + vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); } @@ -532,31 +540,31 @@ nv30_vbo_validate(struct nv30_context *nv30) if (ib) { struct nouveau_bo *bo = nouveau_bo(ib); - so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); + so_method(vtxbuf, eng3d, NV34TCL_IDXBUF_ADDRESS, 2); so_reloc (vtxbuf, bo, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); so_reloc (vtxbuf, bo, ib_format, vb_flags | NOUVEAU_BO_OR, 0, NV34TCL_IDXBUF_FORMAT_DMA1); } - so_method(vtxbuf, rankine, 0x1710, 1); + so_method(vtxbuf, eng3d, 0x1710, 1); so_data (vtxbuf, 0); - so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); + so_ref(vtxbuf, &nvfx->state.hw[NVFX_STATE_VTXBUF]); so_ref(NULL, &vtxbuf); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); - so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXBUF); + so_ref(vtxfmt, &nvfx->state.hw[NVFX_STATE_VTXFMT]); so_ref(NULL, &vtxfmt); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); - so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXFMT); + so_ref(sattr, &nvfx->state.hw[NVFX_STATE_VTXATTR]); so_ref(NULL, &sattr); - nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); + nvfx->state.dirty |= (1ULL << NVFX_STATE_VTXATTR); return FALSE; } -struct nv30_state_entry nv30_state_vbo = { - .validate = nv30_vbo_validate, +struct nvfx_state_entry nvfx_state_vbo = { + .validate = nvfx_vbo_validate, .dirty = { - .pipe = NV30_NEW_ARRAYS, + .pipe = NVFX_NEW_ARRAYS, .hw = 0, } }; diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index b289eef0fc2..2d243be16a3 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -5,10 +5,11 @@ #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_util.h" -#include "nv40_context.h" -#include "nv40_state.h" +#include "nvfx_context.h" +#include "nvfx_state.h" /* TODO (at least...): * 1. Indexed consts + ARL @@ -21,76 +22,62 @@ * 4. bugs */ -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv40_shader.h" +#include "nv30_vertprog.h" +#include "nv40_vertprog.h" -#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv40_sr_neg((s)) -#define abs(s) nv40_sr_abs((s)) +#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) -#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) +struct nvfx_vpc { + struct nvfx_vertex_program *vp; -struct nv40_vpc { - struct nv40_vertex_program *vp; - - struct nv40_vertex_program_exec *vpi; + struct nvfx_vertex_program_exec *vpi; unsigned r_temps; unsigned r_temps_discard; - struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nv40_sreg *r_address; - struct nv40_sreg *r_temp; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_address; + struct nvfx_sreg *r_temp; - struct nv40_sreg *imm; + struct nvfx_sreg *imm; unsigned nr_imm; unsigned hpos_idx; }; -static struct nv40_sreg -temp(struct nv40_vpc *vpc) +static struct nvfx_sreg +temp(struct nvfx_vpc *vpc) { int idx = ffs(~vpc->r_temps) - 1; if (idx < 0) { NOUVEAU_ERR("out of temps!!\n"); assert(0); - return nv40_sr(NV40SR_TEMP, 0); + return nvfx_sr(NVFXSR_TEMP, 0); } vpc->r_temps |= (1 << idx); vpc->r_temps_discard |= (1 << idx); - return nv40_sr(NV40SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } static INLINE void -release_temps(struct nv40_vpc *vpc) +release_temps(struct nvfx_vpc *vpc) { vpc->r_temps &= ~vpc->r_temps_discard; vpc->r_temps_discard = 0; } -static struct nv40_sreg -constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +static struct nvfx_sreg +constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) { - struct nv40_vertex_program *vp = vpc->vp; - struct nv40_vertex_program_data *vpd; + struct nvfx_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program_data *vpd; int idx; if (pipe >= 0) { for (idx = 0; idx < vp->nr_consts; idx++) { if (vp->consts[idx].index == pipe) - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } } @@ -103,70 +90,70 @@ constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) vpd->value[1] = y; vpd->value[2] = z; vpd->value[3] = w; - return nv40_sr(NV40SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; uint32_t sr = 0; switch (src.type) { - case NV40SR_TEMP: - sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); + sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); break; - case NV40SR_INPUT: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); break; - case NV40SR_CONST: - sr |= (NV40_VP_SRC_REG_TYPE_CONST << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_CONST: + sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); assert(vpc->vpi->const_index == -1 || vpc->vpi->const_index == src.index); vpc->vpi->const_index = src.index; break; - case NV40SR_NONE: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); break; default: assert(0); } if (src.negate) - sr |= NV40_VP_SRC_NEGATE; + sr |= NVFX_VP(SRC_NEGATE); if (src.abs) hw[0] |= (1 << (21 + pos)); - sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | + (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | + (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | + (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); switch (pos) { case 0: - hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> - NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << - NV40_VP_INST_SRC0L_SHIFT; + hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> + NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); + hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << + NVFX_VP(INST_SRC0L_SHIFT); break; case 1: - hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); break; case 2: - hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> - NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << - NV40_VP_INST_SRC2L_SHIFT; + hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> + NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); + hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << + NVFX_VP(INST_SRC2L_SHIFT); break; default: assert(0); @@ -174,78 +161,114 @@ emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) } static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; switch (dst.type) { - case NV40SR_TEMP: - hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + case NVFXSR_TEMP: + if(!nvfx->is_nv4x) + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } } break; - case NV40SR_OUTPUT: + case NVFXSR_OUTPUT: + /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - case NV40_VP_INST_DEST_CLIP(0): + case NVFX_VP_INST_DEST_CLIP(0): vp->or |= (1 << 6); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(1): + case NVFX_VP_INST_DEST_CLIP(1): vp->or |= (1 << 7); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(2): + case NVFX_VP_INST_DEST_CLIP(2): vp->or |= (1 << 8); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; - dst.index = NV40_VP_INST_DEST_FOGC; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; + dst.index = NVFX_VP(INST_DEST_FOGC); break; - case NV40_VP_INST_DEST_CLIP(3): + case NVFX_VP_INST_DEST_CLIP(3): vp->or |= (1 << 9); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; + dst.index = NVFX_VP(INST_DEST_PSZ); break; - case NV40_VP_INST_DEST_CLIP(4): + case NVFX_VP_INST_DEST_CLIP(4): vp->or |= (1 << 10); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; + dst.index = NVFX_VP(INST_DEST_PSZ); break; - case NV40_VP_INST_DEST_CLIP(5): + case NVFX_VP_INST_DEST_CLIP(5): vp->or |= (1 << 11); - vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; - dst.index = NV40_VP_INST_DEST_PSZ; + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; + dst.index = NVFX_VP(INST_DEST_PSZ); break; default: + if(!nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } else { + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } break; } - hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); - if (slot == 0) { - hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + if(!nvfx->is_nv4x) { + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; } else { - hw[3] |= NV40_VP_INST_SCA_RESULT; - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } } break; default: @@ -254,12 +277,12 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) } static void -nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nv40_sreg dst, int mask, - struct nv40_sreg s0, struct nv40_sreg s1, - struct nv40_sreg s2) +nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) { - struct nv40_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program *vp = vpc->vp; uint32_t *hw; vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); @@ -269,35 +292,53 @@ nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, hw = vpc->vpi->data; - hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); - - if (slot == 0) { - hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); - } else { - hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + hw[0] |= (NVFX_COND_TR << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + + if(!nvfx->is_nv4x) { + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); +// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); + + if (dst.type == NVFXSR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + } else { + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } } - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); + emit_dst(nvfx, vpc, hw, slot, dst); + emit_src(nvfx, vpc, hw, 0, s0); + emit_src(nvfx, vpc, hw, 1, s1); + emit_src(nvfx, vpc, hw, 2, s2); } -static INLINE struct nv40_sreg -tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nv40_sreg src; +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nvfx_sreg src; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index); + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); @@ -322,9 +363,9 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { return src; } -static INLINE struct nv40_sreg -tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nv40_sreg dst; +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nvfx_sreg dst; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -349,52 +390,19 @@ tgsi_mask(uint tgsi) { int mask = 0; - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_VP_MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_VP_MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_VP_MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_VP_MASK_W; return mask; } static boolean -src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nv40_sreg *src) -{ - const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); - struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= tgsi_mask(1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(vpc); - - if (mask) - arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, +nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_instruction *finst) { - struct nv40_sreg src[3], dst, tmp; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int mask; int ai = -1, ci = -1, ii = -1; int i; @@ -418,23 +426,12 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(vpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: if (ai == -1 || ai == fsrc->Register.Index) { ai = fsrc->Register.Index; src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -445,7 +442,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -456,7 +453,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, src[i] = tgsi_src(vpc, fsrc); } else { src[i] = temp(vpc); - arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + arith(vpc, VEC, MOV, src[i], NVFX_VP_MASK_ALL, tgsi_src(vpc, fsrc), none, none); } break; @@ -474,93 +471,96 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); break; case TGSI_OPCODE_ADD: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); break; case TGSI_OPCODE_ARL: - arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; case TGSI_OPCODE_DP3: - arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DP4: - arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DPH: - arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_DST: - arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_EX2: - arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_EXP: - arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_FLR: - arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); break; case TGSI_OPCODE_FRC: - arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); break; case TGSI_OPCODE_LG2: - arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LIT: - arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_LOG: - arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_MAD: - arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAX: - arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MIN: - arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_MOV: - arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); break; case TGSI_OPCODE_MUL: - arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: tmp = temp(vpc); - arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + arith(vpc, SCA, LG2, tmp, NVFX_VP_MASK_X, none, none, swz(src[0], X, X, X, X)); - arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + arith(vpc, VEC, MUL, tmp, NVFX_VP_MASK_X, swz(tmp, X, X, X, X), swz(src[1], X, X, X, X), none); - arith(vpc, 1, OP_EX2, dst, mask, none, none, + arith(vpc, SCA, EX2, dst, mask, none, none, swz(tmp, X, X, X, X)); break; case TGSI_OPCODE_RCP: - arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); break; case TGSI_OPCODE_RET: break; case TGSI_OPCODE_RSQ: - arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); break; case TGSI_OPCODE_SGE: - arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SLT: - arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SUB: - arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; case TGSI_OPCODE_XPD: tmp = temp(vpc); - arith(vpc, 0, OP_MUL, tmp, mask, + arith(vpc, VEC, MUL, tmp, mask, swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + arith(vpc, VEC, MAD, dst, (mask & ~NVFX_VP_MASK_W), swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), neg(tmp)); break; @@ -574,7 +574,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, } static boolean -nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, +nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_declaration *fdec) { unsigned idx = fdec->Range.First; @@ -582,15 +582,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV40_VP_INST_DEST_POS; + hw = NVFX_VP(INST_DEST_POS); vpc->hpos_idx = idx; break; case TGSI_SEMANTIC_COLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_COL0; + hw = NVFX_VP(INST_DEST_COL0); } else if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_COL1; + hw = NVFX_VP(INST_DEST_COL1); } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; @@ -598,24 +598,24 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, break; case TGSI_SEMANTIC_BCOLOR: if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_BFC0; + hw = NVFX_VP(INST_DEST_BFC0); } else if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_BFC1; + hw = NVFX_VP(INST_DEST_BFC1); } else { NOUVEAU_ERR("bad bcolour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV40_VP_INST_DEST_FOGC; + hw = NVFX_VP(INST_DEST_FOGC); break; case TGSI_SEMANTIC_PSIZE: - hw = NV40_VP_INST_DEST_PSZ; + hw = NVFX_VP(INST_DEST_PSZ); break; case TGSI_SEMANTIC_GENERIC: if (fdec->Semantic.Index <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); + hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -630,12 +630,12 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } - vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); return TRUE; } static boolean -nv40_vertprog_prepare(struct nv40_vpc *vpc) +nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) { struct tgsi_parse_context p; int high_temp = -1, high_addr = -1, nr_imm = 0, i; @@ -670,7 +670,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) break; #endif case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) return FALSE; break; default: @@ -691,7 +691,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) if (fdst->Register.Index > high_addr) high_addr = fdst->Register.Index; } - + } break; #endif @@ -702,18 +702,18 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) tgsi_parse_free(&p); if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); assert(vpc->imm); } if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) vpc->r_temp[i] = temp(vpc); } if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); for (i = 0; i < high_addr; i++) vpc->r_address[i] = temp(vpc); } @@ -723,26 +723,26 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc) } static void -nv40_vertprog_translate(struct nv40_context *nv40, - struct nv40_vertex_program *vp) +nvfx_vertprog_translate(struct nvfx_context *nvfx, + struct nvfx_vertex_program *vp) { struct tgsi_parse_context parse; - struct nv40_vpc *vpc = NULL; - struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nvfx_vpc *vpc = NULL; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); int i; - vpc = CALLOC(1, sizeof(struct nv40_vpc)); + vpc = CALLOC(1, sizeof(struct nvfx_vpc)); if (!vpc) return; vpc->vp = vp; - if (!nv40_vertprog_prepare(vpc)) { + if (!nvfx_vertprog_prepare(nvfx, vpc)) { FREE(vpc); return; } /* Redirect post-transform vertex position to a temp if user clip - * planes are enabled. We need to append code the the vtxprog + * planes are enabled. We need to append code to the vtxprog * to handle clip planes later. */ if (vp->ucp.nr) { @@ -775,7 +775,7 @@ nv40_vertprog_translate(struct nv40_context *nv40, { const struct tgsi_full_instruction *finst; finst = &parse.FullToken.FullInstruction; - if (!nv40_vertprog_parse_instruction(vpc, finst)) + if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst)) goto out_err; } break; @@ -785,74 +785,74 @@ nv40_vertprog_translate(struct nv40_context *nv40, } /* Write out HPOS if it was redirected to a temp earlier */ - if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { - struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, - NV40_VP_INST_DEST_POS); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP(INST_DEST_POS)); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + arith(vpc, VEC, MOV, hpos, NVFX_VP_MASK_ALL, htmp, none, none); } /* Insert code to handle user clip planes */ for (i = 0; i < vp->ucp.nr; i++) { - struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, - NV40_VP_INST_DEST_CLIP(i)); - struct nv40_sreg ceqn = constant(vpc, -1, - nv40->clip.ucp[i][0], - nv40->clip.ucp[i][1], - nv40->clip.ucp[i][2], - nv40->clip.ucp[i][3]); - struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP_INST_DEST_CLIP(i)); + struct nvfx_sreg ceqn = constant(vpc, -1, + nvfx->clip.ucp[i][0], + nvfx->clip.ucp[i][1], + nvfx->clip.ucp[i][2], + nvfx->clip.ucp[i][3]); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; unsigned mask; switch (i) { - case 0: case 3: mask = MASK_Y; break; - case 1: case 4: mask = MASK_Z; break; - case 2: case 5: mask = MASK_W; break; + case 0: case 3: mask = NVFX_VP_MASK_Y; break; + case 1: case 4: mask = NVFX_VP_MASK_Z; break; + case 2: case 5: mask = NVFX_VP_MASK_W; break; default: NOUVEAU_ERR("invalid clip dist #%d\n", i); goto out_err; } - arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); } - vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; vp->translated = TRUE; out_err: tgsi_parse_free(&parse); if (vpc->r_temp) - FREE(vpc->r_temp); + FREE(vpc->r_temp); if (vpc->r_address) - FREE(vpc->r_address); - if (vpc->imm) - FREE(vpc->imm); + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); FREE(vpc); } static boolean -nv40_vertprog_validate(struct nv40_context *nv40) -{ - struct pipe_screen *pscreen = nv40->pipe.screen; - struct nv40_screen *screen = nv40->screen; +nvfx_vertprog_validate(struct nvfx_context *nvfx) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *curie = screen->curie; - struct nv40_vertex_program *vp; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; - if (nv40->render_mode == HW) { - vp = nv40->vertprog; - constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + if (nvfx->render_mode == HW) { + vp = nvfx->vertprog; + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - if ((nv40->dirty & NV40_NEW_UCP) || - memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { - nv40_vertprog_destroy(nv40, vp); - memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + if ((nvfx->dirty & NVFX_NEW_UCP) || + memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { + nvfx_vertprog_destroy(nvfx, vp); + memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); } } else { - vp = nv40->swtnl.vertprog; + vp = nvfx->swtnl.vertprog; constbuf = NULL; } @@ -860,24 +860,24 @@ nv40_vertprog_validate(struct nv40_context *nv40) if (vp->translated) goto check_gpu_resources; - nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; - nv40_vertprog_translate(nv40, vp); + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + nvfx_vertprog_translate(nvfx, vp); if (!vp->translated) { - nv40->fallback_swtnl |= NV40_NEW_VERTPROG; - return FALSE; + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; } check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { - struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { while (heap->next && heap->size < vplen) { - struct nv40_vertex_program *evict; - + struct nvfx_vertex_program *evict; + evict = heap->next->priv; nouveau_resource_free(&evict->exec); } @@ -887,12 +887,14 @@ check_gpu_resources: } so = so_new(3, 4, 0); - so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); + so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + } + so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); so_data (so, vp->clip_ctrl); so_ref(so, &vp->so); so_ref(NULL, &so); @@ -902,12 +904,12 @@ check_gpu_resources: /* Allocate hw vtxprog const slots */ if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nv40->screen->vp_data_heap; + struct nouveau_resource *heap = nvfx->screen->vp_data_heap; if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { while (heap->next && heap->size < vp->nr_consts) { - struct nv40_vertex_program *evict; - + struct nvfx_vertex_program *evict; + evict = heap->next->priv; nouveau_resource_free(&evict->data); } @@ -929,7 +931,7 @@ check_gpu_resources: */ if (vp->exec_start != vp->exec->start) { for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; if (vpi->has_branch_offset) { assert(0); @@ -941,13 +943,13 @@ check_gpu_resources: if (vp->nr_consts && vp->data_start != vp->data->start) { for (i = 0; i < vp->nr_insns; i++) { - struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); vpi->data[1] |= (vpi->const_index + vp->data->start) << - NV40_VP_INST_CONST_SRC_SHIFT; + NVFX_VP(INST_CONST_SRC_SHIFT); } } @@ -965,7 +967,7 @@ check_gpu_resources: } for (i = 0; i < vp->nr_consts; i++) { - struct nv40_vertex_program_data *vpd = &vp->consts[i]; + struct nvfx_vertex_program_data *vpd = &vp->consts[i]; if (vpd->index >= 0) { if (!upload_data && @@ -976,13 +978,13 @@ check_gpu_resources: 4 * sizeof(float)); } - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); OUT_RING (chan, i + vp->data->start); OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } if (constbuf) - pscreen->buffer_unmap(pscreen, constbuf); + pipe_buffer_unmap(pscreen, constbuf); } /* Upload vtxprog */ @@ -995,16 +997,16 @@ check_gpu_resources: NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); } #endif - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); OUT_RINGp (chan, vp->insns[i].data, 4); } } - if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { - so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { + so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); return TRUE; } @@ -1012,7 +1014,7 @@ check_gpu_resources: } void -nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) { vp->translated = FALSE; @@ -1038,11 +1040,10 @@ nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) so_ref(NULL, &vp->so); } -struct nv40_state_entry nv40_state_vertprog = { - .validate = nv40_vertprog_validate, +struct nvfx_state_entry nvfx_state_vertprog = { + .validate = nvfx_vertprog_validate, .dirty = { - .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, - .hw = NV40_STATE_VERTPROG, + .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, + .hw = NVFX_STATE_VERTPROG, } }; - diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index afddcb161fa..a6529b20604 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -14,12 +14,14 @@ C_SOURCES = \ r300_query.c \ r300_render.c \ r300_screen.c \ + r300_screen_buffer.c \ r300_state.c \ r300_state_derived.c \ r300_state_invariant.c \ r300_vs.c \ r300_texture.c \ - r300_tgsi_to_rc.c + r300_tgsi_to_rc.c \ + r300_transfer.c LIBRARY_INCLUDES = \ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ @@ -32,7 +34,5 @@ EXTRA_OBJECTS = \ include ../../Makefile.template -.PHONY : $(COMPILER_ARCHIVE) - $(COMPILER_ARCHIVE): $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript index 183aa17f9b3..27b2e309932 100644 --- a/src/gallium/drivers/r300/SConscript +++ b/src/gallium/drivers/r300/SConscript @@ -30,6 +30,7 @@ r300 = env.ConvenienceLibrary( 'r300_vs.c', 'r300_texture.c', 'r300_tgsi_to_rc.c', + 'r300_transfer.c', ] + r300compiler) + r300compiler Export('r300') diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index eb9b0beeb5a..b7ad6b20206 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -33,7 +33,10 @@ static void r300_blitter_save_states(struct r300_context* r300) util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref)); util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); - util_blitter_save_vertex_shader(r300->blitter, r300->vs); + util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state); + util_blitter_save_viewport(r300->blitter, &r300->viewport); + util_blitter_save_clip(r300->blitter, &r300->clip); + util_blitter_save_vertex_elements(r300->blitter, r300->velems); } /* Clear currently bound buffers. */ @@ -98,6 +101,8 @@ static void r300_hw_copy(struct pipe_context* pipe, unsigned width, unsigned height) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; /* Yeah we have to save all those states to ensure this blitter operation * is really transparent. The states will be restored by the blitter once @@ -106,11 +111,11 @@ static void r300_hw_copy(struct pipe_context* pipe, util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); util_blitter_save_fragment_sampler_states( - r300->blitter, r300->sampler_count, (void**)r300->sampler_states); + r300->blitter, state->sampler_count, (void**)state->sampler_states); util_blitter_save_fragment_sampler_textures( - r300->blitter, r300->texture_count, - (struct pipe_texture**)r300->textures); + r300->blitter, state->texture_count, + (struct pipe_texture**)state->textures); /* Do a copy */ util_blitter_copy(r300->blitter, @@ -139,10 +144,10 @@ void r300_surface_copy(struct pipe_context* pipe, new_format = PIPE_FORMAT_I8_UNORM; break; case 2: - new_format = PIPE_FORMAT_A4R4G4B4_UNORM; + new_format = PIPE_FORMAT_B4G4R4A4_UNORM; break; case 4: - new_format = PIPE_FORMAT_A8R8G8B8_UNORM; + new_format = PIPE_FORMAT_B8G8R8A8_UNORM; break; default: debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n" diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index f631b4ed27f..d994a46ccfe 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -24,6 +24,7 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_upload_mgr.h" #include "r300_blit.h" #include "r300_context.h" @@ -34,6 +35,7 @@ #include "r300_screen.h" #include "r300_state_invariant.h" #include "r300_texture.h" +#include "r300_transfer.h" #include "radeon_winsys.h" @@ -54,12 +56,16 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } + u_upload_destroy(r300->upload_vb); + u_upload_destroy(r300->upload_ib); + FREE(r300->blend_color_state.state); FREE(r300->clip_state.state); FREE(r300->fb_state.state); FREE(r300->rs_block_state.state); FREE(r300->scissor_state.state); - FREE(r300->vertex_format_state.state); + FREE(r300->textures_state.state); + FREE(r300->vap_output_state.state); FREE(r300->viewport_state.state); FREE(r300->ztop_state.state); FREE(r300); @@ -70,11 +76,7 @@ r300_is_texture_referenced(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level) { - struct pipe_buffer* buf = 0; - - r300_get_texture_buffer(pipe->screen, texture, &buf, NULL); - - return pipe->is_buffer_referenced(pipe, buf); + return 0; } static unsigned int @@ -84,7 +86,14 @@ r300_is_buffer_referenced(struct pipe_context *pipe, /* This only checks to see whether actual hardware buffers are * referenced. Since we use managed BOs and transfers, it's actually not * possible for pipe_buffers to ever reference the actual hardware, so - * buffers are never referenced. */ + * buffers are never referenced. + */ + + /* XXX: that doesn't make sense given that + * r300_is_texture_referenced is implemented on top of this + * function and hardware can certainly refer to textures + * directly... + */ return 0; } @@ -96,39 +105,54 @@ static void r300_flush_cb(void *data) } #define R300_INIT_ATOM(atomname, atomsize) \ - r300->atomname##_state.name = #atomname; \ - r300->atomname##_state.state = NULL; \ - r300->atomname##_state.size = atomsize; \ - r300->atomname##_state.emit = r300_emit_##atomname##_state; \ - r300->atomname##_state.dirty = FALSE; \ - insert_at_tail(&r300->atom_list, &r300->atomname##_state); + r300->atomname.name = #atomname; \ + r300->atomname.state = NULL; \ + r300->atomname.size = atomsize; \ + r300->atomname.emit = r300_emit_##atomname; \ + r300->atomname.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname); static void r300_setup_atoms(struct r300_context* r300) { + boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; + boolean has_tcl = r300_screen(r300->context.screen)->caps->has_tcl; + /* Create the actual atom list. * * Each atom is examined and emitted in the order it appears here, which * can affect performance and conformance if not handled with care. * - * Some atoms never change size, others change every emit. This is just - * an upper bound on each atom, to keep the emission machinery from - * underallocating space. */ + * Some atoms never change size, others change every emit - those have + * the size of 0 here. */ make_empty_list(&r300->atom_list); - R300_INIT_ATOM(invariant, 71); - R300_INIT_ATOM(ztop, 2); - R300_INIT_ATOM(blend, 8); - R300_INIT_ATOM(blend_color, 3); - R300_INIT_ATOM(clip, 29); - R300_INIT_ATOM(dsa, 8); - R300_INIT_ATOM(fb, 56); - R300_INIT_ATOM(rs, 25); - R300_INIT_ATOM(scissor, 3); - R300_INIT_ATOM(viewport, 9); - R300_INIT_ATOM(rs_block, 21); - R300_INIT_ATOM(vertex_format, 26); + R300_INIT_ATOM(invariant_state, 71); + R300_INIT_ATOM(ztop_state, 2); + R300_INIT_ATOM(blend_state, 8); + R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); + R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2); + R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6); + R300_INIT_ATOM(fb_state, 0); + R300_INIT_ATOM(rs_state, 0); + R300_INIT_ATOM(scissor_state, 3); + R300_INIT_ATOM(viewport_state, 9); + R300_INIT_ATOM(rs_block_state, 0); + R300_INIT_ATOM(vertex_stream_state, 0); + R300_INIT_ATOM(vap_output_state, 6); + R300_INIT_ATOM(pvs_flush, 2); + R300_INIT_ATOM(vs_state, 0); + R300_INIT_ATOM(texture_cache_inval, 2); + R300_INIT_ATOM(textures_state, 0); /* Some non-CSO atoms need explicit space to store the state locally. */ + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state); + r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); + r300->textures_state.state = CALLOC_STRUCT(r300_textures_state); + r300->vap_output_state.state = CALLOC_STRUCT(r300_vap_output_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); } struct pipe_context* r300_create_context(struct pipe_screen* screen, @@ -136,14 +160,14 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, { struct r300_context* r300 = CALLOC_STRUCT(r300_context); struct r300_screen* r300screen = r300_screen(screen); - struct radeon_winsys* radeon_winsys = r300screen->radeon_winsys; + struct r300_winsys_screen *rws = r300screen->rws; if (!r300) return NULL; - r300->winsys = radeon_winsys; + r300->rws = rws; - r300->context.winsys = (struct pipe_winsys*)radeon_winsys; + r300->context.winsys = (struct pipe_winsys*)rws; r300->context.screen = screen; r300->context.priv = priv; @@ -178,14 +202,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_setup_atoms(r300); - r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); - r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); - r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); - r300->vertex_format_state.state = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); - r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); - /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, PIPE_BUFFER_USAGE_VERTEX, 4096); @@ -195,17 +211,35 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300_init_query_functions(r300); - /* r300_init_surface_functions(r300); */ + r300_init_transfer_functions(r300); r300_init_state_functions(r300); r300->invariant_state.dirty = TRUE; - r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300); - r300->dirty_state = R300_NEW_KITCHEN_SINK; + rws->set_flush_cb(r300->rws, r300_flush_cb, r300); r300->dirty_hw++; r300->blitter = util_blitter_create(&r300->context); + r300->upload_ib = u_upload_create(screen, + 32 * 1024, 16, + PIPE_BUFFER_USAGE_INDEX); + + if (r300->upload_ib == NULL) + goto no_upload_ib; + + r300->upload_vb = u_upload_create(screen, + 128 * 1024, 16, + PIPE_BUFFER_USAGE_VERTEX); + if (r300->upload_vb == NULL) + goto no_upload_vb; + return &r300->context; + + no_upload_ib: + u_upload_destroy(r300->upload_ib); + no_upload_vb: + FREE(r300); + return NULL; } diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 443af4ec2e3..db2f74e0745 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -32,6 +32,7 @@ #include "r300_screen.h" +struct u_upload_mgr; struct r300_context; struct r300_fragment_shader; @@ -45,7 +46,7 @@ struct r300_atom { /* Opaque state. */ void* state; /* Emit the state to the context. */ - void (*emit)(struct r300_context*, void*); + void (*emit)(struct r300_context*, unsigned, void*); /* Upper bound on number of dwords to emit. */ unsigned size; /* Whether this atom should be emitted. */ @@ -86,7 +87,6 @@ struct r300_rs_state { uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ - uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ @@ -119,22 +119,58 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_texture_state { +struct r300_texture_format_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */ }; +#define R300_MAX_TEXTURE_LEVELS 13 + struct r300_texture_fb_state { /* Colorbuffer. */ - uint32_t colorpitch[PIPE_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ + uint32_t colorpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_COLORPITCH[0-3]*/ uint32_t us_out_fmt; /* R300_US_OUT_FMT[0-3] */ /* Zbuffer. */ - uint32_t depthpitch[PIPE_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ + uint32_t depthpitch[R300_MAX_TEXTURE_LEVELS]; /* R300_RB3D_DEPTHPITCH */ uint32_t zb_format; /* R300_ZB_FORMAT */ }; +struct r300_textures_state { + /* Textures. */ + struct r300_texture *textures[8]; + int texture_count; + /* Sampler states. */ + struct r300_sampler_state *sampler_states[8]; + int sampler_count; + + /* These is the merge of the texture and sampler states. */ + unsigned count; + uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */ + struct r300_texture_sampler_state { + uint32_t format[3]; /* R300_TX_FORMAT[0-2] */ + uint32_t filter[2]; /* R300_TX_FILTER[0-1] */ + uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */ + uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */ + } regs[8]; +}; + +struct r300_vertex_stream_state { + /* R300_VAP_PROG_STREAK_CNTL_[0-7] */ + uint32_t vap_prog_stream_cntl[8]; + /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */ + uint32_t vap_prog_stream_cntl_ext[8]; + + unsigned count; +}; + +struct r300_vap_output_state { + uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */ + uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */ + uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */ +}; + struct r300_viewport_state { float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */ @@ -151,11 +187,6 @@ struct r300_ztop_state { #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_SAMPLER 0x00000200 -#define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_TEXTURE 0x00040000 -#define R300_ANY_NEW_TEXTURES 0x03fc0000 -#define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -208,16 +239,16 @@ struct r300_texture { struct pipe_texture tex; /* Offsets into the buffer. */ - unsigned offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned offset[R300_MAX_TEXTURE_LEVELS]; /* A pitch for each mip-level */ - unsigned pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned pitch[R300_MAX_TEXTURE_LEVELS]; /* Size of one zslice or face based on the texture target */ - unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS]; + unsigned layer_size[R300_MAX_TEXTURE_LEVELS]; /* Whether the mipmap level is macrotiled. */ - enum r300_buffer_tiling mip_macrotile[PIPE_MAX_TEXTURE_LEVELS]; + enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS]; /** * If non-zero, override the natural texture layout with @@ -238,10 +269,10 @@ struct r300_texture { boolean is_npot; /* Pipe buffer backing this texture. */ - struct pipe_buffer* buffer; + struct r300_winsys_buffer *buffer; /* Registers carrying texture format data. */ - struct r300_texture_state state; + struct r300_texture_format_state state; struct r300_texture_fb_state fb_state; /* Buffer tiling */ @@ -258,6 +289,13 @@ struct r300_vertex_info { uint32_t vap_prog_stream_cntl_ext[8]; }; +struct r300_vertex_element_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; + + struct r300_vertex_stream_state vertex_stream; +}; + extern struct pipe_viewport_state r300_viewport_identity; struct r300_context { @@ -265,7 +303,7 @@ struct r300_context { struct pipe_context context; /* The interface to the windowing system, etc. */ - struct radeon_winsys* winsys; + struct r300_winsys_screen *rws; /* Draw module. Used mostly for SW TCL. */ struct draw_context* draw; /* Accelerated blit support. */ @@ -282,9 +320,6 @@ struct r300_context { struct r300_query *query_current; struct r300_query query_list; - /* Vertex formatting information. */ - struct r300_atom vertex_format_state; - /* Various CSO state objects. */ /* Beginning of atom list. */ struct r300_atom atom_list; @@ -306,20 +341,24 @@ struct r300_context { struct r300_atom rs_state; /* RS block state. */ struct r300_atom rs_block_state; - /* Sampler states. */ - struct r300_sampler_state* sampler_states[8]; - int sampler_count; /* Scissor state. */ struct r300_atom scissor_state; - /* Texture states. */ - struct r300_texture* textures[8]; - int texture_count; + /* Textures state. */ + struct r300_atom textures_state; + /* Vertex stream formatting state. */ + struct r300_atom vertex_stream_state; + /* VAP (vertex shader) output mapping state. */ + struct r300_atom vap_output_state; /* Vertex shader. */ - struct r300_vertex_shader* vs; + struct r300_atom vs_state; /* Viewport state. */ struct r300_atom viewport_state; /* ZTOP state. */ struct r300_atom ztop_state; + /* PVS flush. */ + struct r300_atom pvs_flush; + /* Texture cache invalidate. */ + struct r300_atom texture_cache_inval; /* Invariant state. This must be emitted to get the engine started. */ struct r300_atom invariant_state; @@ -327,22 +366,33 @@ struct r300_context { /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; int vertex_buffer_count; + int vertex_buffer_max_index; /* Vertex elements for Gallium. */ - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - int vertex_element_count; + struct r300_vertex_element_state *velems; + bool any_user_vbs; + + /* Vertex info for Draw. */ + struct vertex_info vertex_info; struct pipe_stencil_ref stencil_ref; + struct pipe_clip_state clip; + + struct pipe_viewport_state viewport; + /* Bitmask of dirty state objects. */ uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; - /* Whether the TCL engine should be in bypass mode. */ - boolean tcl_bypass; /* Whether polygon offset is enabled. */ boolean polygon_offset_enabled; /* Z buffer bit depth. */ uint32_t zbuffer_bpp; + /* Whether scissor is enabled. */ + boolean scissor_enabled; + /* upload managers */ + struct u_upload_mgr *upload_vb; + struct u_upload_mgr *upload_ib; }; /* Convenience cast wrapper. */ @@ -359,6 +409,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, struct draw_stage* r300_draw_stage(struct r300_context* r300); void r300_init_state_functions(struct r300_context* r300); void r300_init_surface_functions(struct r300_context* r300); +void r300_init_tex_functions( struct pipe_context *pipe ); static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags) { diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 151f72b0fe4..ad07efbffdb 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -51,7 +51,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ - struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ + struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \ int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ @@ -105,22 +105,34 @@ cs_count--; \ } while (0) -#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \ +#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, offset %d, " \ "domains (%d, %d, %d)\n", \ bo, offset, rd, wd, flags); \ assert(bo); \ cs_winsys->write_cs_dword(cs_winsys, offset); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 3; \ } while (0) -#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ + +#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \ + DBG(cs_context_copy, DBG_CS, "r300: writing relocation for texture %p, offset %d, " \ + "domains (%d, %d, %d)\n", \ + tex, offset, rd, wd, flags); \ + assert(tex); \ + cs_winsys->write_cs_dword(cs_winsys, offset); \ + r300_texture_write_reloc(cs_winsys, tex, rd, wd, flags); \ + cs_count -= 3; \ +} while (0) + + +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \ DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \ "domains (%d, %d, %d)\n", \ bo, rd, wd, flags); \ assert(bo); \ - cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \ + r300_buffer_write_reloc(cs_winsys, r300_buffer(bo), rd, wd, flags); \ cs_count -= 2; \ } while (0) diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index b881730848a..d6177577c8d 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -37,6 +37,7 @@ static struct debug_option debug_options[] = { { "draw", DBG_DRAW, "Draw and emit" }, { "tex", DBG_TEX, "Textures" }, { "fall", DBG_FALL, "Fallbacks" }, + { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking purposes only!)" }, { "all", ~0, "Convenience option that enables all debug flags" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index f7dcd8dc52f..d8c64dd9001 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -32,16 +32,19 @@ #include "r300_emit.h" #include "r300_fs.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" +#include "r300_state_inlines.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, void* state) +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_state* blend = (struct r300_blend_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; CS_LOCALS(r300); - BEGIN_CS(8); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (fb->nr_cbufs) { @@ -58,26 +61,28 @@ void r300_emit_blend_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, void* state) +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); if (r300screen->caps->is_r500) { - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2); OUT_CS(bc->blend_color_red_alpha); OUT_CS(bc->blend_color_green_blue); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color); END_CS; } } -void r300_emit_clip_state(struct r300_context* r300, void* state) +void r300_emit_clip_state(struct r300_context* r300, + unsigned size, void* state) { struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; @@ -85,7 +90,7 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) CS_LOCALS(r300); if (r300screen->caps->has_tcl) { - BEGIN_CS(5 + (6 * 4)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, (r300screen->caps->is_r500 ? R500_PVS_UCP_START : R300_PVS_UCP_START)); @@ -100,14 +105,14 @@ void r300_emit_clip_state(struct r300_context* r300, void* state) R300_PS_UCP_MODE_CLIP_AS_TRIFAN); END_CS; } else { - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE); END_CS; } } -void r300_emit_dsa_state(struct r300_context* r300, void* state) +void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) { struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -116,7 +121,7 @@ void r300_emit_dsa_state(struct r300_context* r300, void* state) struct pipe_stencil_ref stencil_ref = r300->stencil_ref; CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); + BEGIN_CS(size); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); @@ -143,6 +148,8 @@ static const float * get_shader_constant( { struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + struct r300_textures_state* texstate = + (struct r300_textures_state*)r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -158,7 +165,7 @@ static const float * get_shader_constant( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = &r300->textures[constant->u.State[1]]->tex; + tex = &texstate->textures[constant->u.State[1]]->tex; vec[0] = 1.0 / tex->width0; vec[1] = 1.0 / tex->height0; break; @@ -170,23 +177,15 @@ static const float * get_shader_constant( break; case RC_STATE_R300_VIEWPORT_SCALE: - if (r300->tcl_bypass) { - vec[0] = 1; - vec[1] = 1; - vec[2] = 1; - } else { - vec[0] = viewport->xscale; - vec[1] = viewport->yscale; - vec[2] = viewport->zscale; - } + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; break; case RC_STATE_R300_VIEWPORT_OFFSET: - if (!r300->tcl_bypass) { - vec[0] = viewport->xoffset; - vec[1] = viewport->yoffset; - vec[2] = viewport->zoffset; - } + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; break; default: @@ -378,7 +377,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_fb_state(struct r300_context* r300, void* state) +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); @@ -387,7 +386,7 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) int i; CS_LOCALS(r300); - BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 6); + BEGIN_CS(size); /* Flush and free renderbuffer caches. */ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, @@ -418,14 +417,17 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) assert(tex && tex->buffer && "cbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.colorpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.colorpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), tex->fb_state.us_out_fmt); } + for (; i < 4; i++) { + OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED); + } /* Set up a zbuffer. */ if (fb->zsbuf) { @@ -434,19 +436,21 @@ void r300_emit_fb_state(struct r300_context* r300, void* state) assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_TEX_RELOC(tex, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_ZB_FORMAT, tex->fb_state.zb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->fb_state.depthpitch[surf->level], + OUT_CS_TEX_RELOC(tex, tex->fb_state.depthpitch[surf->level], 0, RADEON_GEM_DOMAIN_VRAM, 0); } + OUT_CS_REG(R300_GA_POINT_MINMAX, + (MAX2(fb->width, fb->height) * 6) << R300_GA_POINT_MINMAX_MAX_SHIFT); END_CS; } -static void r300_emit_query_start(struct r300_context *r300) +void r300_emit_query_start(struct r300_context *r300) { struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps; struct r300_query *query = r300->query_current; @@ -489,13 +493,13 @@ static void r300_emit_query_finish(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 3), 0, RADEON_GEM_DOMAIN_GTT, 0); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 2), 0, RADEON_GEM_DOMAIN_GTT, 0); case 2: /* pipe 1 only */ @@ -503,13 +507,13 @@ static void r300_emit_query_finish(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 1), 0, RADEON_GEM_DOMAIN_GTT, 0); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0), 0, RADEON_GEM_DOMAIN_GTT, 0); break; default: @@ -531,7 +535,7 @@ static void rv530_emit_query_single(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -544,10 +548,10 @@ static void rv530_emit_query_double(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_CS_BUF_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -572,21 +576,19 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, void* state) +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = (struct r300_rs_state*)state; float scale, offset; CS_LOCALS(r300); - BEGIN_CS(18 + (rs->polygon_offset_enable ? 5 : 0)); + BEGIN_CS(size); OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); + OUT_CS_REG(R300_GA_LINE_CNTL, rs->line_control); if (rs->polygon_offset_enable) { scale = rs->depth_scale * 12; @@ -617,7 +619,8 @@ void r300_emit_rs_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_rs_block_state(struct r300_context* r300, void* state) +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_rs_block* rs = (struct r300_rs_block*)state; unsigned i; @@ -628,7 +631,7 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) DBG(r300, DBG_DRAW, "r300: RS emit:\n"); - BEGIN_CS(5 + count*2); + BEGIN_CS(size); if (r300screen->caps->is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); } else { @@ -659,7 +662,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, void* state) END_CS; } -void r300_emit_scissor_state(struct r300_context* r300, void* state) +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state) { unsigned minx, miny, maxx, maxy; uint32_t top_left, bottom_right; @@ -673,7 +677,7 @@ void r300_emit_scissor_state(struct r300_context* r300, void* state) maxx = fb->width; maxy = fb->height; - if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + if (r300->scissor_enabled) { minx = MAX2(minx, scissor->minx); miny = MAX2(miny, scissor->miny); maxx = MIN2(maxx, scissor->maxx); @@ -713,65 +717,51 @@ void r300_emit_scissor_state(struct r300_context* r300, void* state) (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } - BEGIN_CS(3); + BEGIN_CS(size); OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); OUT_CS(top_left); OUT_CS(bottom_right); END_CS; } -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset) +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state) { - uint32_t filter0 = sampler->filter0; - uint32_t format0 = tex->state.format0; - unsigned min_level, max_level; + struct r300_textures_state *allstate = (struct r300_textures_state*)state; + struct r300_texture_sampler_state *texstate; + unsigned i; CS_LOCALS(r300); - /* to emulate 1D textures through 2D ones correctly */ - if (tex->tex.target == PIPE_TEXTURE_1D) { - filter0 &= ~R300_TX_WRAP_T_MASK; - filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); - } + BEGIN_CS(size); + OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable); - if (tex->is_npot) { - /* NPOT textures don't support mip filter, unfortunately. - * This prevents incorrect rendering. */ - filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK; - } else { - /* determine min/max levels */ - /* the MAX_MIP level is the largest (finest) one */ - max_level = MIN2(sampler->max_lod, tex->tex.last_level); - min_level = MIN2(sampler->min_lod, max_level); - format0 |= R300_TX_NUM_LEVELS(max_level); - filter0 |= R300_TX_MAX_MIP_LEVEL(min_level); - } + for (i = 0; i < allstate->count; i++) { + if ((1 << i) & allstate->tx_enable) { + texstate = &allstate->regs[i]; + + OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter[0]); + OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter[1]); + OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4), + texstate->border_color); - BEGIN_CS(16); - OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 | - (offset << 28)); - OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1); - OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color); - - OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0); - OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); - OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); - OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, - R300_TXO_MACRO_TILE(tex->macrotile) | - R300_TXO_MICRO_TILE(tex->microtile), - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format[0]); + OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format[1]); + OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format[2]); + + OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_CS_TEX_RELOC(allstate->textures[i], texstate->tile_config, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } END_CS; } void r300_emit_aos(struct r300_context* r300, unsigned offset) { struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_vertex_element *velem = r300->velems->velem; int i; - unsigned size1, size2, aos_count = r300->vertex_element_count; + unsigned size1, size2, aos_count = r300->velems->count; unsigned packet_size = (aos_count * 3 + 1) / 2; CS_LOCALS(r300); @@ -800,74 +790,116 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) } for (i = 0; i < aos_count; i++) { - OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer, + RADEON_GEM_DOMAIN_GTT, 0, 0); } END_CS; } -void r300_emit_vertex_format_state(struct r300_context* r300, void* state) +void r300_emit_vertex_buffer(struct r300_context* r300) { - struct r300_vertex_info* vertex_info = (struct r300_vertex_info*)state; - unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n"); + DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + "vertex size %d\n", r300->vbo, + r300->vertex_info.size); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * OFFSET [offset into BO] + * VBPNTR [relocated BO] + */ + BEGIN_CS(7); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); + OUT_CS(1); + OUT_CS(r300->vertex_info.size | + (r300->vertex_info.size << 8)); + OUT_CS(r300->vbo_offset); + OUT_CS_BUF_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +} - BEGIN_CS(26); - OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_info->vinfo.size); +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state) +{ + struct r300_vertex_stream_state *streams = + (struct r300_vertex_stream_state*)state; + unsigned i; + CS_LOCALS(r300); - OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); - OUT_CS(vertex_info->vinfo.hwfmt[0]); - OUT_CS(vertex_info->vinfo.hwfmt[1]); - OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); - OUT_CS(vertex_info->vinfo.hwfmt[2]); - OUT_CS(vertex_info->vinfo.hwfmt[3]); - for (i = 0; i < 4; i++) { - DBG(r300, DBG_DRAW, " : hwfmt%d: 0x%08x\n", i, - vertex_info->vinfo.hwfmt[i]); - } + DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl[i]); + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl[i]); + streams->vap_prog_stream_cntl[i]); } - OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8); - for (i = 0; i < 8; i++) { - OUT_CS(vertex_info->vap_prog_stream_cntl_ext[i]); + OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); + for (i = 0; i < streams->count; i++) { + OUT_CS(streams->vap_prog_stream_cntl_ext[i]); DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - vertex_info->vap_prog_stream_cntl_ext[i]); + streams->vap_prog_stream_cntl_ext[i]); } END_CS; } +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state) +{ + struct r300_vap_output_state *vap_out_state = + (struct r300_vap_output_state*)state; + CS_LOCALS(r300); + + DBG(r300, DBG_DRAW, "r300: VAP emit:\n"); + + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_CS(vap_out_state->vap_vtx_state_cntl); + OUT_CS(vap_out_state->vap_vsm_vtx_assm); + OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_CS(vap_out_state->vap_out_vtx_fmt[0]); + OUT_CS(vap_out_state->vap_out_vtx_fmt[1]); + END_CS; +} + +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) +{ + CS_LOCALS(r300); -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code) + BEGIN_CS(size); + OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); + END_CS; +} + +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { - int i; + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; + struct r300_vertex_program_code* code = &vs->code; struct r300_screen* r300screen = r300_screen(r300->context.screen); unsigned instruction_count = code->length / 4; + unsigned i; + + unsigned vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; + unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1); + unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1); + unsigned temp_count = MAX2(code->num_temporaries, 1); - int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72; - int input_count = MAX2(util_bitcount(code->InputsRead), 1); - int output_count = MAX2(util_bitcount(code->OutputsWritten), 1); - int temp_count = MAX2(code->num_temporaries, 1); - int pvs_num_slots = MIN3(vtx_mem_size / input_count, - vtx_mem_size / output_count, 10); - int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, + vtx_mem_size / output_count, 10); + unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { - debug_printf("r300: Implementation error: emit_vertex_shader called," + debug_printf("r300: Implementation error: emit_vs_state called," " but has_tcl is FALSE!\n"); return; } - BEGIN_CS(9 + code->length); + BEGIN_CS(size); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL * R300_VAP_PVS_CODE_CNTL_1 @@ -881,8 +913,9 @@ void r300_emit_vertex_program_code(struct r300_context* r300, OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length); - for (i = 0; i < code->length; i++) + for (i = 0; i < code->length; i++) { OUT_CS(code->body.d[i]); + } OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) | R300_PVS_NUM_CNTLRS(pvs_num_controllers) | @@ -892,12 +925,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300, END_CS; } -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs) -{ - r300_emit_vertex_program_code(r300, &vs->code); -} - void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants) { @@ -906,7 +933,7 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, CS_LOCALS(r300); if (!r300screen->caps->has_tcl) { - debug_printf("r300: Implementation error: emit_vertex_shader called," + debug_printf("r300: Implementation error: emit_vs_constant_buffer called," " but has_tcl is FALSE!\n"); return; } @@ -931,96 +958,75 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, void* state) +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - if (r300->tcl_bypass) { - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_VTE_CNTL, 0); - END_CS; - } else { - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - END_CS; - } + BEGIN_CS(size); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } -void r300_emit_texture_count(struct r300_context* r300) -{ - uint32_t tx_enable = 0; - int i; - CS_LOCALS(r300); - - /* Notice that texture_count and sampler_count are just sizes - * of the respective arrays. We still have to check for the individual - * elements. */ - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->textures[i]) { - tx_enable |= 1 << i; - } - } - - BEGIN_CS(2); - OUT_CS_REG(R300_TX_ENABLE, tx_enable); - END_CS; - -} - -void r300_emit_ztop_state(struct r300_context* r300, void* state) +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); END_CS; } -void r300_flush_textures(struct r300_context* r300) +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state) { CS_LOCALS(r300); - BEGIN_CS(2); + BEGIN_CS(size); OUT_CS_REG(R300_TX_INVALTAGS, 0); END_CS; } -static void r300_flush_pvs(struct r300_context* r300) -{ - CS_LOCALS(r300); - - BEGIN_CS(2); - OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); - END_CS; -} - -void r300_emit_buffer_validate(struct r300_context *r300) +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; struct r300_texture* tex; + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_element *velem = r300->velems->velem; + struct pipe_buffer *pbuf; unsigned i; boolean invalid = FALSE; + /* upload buffers first */ + if (r300->any_user_vbs) { + r300_upload_user_buffers(r300); + r300->any_user_vbs = false; + } + /* Clean out BOs. */ - r300->winsys->reset_bos(r300->winsys); + r300->rws->reset_bos(r300->rws); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = (struct r300_texture*)fb->cbufs[i]->texture; assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } @@ -1029,42 +1035,60 @@ validate: if (fb->zsbuf) { tex = (struct r300_texture*)fb->zsbuf->texture; assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - 0, RADEON_GEM_DOMAIN_VRAM)) { + if (!r300_add_texture(r300->rws, tex, + 0, RADEON_GEM_DOMAIN_VRAM)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...textures... */ - for (i = 0; i < r300->texture_count; i++) { - tex = r300->textures[i]; - if (!tex) + for (i = 0; i < texstate->count; i++) { + tex = texstate->textures[i]; + if (!tex || !texstate->sampler_states[i]) continue; - if (!r300->winsys->add_buffer(r300->winsys, tex->buffer, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { + if (!r300_add_texture(r300->rws, tex, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } /* ...occlusion query buffer... */ if (r300->dirty_state & R300_NEW_QUERY) { - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { + if (!r300_add_buffer(r300->rws, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } } - /* ...and vertex buffer. */ + /* ...vertex buffer for SWTCL path... */ if (r300->vbo) { - if (!r300->winsys->add_buffer(r300->winsys, r300->vbo, - RADEON_GEM_DOMAIN_GTT, 0)) { + if (!r300_add_buffer(r300->rws, r300->vbo, + RADEON_GEM_DOMAIN_GTT, 0)) { r300->context.flush(&r300->context, 0, NULL); goto validate; } - } else { - /* debug_printf("No VBO while emitting dirty state!\n"); */ } - if (!r300->winsys->validate(r300->winsys)) { + /* ...vertex buffers for HWTCL path... */ + if (do_validate_vertex_buffers) { + for (i = 0; i < r300->velems->count; i++) { + pbuf = vbuf[velem[i].vertex_buffer_index].buffer; + + if (!r300_add_buffer(r300->rws, pbuf, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + } + /* ...and index buffer for HWTCL path. */ + if (index_buffer) { + if (!r300_add_buffer(r300->rws, index_buffer, + RADEON_GEM_DOMAIN_GTT, 0)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } + } + if (!r300->rws->validate(r300->rws)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ @@ -1076,16 +1100,10 @@ validate: } } -/* Emit all dirty state. */ -void r300_emit_dirty_state(struct r300_context* r300) +unsigned r300_get_num_dirty_dwords(struct r300_context *r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_atom* atom; - unsigned i, dwords = 1024; - int dirty_tex = 0; - - /* Check the required number of dwords against the space remaining in the - * current CS object. If we need more, then flush. */ + unsigned dwords = 0; foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { @@ -1093,12 +1111,17 @@ void r300_emit_dirty_state(struct r300_context* r300) } } - /* Make sure we have at least 2*1024 spare dwords. */ - /* XXX It would be nice to know the number of dwords we really need to - * XXX emit. */ - while (!r300->winsys->check_cs(r300->winsys, dwords)) { - r300->context.flush(&r300->context, 0, NULL); - } + /* XXX This is the compensation for the non-atomized states. */ + dwords += 1024; + + return dwords; +} + +/* Emit all dirty state. */ +void r300_emit_dirty_state(struct r300_context* r300) +{ + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct r300_atom* atom; if (r300->dirty_state & R300_NEW_QUERY) { r300_emit_query_start(r300); @@ -1107,7 +1130,7 @@ void r300_emit_dirty_state(struct r300_context* r300) foreach(atom, &r300->atom_list) { if (atom->dirty || atom->always_dirty) { - atom->emit(r300, atom->state); + atom->emit(r300, atom->size, atom->state); atom->dirty = FALSE; } } @@ -1133,43 +1156,9 @@ void r300_emit_dirty_state(struct r300_context* r300) r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS; } - /* Samplers and textures are tracked separately but emitted together. */ - if (r300->dirty_state & - (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { - r300_emit_texture_count(r300); - - for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) { - if (r300->dirty_state & - ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) { - if (r300->textures[i]) { - r300_emit_texture(r300, - r300->sampler_states[i], - r300->textures[i], - i); - dirty_tex |= r300->dirty_state & (R300_NEW_TEXTURE << i); - } - r300->dirty_state &= - ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i)); - } - } - r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); - } - - if (dirty_tex) { - r300_flush_textures(r300); - } - - if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) { - r300_flush_pvs(r300); - } - - if (r300->dirty_state & R300_NEW_VERTEX_SHADER) { - r300_emit_vertex_shader(r300, r300->vs); - r300->dirty_state &= ~R300_NEW_VERTEX_SHADER; - } - if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) { - r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants); + struct r300_vertex_shader* vs = r300->vs_state.state; + r300_emit_vs_constant_buffer(r300, &vs->code.constants); r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; } @@ -1177,8 +1166,10 @@ void r300_emit_dirty_state(struct r300_context* r300) assert(r300->dirty_state == 0); */ - /* Finally, emit the VBO. */ - /* r300_emit_vertex_buffer(r300); */ + /* Emit the VBO for SWTCL. */ + if (!r300screen->caps->has_tcl) { + r300_emit_vertex_buffer(r300); + } r300->dirty_hw++; } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 6b96d9b57c0..7db2fc6a1a1 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,13 +31,17 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, void* state); +void r300_emit_blend_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, void* state); +void r300_emit_blend_color_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_clip_state(struct r300_context* r300, void* state); +void r300_emit_clip_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_dsa_state(struct r300_context* r300, void* state); +void r300_emit_dsa_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -51,48 +55,53 @@ void r500_emit_fragment_program_code(struct r300_context* r300, void r500_emit_fs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_fb_state(struct r300_context* r300, void* state); +void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_query_begin(struct r300_context* r300, - struct r300_query* query); +void r300_emit_query_start(struct r300_context* r300); void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, void* state); +void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_rs_block_state(struct r300_context* r300, void* state); +void r300_emit_rs_block_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_scissor_state(struct r300_context* r300, void* state); +void r300_emit_scissor_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture(struct r300_context* r300, - struct r300_sampler_state* sampler, - struct r300_texture* tex, - unsigned offset); +void r300_emit_textures_state(struct r300_context *r300, + unsigned size, void *state); void r300_emit_vertex_buffer(struct r300_context* r300); -void r300_emit_vertex_format_state(struct r300_context* r300, void* state); +void r300_emit_vertex_stream_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_vertex_program_code(struct r300_context* r300, - struct r300_vertex_program_code* code); +void r300_emit_vap_output_state(struct r300_context* r300, + unsigned size, void* state); void r300_emit_vs_constant_buffer(struct r300_context* r300, struct rc_constant_list* constants); -void r300_emit_vertex_shader(struct r300_context* r300, - struct r300_vertex_shader* vs); +void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state); -void r300_emit_viewport_state(struct r300_context* r300, void* state); +void r300_emit_viewport_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, + unsigned size, void* state); -void r300_emit_ztop_state(struct r300_context* r300, void* state); +void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state); -void r300_flush_textures(struct r300_context* r300); +void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state); + +unsigned r300_get_num_dirty_dwords(struct r300_context *r300); /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); -void r300_emit_buffer_validate(struct r300_context *r300); +void r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_buffer *index_buffer); #endif /* R300_EMIT_H */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index e37d3092703..70de152713d 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -61,6 +61,12 @@ static void r300_flush(struct pipe_context* pipe, atom->dirty = TRUE; } } + + /* Unmark HWTCL state for SWTCL. */ + if (!r300_screen(pipe->screen)->caps->has_tcl) { + r300->vs_state.dirty = FALSE; + r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS; + } } /* reset flushed query */ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index ae4c62b2f1d..9e71e61c303 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -133,10 +133,13 @@ static void get_compare_state( struct r300_fragment_program_external_state* state, unsigned shadow_samplers) { + struct r300_textures_state *texstate = + (struct r300_textures_state*)r300->textures_state.state; + memset(state, 0, sizeof(*state)); - for (int i = 0; i < r300->sampler_count; i++) { - struct r300_sampler_state* s = r300->sampler_states[i]; + for (int i = 0; i < texstate->sampler_count; i++) { + struct r300_sampler_state* s = texstate->sampler_states[i]; if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { /* XXX Gallium doesn't provide us with any information regarding @@ -204,6 +207,7 @@ static void r300_translate_fragment_shader( DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n", compiler.Base.ErrorMsg); assert(0); + abort(); } /* And, finally... */ diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index a249e8b36be..1c2b2528877 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -540,7 +540,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PVS_FIRST_INST(x) ((x) << 0) # define R300_PVS_XYZW_VALID_INST(x) ((x) << 10) # define R300_PVS_LAST_INST(x) ((x) << 20) -/* Addresses are relative the the vertex program parameters area. */ +/* Addresses are relative to the vertex program parameters area. */ #define R300_VAP_PVS_CONST_CNTL 0x22D4 # define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 # define R300_PVS_MAX_CONST_ADDR_SHIFT 16 @@ -1500,6 +1500,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ANISO_THRESHOLD_MASK (7<<17) # define R500_MACRO_SWITCH (1<<22) +# define R500_TX_MAX_ANISO(x) ((x) << 23) +# define R500_TX_MAX_ANISO_MASK (63 << 23) +# define R500_TX_ANISO_HIGH_QUALITY (1 << 30) + # define R500_BORDER_FIX (1<<31) #define R300_TX_FORMAT0_0 0x4480 @@ -1857,7 +1861,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The destination register index is in FPI1 (color) and FPI3 (alpha) * together with enable bits. * There are separate enable bits for writing into temporary registers - * (DSTC_REG_* /DSTA_REG) and and program output registers (DSTC_OUTPUT_* + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* * /DSTA_OUTPUT). You can write to both at once, or not write at all (the * same index must be used for both). * diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 648d884654f..47100c83b0b 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -30,10 +30,12 @@ #include "util/u_format.h" #include "util/u_memory.h" +#include "util/u_upload_mgr.h" #include "util/u_prim.h" #include "r300_cs.h" #include "r300_context.h" +#include "r300_screen_buffer.h" #include "r300_emit.h" #include "r300_reg.h" #include "r300_render.h" @@ -118,10 +120,50 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +/* Check if the requested number of dwords is available in the CS and + * if not, flush. Return TRUE if the flush occured. */ +static boolean r300_reserve_cs_space(struct r300_context *r300, + unsigned dwords) +{ + if (!r300->rws->check_cs(r300->rws, dwords)) { + r300->context.flush(&r300->context, 0, NULL); + return TRUE; + } + return FALSE; +} + static boolean immd_is_good_idea(struct r300_context *r300, - unsigned count) + unsigned count) { - return count <= 4; + struct pipe_vertex_element* velem; + struct pipe_vertex_buffer* vbuf; + boolean checked[PIPE_MAX_ATTRIBS] = {0}; + unsigned vertex_element_count = r300->velems->count; + unsigned i, vbi; + + if (count > 4) { + return FALSE; + } + + /* We shouldn't map buffers referenced by CS, busy buffers, + * and ones placed in VRAM. */ + /* XXX Check for VRAM buffers. */ + for (i = 0; i < vertex_element_count; i++) { + velem = &r300->velems->velem[i]; + vbi = velem->vertex_buffer_index; + + if (!checked[vbi]) { + vbuf = &r300->vertex_buffer[vbi]; + + if (r300_buffer_is_referenced(r300, + vbuf->buffer)) { + /* It's a very bad idea to map it... */ + return FALSE; + } + checked[vbi] = TRUE; + } + } + return TRUE; } static void r300_emit_draw_arrays_immediate(struct r300_context *r300, @@ -131,8 +173,8 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, { struct pipe_vertex_element* velem; struct pipe_vertex_buffer* vbuf; - unsigned vertex_element_count = r300->vertex_element_count; - unsigned i, v, vbi, dw, elem_offset; + unsigned vertex_element_count = r300->velems->count; + unsigned i, v, vbi, dw, elem_offset, dwords; /* Size of the vertex, in dwords. */ unsigned vertex_size = 0; @@ -154,7 +196,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ for (i = 0; i < vertex_element_count; i++) { - velem = &r300->vertex_element[i]; + velem = &r300->velems->velem[i]; offset[i] = velem->src_offset / 4; size[i] = util_format_get_blocksize(velem->src_format) / 4; vertex_size += size[i]; @@ -171,14 +213,19 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, } } + dwords = 9 + count * vertex_size; + + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); - BEGIN_CS(10 + count * vertex_size); + BEGIN_CS(dwords); OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | r300_translate_primitive(mode)); @@ -186,7 +233,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Emit vertices. */ for (v = 0; v < count; v++) { for (i = 0; i < vertex_element_count; i++) { - velem = &r300->vertex_element[i]; + velem = &r300->velems->velem[i]; vbi = velem->vertex_buffer_index; elem_offset = offset[i] + stride[vbi] * (v + start); @@ -199,7 +246,7 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300, /* Unmap buffers. */ for (i = 0; i < vertex_element_count; i++) { - vbi = r300->vertex_element[i].vertex_buffer_index; + vbi = r300->velems->velem[i].vertex_buffer_index; if (map[vbi]) { vbuf = &r300->vertex_buffer[vbi]; @@ -222,15 +269,16 @@ static void r300_emit_draw_arrays(struct r300_context *r300, if (alt_num_verts) { assert(count < (1 << 24)); - BEGIN_CS(10); + BEGIN_CS(9); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } else { - BEGIN_CS(8); + BEGIN_CS(7); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(count - 1); + OUT_CS(0); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300_translate_primitive(mode) | @@ -256,19 +304,25 @@ static void r300_emit_draw_elements(struct r300_context *r300, #endif CS_LOCALS(r300); - assert((start * indexSize) % 4 == 0); + assert((start * indexSize) % 4 == 0); + assert(count < (1 << 24)); + + maxIndex = MIN3(maxIndex, r300->vertex_buffer_max_index, count - minIndex); + + DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", + count, minIndex, maxIndex); if (alt_num_verts) { - assert(count < (1 << 24)); - BEGIN_CS(16); + BEGIN_CS(15); OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); } else { - BEGIN_CS(14); + BEGIN_CS(13); } OUT_CS_REG(R300_GA_COLOR_CONTROL, r300_provoking_vertex_fixes(r300, mode)); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex); - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex); + OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); + OUT_CS(maxIndex); + OUT_CS(minIndex); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (indexSize == 4) { count_dwords = count; @@ -292,37 +346,12 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_RELOC(indexBuffer, count_dwords, - RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords, + RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; } -static boolean r300_setup_vertex_buffers(struct r300_context *r300) -{ - struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; - struct pipe_buffer *pbuf; - -validate: - for (int i = 0; i < r300->vertex_element_count; i++) { - pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - - if (!r300->winsys->add_buffer(r300->winsys, pbuf, - RADEON_GEM_DOMAIN_GTT, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - - if (!r300->winsys->validate(r300->winsys)) { - r300->context.flush(&r300->context, 0, NULL); - return r300->winsys->validate(r300->winsys); - } - - return TRUE; -} - static void r300_shorten_ubyte_elts(struct r300_context* r300, struct pipe_buffer** elts, unsigned count) @@ -378,32 +407,23 @@ void r300_draw_range_elements(struct pipe_context* pipe, return; } - r300_update_derived_state(r300); - - r300_emit_buffer_validate(r300); - - if (!r300_setup_vertex_buffers(r300)) { - return; - } - if (indexSize == 1) { r300_shorten_ubyte_elts(r300, &indexBuffer, count); indexSize = 2; } - if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, - RADEON_GEM_DOMAIN_GTT, 0)) { - goto cleanup; - } + r300_update_derived_state(r300); - if (!r300->winsys->validate(r300->winsys)) { - goto cleanup; - } + r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); + /* 128 dwords for emit_aos and emit_draw_elements */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, indexBuffer); r300_emit_dirty_state(r300); - r300_emit_aos(r300, 0); + u_upload_flush(r300->upload_vb); + u_upload_flush(r300->upload_ib); if (alt_num_verts || count <= 65535) { r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); @@ -415,12 +435,18 @@ void r300_draw_range_elements(struct pipe_context* pipe, start += short_count; count -= short_count; + + /* 16 spare dwords are enough for emit_draw_elements. */ + if (count && r300_reserve_cs_space(r300, 16)) { + r300_emit_buffer_validate(r300, TRUE, indexBuffer); + r300_emit_dirty_state(r300); + r300_emit_aos(r300, 0); + } } while (count); } -cleanup: if (indexBuffer != orgIndexBuffer) { - pipe->screen->buffer_destroy(indexBuffer); + pipe_buffer_reference( &indexBuffer, NULL ); } } @@ -430,8 +456,11 @@ void r300_draw_elements(struct pipe_context* pipe, unsigned indexSize, unsigned mode, unsigned start, unsigned count) { - pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + struct r300_context *r300 = r300_context(pipe); + + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, + r300->vertex_buffer_max_index, + mode, start, count); } void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, @@ -452,15 +481,13 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, r300_update_derived_state(r300); - r300_emit_buffer_validate(r300); - if (immd_is_good_idea(r300, count)) { r300_emit_draw_arrays_immediate(r300, mode, start, count); } else { - if (!r300_setup_vertex_buffers(r300)) { - return; - } - + /* Make sure there are at least 128 spare dwords in the command buffer. + * (most of it being consumed by emit_aos) */ + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 128); + r300_emit_buffer_validate(r300, TRUE, NULL); r300_emit_dirty_state(r300); if (alt_num_verts || count <= 65535) { @@ -474,8 +501,16 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, start += short_count; count -= short_count; + + /* Again, we emit both AOS and draw_arrays so there should be + * at least 128 spare dwords. */ + if (count && r300_reserve_cs_space(r300, 128)) { + r300_emit_buffer_validate(r300, TRUE, NULL); + r300_emit_dirty_state(r300); + } } while (count); } + u_upload_flush(r300->upload_vb); } } @@ -605,7 +640,7 @@ r300_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return (struct vertex_info*)r300->vertex_format_state.state; + return &r300->vertex_info; } static boolean r300_render_allocate_vertices(struct vbuf_render* render, @@ -690,6 +725,8 @@ static void r300_render_draw_arrays(struct vbuf_render* render, CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + 2); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); @@ -708,12 +745,15 @@ static void r300_render_draw(struct vbuf_render* render, struct r300_render* r300render = r300_render(render); struct r300_context* r300 = r300render->r300; int i; + unsigned dwords = 2 + (count+1)/2; CS_LOCALS(r300); + r300_reserve_cs_space(r300, r300_get_num_dirty_dwords(r300) + dwords); + r300_emit_buffer_validate(r300, FALSE, NULL); r300_emit_dirty_state(r300); - BEGIN_CS(2 + (count+1)/2); + BEGIN_CS(dwords); OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 6a55570571a..3e31688f8e8 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,17 +21,15 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "util/u_inlines.h" #include "util/u_format.h" #include "util/u_memory.h" -#include "util/u_simple_screen.h" #include "r300_context.h" -#include "r300_screen.h" #include "r300_texture.h" #include "radeon_winsys.h" -#include "r300_winsys.h" + +#include "r300_screen_buffer.h" /* Return the identifier behind whom the brave coders responsible for this * amalgamation of code, sweat, and duct tape, routinely obscure their names. @@ -210,9 +209,9 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, { uint32_t retval = 0; boolean is_r500 = r300_screen(screen)->caps->is_r500; - boolean is_z24 = format == PIPE_FORMAT_Z24X8_UNORM || - format == PIPE_FORMAT_Z24S8_UNORM; - boolean is_color2101010 = format == PIPE_FORMAT_A2B10G10R10_UNORM; + boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM || + format == PIPE_FORMAT_S8Z24_UNORM; + boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM; if (target >= PIPE_MAX_TEXTURE_TYPES) { debug_printf("r300: Implementation error: Received bogus texture " @@ -231,14 +230,16 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, /* Check colorbuffer format support. */ if ((usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) && + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) && /* 2101010 cannot be rendered to on non-r5xx. */ (is_r500 || !is_color2101010) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY); + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED); } /* Check depth-stencil format support. */ @@ -250,82 +251,22 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, return retval == usage; } -static struct pipe_transfer* -r300_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, unsigned x, unsigned y, - unsigned w, unsigned h) -{ - struct r300_texture *tex = (struct r300_texture *)texture; - struct r300_transfer *trans; - struct r300_screen *rscreen = r300_screen(screen); - unsigned offset; - - offset = r300_texture_get_offset(tex, level, zslice, face); /* in bytes */ - - trans = CALLOC_STRUCT(r300_transfer); - if (trans) { - pipe_texture_reference(&trans->transfer.texture, texture); - trans->transfer.x = x; - trans->transfer.y = y; - trans->transfer.width = w; - trans->transfer.height = h; - trans->transfer.stride = r300_texture_get_stride(rscreen, tex, level); - trans->transfer.usage = usage; - trans->transfer.zslice = zslice; - trans->transfer.face = face; - - trans->offset = offset; - } - return &trans->transfer; -} - -static void -r300_tex_transfer_destroy(struct pipe_transfer *trans) -{ - pipe_texture_reference(&trans->texture, NULL); - FREE(trans); -} - -static void* r300_transfer_map(struct pipe_screen* screen, - struct pipe_transfer* transfer) -{ - struct r300_texture* tex = (struct r300_texture*)transfer->texture; - char* map; - enum pipe_format format = tex->tex.format; - - map = pipe_buffer_map(screen, tex->buffer, - pipe_transfer_buffer_flags(transfer)); - - if (!map) { - return NULL; - } - - return map + r300_transfer(transfer)->offset + - transfer->y / util_format_get_blockheight(format) * transfer->stride + - transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); -} - -static void r300_transfer_unmap(struct pipe_screen* screen, - struct pipe_transfer* transfer) -{ - struct r300_texture* tex = (struct r300_texture*)transfer->texture; - pipe_buffer_unmap(screen, tex->buffer); -} - static void r300_destroy_screen(struct pipe_screen* pscreen) { struct r300_screen* r300screen = r300_screen(pscreen); + struct r300_winsys_screen *rws = r300_winsys_screen(pscreen); + + if (rws) + rws->destroy(rws); FREE(r300screen->caps); FREE(r300screen); } -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws) { - struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); - struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); + struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen); + struct r300_capabilities *caps = CALLOC_STRUCT(r300_capabilities); if (!r300screen || !caps) { FREE(r300screen); @@ -333,16 +274,16 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) return NULL; } - caps->pci_id = radeon_winsys->pci_id; - caps->num_frag_pipes = radeon_winsys->gb_pipes; - caps->num_z_pipes = radeon_winsys->z_pipes; + caps->pci_id = rws->get_value(rws, R300_VID_PCI_ID); + caps->num_frag_pipes = rws->get_value(rws, R300_VID_GB_PIPES); + caps->num_z_pipes = rws->get_value(rws, R300_VID_Z_PIPES); r300_init_debug(r300screen); r300_parse_chipset(caps); r300screen->caps = caps; - r300screen->radeon_winsys = radeon_winsys; - r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys; + r300screen->rws = rws; + r300screen->screen.winsys = (struct pipe_winsys*)rws; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; r300screen->screen.get_vendor = r300_get_vendor; @@ -350,13 +291,15 @@ struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys) r300screen->screen.get_paramf = r300_get_paramf; r300screen->screen.is_format_supported = r300_is_format_supported; r300screen->screen.context_create = r300_create_context; - r300screen->screen.get_tex_transfer = r300_get_tex_transfer; - r300screen->screen.tex_transfer_destroy = r300_tex_transfer_destroy; - r300screen->screen.transfer_map = r300_transfer_map; - r300screen->screen.transfer_unmap = r300_transfer_unmap; r300_init_screen_texture_functions(&r300screen->screen); - u_simple_screen_init(&r300screen->screen); + r300_screen_init_buffer_functions(r300screen); return &r300screen->screen; } + +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen) +{ + return r300_screen(screen)->rws; +} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index 502fbfa5a24..1ccc0bfb7a5 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,13 +28,15 @@ #include "r300_chipset.h" +#define R300_TEXTURE_USAGE_TRANSFER PIPE_TEXTURE_USAGE_CUSTOM + struct radeon_winsys; struct r300_screen { /* Parent class */ struct pipe_screen screen; - struct radeon_winsys* radeon_winsys; + struct r300_winsys_screen *rws; /* Chipset capabilities */ struct r300_capabilities* caps; @@ -42,26 +45,12 @@ struct r300_screen { unsigned debug; }; -struct r300_transfer { - /* Parent class */ - struct pipe_transfer transfer; - - /* Offset from start of buffer. */ - unsigned offset; -}; /* Convenience cast wrapper. */ static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) { return (struct r300_screen*)screen; } -/* Convenience cast wrapper. */ -static INLINE struct r300_transfer* -r300_transfer(struct pipe_transfer* transfer) -{ - return (struct r300_transfer*)transfer; -} - /* Debug functionality. */ /** @@ -81,6 +70,7 @@ r300_transfer(struct pipe_transfer* transfer) #define DBG_DRAW 0x0000010 #define DBG_TEX 0x0000020 #define DBG_FALL 0x0000040 +#define DBG_ANISOHQ 0x0000080 /*@}*/ static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags) diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c new file mode 100644 index 00000000000..b97d0d76a4a --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -0,0 +1,314 @@ +/* + * Copyright 2010 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + */ +#include <stdio.h> + +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_upload_mgr.h" +#include "util/u_math.h" + +#include "r300_screen_buffer.h" + +#include "r300_winsys.h" + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + if (r300_buffer_is_user_buffer(buf)) + return FALSE; + + return r300->rws->is_buffer_referenced(r300->rws, rbuf->buf); + +} +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count) +{ + struct pipe_buffer *upload_buffer = NULL; + unsigned index_offset = start * index_size; + int ret = 0; + + if (r300_buffer_is_user_buffer(*index_buffer)) { + ret = u_upload_buffer(r300->upload_ib, + index_offset, + count * index_size, + *index_buffer, + &index_offset, + &upload_buffer); + if (ret) { + goto done; + } + *index_buffer = upload_buffer; + } + done: + // if (upload_buffer) + // pipe_buffer_reference(&upload_buffer, NULL); + return ret; +} + +int r300_upload_user_buffers(struct r300_context *r300) +{ + enum pipe_error ret = PIPE_OK; + int i, nr; + + nr = r300->vertex_buffer_count; + + for (i = 0; i < nr; i++) { + + if (r300_buffer_is_user_buffer(r300->vertex_buffer[i].buffer)) { + struct pipe_buffer *upload_buffer = NULL; + unsigned offset = 0; /*r300->vertex_buffer[i].buffer_offset * 4;*/ + unsigned size = r300->vertex_buffer[i].buffer->size; + unsigned upload_offset; + ret = u_upload_buffer(r300->upload_vb, + offset, size, + r300->vertex_buffer[i].buffer, + &upload_offset, &upload_buffer); + if (ret) + return ret; + + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); + r300->vertex_buffer[i].buffer = upload_buffer; + r300->vertex_buffer[i].buffer_offset = upload_offset; + } + } + return ret; +} + +static struct r300_winsys_buffer * +r300_winsys_buffer_create(struct r300_screen *r300screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_winsys_buffer *buf; + + buf = rws->buffer_create(rws, alignment, usage, size); + return buf; +} + +static void r300_winsys_buffer_destroy(struct r300_screen *r300screen, + struct r300_buffer *rbuf) +{ + struct r300_winsys_screen *rws = r300screen->rws; + + if (rbuf->buf) { + rws->buffer_reference(rws, &rbuf->buf, NULL); + rbuf->buf = NULL; + } +} + +static struct pipe_buffer *r300_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto error1; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = alignment; + rbuf->base.usage = usage; + rbuf->base.size = size; + + rbuf->buf = r300_winsys_buffer_create(r300screen, + alignment, + usage, + size); + + if (!rbuf->buf) + goto error2; + + return &rbuf->base; +error2: + FREE(rbuf); +error1: + return NULL; +} + + +static struct pipe_buffer *r300_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct r300_buffer *rbuf; + + rbuf = CALLOC_STRUCT(r300_buffer); + if (!rbuf) + goto no_rbuf; + + rbuf->magic = R300_BUFFER_MAGIC; + + pipe_reference_init(&rbuf->base.reference, 1); + rbuf->base.screen = screen; + rbuf->base.alignment = 1; + rbuf->base.usage = 0; + rbuf->base.size = bytes; + + rbuf->user_buffer = ptr; + return &rbuf->base; + +no_rbuf: + return NULL; +} + +static void r300_buffer_destroy(struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(buf->screen); + struct r300_buffer *rbuf = r300_buffer(buf); + + r300_winsys_buffer_destroy(r300screen, rbuf); + FREE(rbuf); +} + +static void * +r300_buffer_map_range(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned length, + unsigned usage ) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + int flush = 0; + int i; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + goto just_map; + + /* check if the mapping is to a range we already flushed */ + if (usage & PIPE_BUFFER_USAGE_DISCARD) { + for (i = 0; i < rbuf->num_ranges; i++) { + + if ((offset >= rbuf->ranges[i].start) && + (offset < rbuf->ranges[i].end)) + flush = 1; + + if (flush) { + /* unreference this hw buffer and allocate a new one */ + rws->buffer_reference(rws, &rbuf->buf, NULL); + + rbuf->num_ranges = 0; + rbuf->map = NULL; + rbuf->buf = r300_winsys_buffer_create(r300screen, + rbuf->base.alignment, + rbuf->base.usage, + rbuf->base.size); + break; + } + } + } +just_map: + map = rws->buffer_map(rws, rbuf->buf, usage | R300_USAGE_FLAG_DONT_SYNC); + + return map; +} + +static void +r300_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, + unsigned length ) +{ + struct r300_buffer *rbuf = r300_buffer(buf); + int i; + + if (rbuf->user_buffer) + return; + + if (rbuf->base.usage & PIPE_BUFFER_USAGE_CONSTANT) + return; + + /* mark the range as used */ + for(i = 0; i < rbuf->num_ranges; ++i) { + if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+length)) { + rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset); + rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length)); + return; + } + } + + rbuf->ranges[rbuf->num_ranges].start = offset; + rbuf->ranges[rbuf->num_ranges].end = offset+length; + rbuf->num_ranges++; +} + +static void * +r300_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + void *map; + + if (rbuf->user_buffer) + return rbuf->user_buffer; + + map = rws->buffer_map(rws, rbuf->buf, usage); + + return map; +} + +static void +r300_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + struct r300_screen *r300screen = r300_screen(screen); + struct r300_winsys_screen *rws = r300screen->rws; + struct r300_buffer *rbuf = r300_buffer(buf); + + if (rbuf->buf) { + rws->buffer_unmap(rws, rbuf->buf); + } +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen) +{ + r300screen->screen.buffer_create = r300_buffer_create; + r300screen->screen.user_buffer_create = r300_user_buffer_create; + r300screen->screen.buffer_map = r300_buffer_map; + r300screen->screen.buffer_map_range = r300_buffer_map_range; + r300screen->screen.buffer_flush_mapped_range = r300_buffer_flush_mapped_range; + r300screen->screen.buffer_unmap = r300_buffer_unmap; + r300screen->screen.buffer_destroy = r300_buffer_destroy; +} diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h new file mode 100644 index 00000000000..0cf349c25cd --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen_buffer.h @@ -0,0 +1,99 @@ +#ifndef R300_SCREEN_BUFFER_H +#define R300_SCREEN_BUFFER_H +#include <stdio.h> +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "r300_screen.h" + +#include "r300_winsys.h" +#include "r300_context.h" + +#define R300_BUFFER_MAGIC 0xabcd1234 + +struct r300_buffer_range { + uint32_t start; + uint32_t end; +}; +#define R300_BUFFER_MAX_RANGES 32 + +struct r300_buffer +{ + struct pipe_buffer base; + + uint32_t magic; + + struct r300_winsys_buffer *buf; + + void *user_buffer; + struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES]; + unsigned num_ranges; + + void *map; +}; + +static INLINE struct r300_buffer * +r300_buffer(struct pipe_buffer *buffer) +{ + if (buffer) { + assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC); + return (struct r300_buffer *)buffer; + } + return NULL; +} + +static INLINE boolean +r300_buffer_is_user_buffer(struct pipe_buffer *buffer) +{ + return r300_buffer(buffer)->user_buffer ? true : false; +} + +static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws, + struct pipe_buffer *buffer, + int rd, int wr) +{ + struct r300_buffer *buf = r300_buffer(buffer); + + if (!buf->buf) + return true; + + return rws->add_buffer(rws, buf->buf, rd, wr); +} + + +static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws, + struct r300_texture *tex, + int rd, int wr) +{ + return rws->add_buffer(rws, tex->buffer, rd, wr); +} + +void r300_screen_init_buffer_functions(struct r300_screen *r300screen); + +static INLINE void r300_buffer_write_reloc(struct r300_winsys_screen *rws, + struct r300_buffer *buf, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + if (!buf->buf) + return; + + rws->write_cs_reloc(rws, buf->buf, rd, wd, flags); +} + +static INLINE void r300_texture_write_reloc(struct r300_winsys_screen *rws, + struct r300_texture *texture, + uint32_t rd, uint32_t wd, uint32_t flags) +{ + rws->write_cs_reloc(rws, texture->buffer, rd, wd, flags); +} + +int r300_upload_user_buffers(struct r300_context *r300); + +int r300_upload_index_buffer(struct r300_context *r300, + struct pipe_buffer **index_buffer, + unsigned index_size, + unsigned start, + unsigned count); + +boolean r300_buffer_is_referenced(struct r300_context *r300, + struct pipe_buffer *buf); +#endif diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 34bf81c1930..712e9280e3c 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -34,6 +34,7 @@ #include "r300_context.h" #include "r300_reg.h" #include "r300_screen.h" +#include "r300_screen_buffer.h" #include "r300_state_inlines.h" #include "r300_fs.h" #include "r300_vs.h" @@ -43,6 +44,12 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +#define UPDATE_STATE(cso, atom) \ + if (cso != atom.state) { \ + atom.state = cso; \ + atom.dirty = TRUE; \ + } + static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, unsigned dstRGB, unsigned dstA) { @@ -328,8 +335,7 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state.state = state; - r300->blend_state.dirty = TRUE; + UPDATE_STATE(state, r300->blend_state); } /* Free blend state. */ @@ -356,7 +362,7 @@ static void r300_set_blend_color(struct pipe_context* pipe, (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; - util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ @@ -376,6 +382,8 @@ static void r300_set_clip_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); + r300->clip = *state; + if (r300_screen(pipe->screen)->caps->has_tcl) { memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); r300->clip_state.size = 29; @@ -476,11 +484,8 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); - struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state.state = state; - r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; - r300->dsa_state.dirty = TRUE; + UPDATE_STATE(state, r300->dsa_state); } /* Free DSA state. */ @@ -521,7 +526,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->cbufs[i]->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -533,7 +538,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)old_state->zsbuf->texture; if (tex) { - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[0], tex->microtile != 0, tex->macrotile != 0); @@ -545,7 +550,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->cbufs[i]->texture; level = new_state->cbufs[i]->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -554,7 +559,7 @@ static void r300_fb_update_tiling_flags(struct r300_context *r300, tex = (struct r300_texture*)new_state->zsbuf->texture; level = new_state->zsbuf->level; - r300->winsys->buffer_set_tiling(r300->winsys, tex->buffer, + r300->rws->buffer_set_tiling(r300->rws, tex->buffer, tex->pitch[level], tex->microtile != 0, tex->mip_macrotile[level] != 0); @@ -567,6 +572,7 @@ static void { struct r300_context* r300 = r300_context(pipe); struct r300_screen* r300screen = r300_screen(pipe->screen); + struct pipe_framebuffer_state *old_state = r300->fb_state.state; unsigned max_width, max_height; uint32_t zbuffer_bpp = 0; @@ -591,22 +597,30 @@ static void return; } - if (r300->draw) { draw_flush(r300->draw); } - memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + r300->fb_state.dirty = TRUE; - r300->fb_state.size = (10 * state->nr_cbufs) + (state->zsbuf ? 10 : 0) + 6; + /* If nr_cbufs is changed from zero to non-zero or vice versa... */ + if (!!old_state->nr_cbufs != !!state->nr_cbufs) { + r300->blend_state.dirty = TRUE; + } + /* If zsbuf is set from NULL to non-NULL or vice versa.. */ + if (!!old_state->zsbuf != !!state->zsbuf) { + r300->dsa_state.dirty = TRUE; + } + if (!r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; + } r300_fb_update_tiling_flags(r300, r300->fb_state.state, state); - /* XXX wait what */ - r300->blend_state.dirty = TRUE; - r300->dsa_state.dirty = TRUE; - r300->fb_state.dirty = TRUE; - r300->scissor_state.dirty = TRUE; + memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state)); + + r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) + + (state->zsbuf ? 10 : 0) + 8; /* Polygon offset depends on the zbuffer bit depth. */ if (state->zsbuf && r300->polygon_offset_enabled) { @@ -658,8 +672,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); - if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { - r300->vertex_format_state.dirty = TRUE; + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + + if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) { + r300->vap_output_state.dirty = TRUE; } r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -709,32 +725,14 @@ static void* r300_create_rs_state(struct pipe_context* pipe, rs->vap_control_status = R300_VC_32BIT_SWAP; #endif - /* If bypassing TCL, or if no TCL engine is present, turn off the HW TCL. - * Else, enable HW TCL and force Draw's TCL off. */ - if (state->bypass_vs_clip_and_viewport || - !r300screen->caps->has_tcl) { + /* If no TCL engine is present, turn off the HW TCL. */ + if (!r300screen->caps->has_tcl) { rs->vap_control_status |= R300_VAP_TCL_BYPASS; } rs->point_size = pack_float_16_6x(state->point_size) | (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); - /* Point minimum and maximum sizes. This register has to be emitted, - * and it'd be a step backwards to put it in invariant state. */ - if (r300screen->caps->is_r500) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4096.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else if (r300screen->caps->is_r400) { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(4021.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } else { - rs->point_minmax = - ((int)(0.0 * 6.0) << R300_GA_POINT_MINMAX_MIN_SHIFT) | - ((int)(2560.0 * 6.0) << R300_GA_POINT_MINMAX_MAX_SHIFT); - } - rs->line_control = pack_float_16_6x(state->line_width) | R300_GA_LINE_CNTL_END_TYPE_COMP; @@ -817,6 +815,7 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); struct r300_rs_state* rs = (struct r300_rs_state*)state; + boolean scissor_was_enabled = r300->scissor_enabled; if (r300->draw) { draw_flush(r300->draw); @@ -824,22 +823,18 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) } if (rs) { - r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw; + r300->scissor_enabled = rs->rs.scissor; } else { - r300->tcl_bypass = FALSE; r300->polygon_offset_enabled = FALSE; + r300->scissor_enabled = FALSE; } - r300->rs_state.state = rs; - r300->rs_state.dirty = TRUE; - /* XXX Why is this still needed, dammit!? */ - r300->scissor_state.dirty = TRUE; - r300->viewport_state.dirty = TRUE; + UPDATE_STATE(state, r300->rs_state); + r300->rs_state.size = 17 + (r300->polygon_offset_enabled ? 5 : 0); - /* XXX Clean these up when we move to atom emits */ - if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { - r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + if (scissor_was_enabled != r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; } } @@ -855,6 +850,7 @@ static void* { struct r300_context* r300 = r300_context(pipe); struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); + boolean is_r500 = r300_screen(pipe->screen)->caps->is_r500; int lod_bias; union util_color uc; @@ -870,8 +866,10 @@ static void* state->min_mip_filter, state->max_anisotropy > 0); + sampler->filter0 |= r300_anisotropy(state->max_anisotropy); + /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ - /* We must pass these to the emit function to clamp them properly. */ + /* We must pass these to the merge function to clamp them properly. */ sampler->min_lod = MAX2((unsigned)state->min_lod, 0); sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0); @@ -879,9 +877,15 @@ static void* sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; - sampler->filter1 |= r300_anisotropy(state->max_anisotropy); + /* This is very high quality anisotropic filtering for R5xx. + * It's good for benchmarking the performance of texturing but + * in practice we don't want to slow down the driver because it's + * a pretty good performance killer. Feel free to play with it. */ + if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) { + sampler->filter1 |= r500_anisotropy(state->max_anisotropy); + } - util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); sampler->border_color = uc.ui; /* R500-specific fixups and optimizations */ @@ -897,23 +901,20 @@ static void r300_bind_sampler_states(struct pipe_context* pipe, void** states) { struct r300_context* r300 = r300_context(pipe); - int i; + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; if (count > 8) { return; } - for (i = 0; i < count; i++) { - if (r300->sampler_states[i] != states[i]) { - r300->sampler_states[i] = (struct r300_sampler_state*)states[i]; - r300->dirty_state |= (R300_NEW_SAMPLER << i); - } - } + memcpy(state->sampler_states, states, sizeof(void*) * count); + state->sampler_count = count; - r300->sampler_count = count; + r300->textures_state.dirty = TRUE; /* Pick a fragment shader based on the texture compare state. */ - if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) { + if (r300->fs && count) { if (r300_pick_fragment_shader(r300)) { r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -937,22 +938,25 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, struct pipe_texture** texture) { struct r300_context* r300 = r300_context(pipe); + struct r300_textures_state* state = + (struct r300_textures_state*)r300->textures_state.state; + unsigned i; boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500; - int i; + boolean dirty_tex = FALSE; /* XXX magic num */ if (count > 8) { return; } - + for (i = 0; i < count; i++) { - if (r300->textures[i] != (struct r300_texture*)texture[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], - texture[i]); - r300->dirty_state |= (R300_NEW_TEXTURE << i); + if (state->textures[i] != (struct r300_texture*)texture[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], + texture[i]); + dirty_tex = TRUE; - /* R300-specific - set the texrect factor in a fragment shader */ - if (!is_r500 && r300->textures[i]->is_npot) { + /* R300-specific - set the texrect factor in the fragment shader */ + if (!is_r500 && state->textures[i]->is_npot) { /* XXX It would be nice to re-emit just 1 constant, * XXX not all of them */ r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; @@ -961,14 +965,19 @@ static void r300_set_sampler_textures(struct pipe_context* pipe, } for (i = count; i < 8; i++) { - if (r300->textures[i]) { - pipe_texture_reference((struct pipe_texture**)&r300->textures[i], + if (state->textures[i]) { + pipe_texture_reference((struct pipe_texture**)&state->textures[i], NULL); - r300->dirty_state |= (R300_NEW_TEXTURE << i); } } - r300->texture_count = count; + state->texture_count = count; + + r300->textures_state.dirty = TRUE; + + if (dirty_tex) { + r300->texture_cache_inval.dirty = TRUE; + } } static void r300_set_scissor_state(struct pipe_context* pipe, @@ -979,7 +988,9 @@ static void r300_set_scissor_state(struct pipe_context* pipe, memcpy(r300->scissor_state.state, state, sizeof(struct pipe_scissor_state)); - r300->scissor_state.dirty = TRUE; + if (r300->scissor_enabled) { + r300->scissor_state.dirty = TRUE; + } } static void r300_set_viewport_state(struct pipe_context* pipe, @@ -989,6 +1000,8 @@ static void r300_set_viewport_state(struct pipe_context* pipe, struct r300_viewport_state* viewport = (struct r300_viewport_state*)r300->viewport_state.state; + r300->viewport = *state; + /* Do the transform in HW. */ viewport->vte_control = R300_VTX_W0_FMT; @@ -1028,27 +1041,45 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe, const struct pipe_vertex_buffer* buffers) { struct r300_context* r300 = r300_context(pipe); + int i; + unsigned max_index = (1 << 24) - 1; + boolean any_user_buffer = false; + + if (count == r300->vertex_buffer_count && + memcmp(r300->vertex_buffer, buffers, count * sizeof(buffers[0])) == 0) + return; + + for (i = 0; i < count; i++) { + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer); + if (r300_buffer_is_user_buffer(buffers[i].buffer)) + any_user_buffer = true; + max_index = MIN2(buffers[i].max_index, max_index); + } + + for ( ; i < r300->vertex_buffer_count; i++) + pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL); memcpy(r300->vertex_buffer, buffers, - sizeof(struct pipe_vertex_buffer) * count); + sizeof(struct pipe_vertex_buffer) * count); + r300->vertex_buffer_count = count; + r300->vertex_buffer_max_index = max_index; + r300->any_user_vbs = any_user_buffer; if (r300->draw) { draw_flush(r300->draw); draw_set_vertex_buffers(r300->draw, count, buffers); } - - r300->vertex_format_state.dirty = TRUE; } static boolean r300_validate_aos(struct r300_context *r300) { struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; - struct pipe_vertex_element *velem = r300->vertex_element; + struct pipe_vertex_element *velem = r300->velems->velem; int i; /* Check if formats and strides are aligned to the size of DWORD. */ - for (i = 0; i < r300->vertex_element_count; i++) { + for (i = 0; i < r300->velems->count; i++) { if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 || util_format_get_blocksize(velem[i].src_format) % 4 != 0) { return FALSE; @@ -1057,20 +1088,209 @@ static boolean r300_validate_aos(struct r300_context *r300) return TRUE; } -static void r300_set_vertex_elements(struct pipe_context* pipe, - unsigned count, - const struct pipe_vertex_element* elements) +static void r300_draw_emit_attrib(struct r300_context* r300, + enum attrib_emit emit, + enum interp_mode interp, + int index) { - struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = r300->vs_state.state; + struct tgsi_shader_info* info = &vs->info; + int output; + + output = draw_find_shader_output(r300->draw, + info->output_semantic_name[index], + info->output_semantic_index[index]); + draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output); +} + +static void r300_draw_emit_all_attribs(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_shader_semantics* vs_outputs = &vs->outputs; + int i, gen_count; + + /* Position. */ + if (vs_outputs->pos != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->pos); + } else { + assert(0); + } + + /* Point size. */ + if (vs_outputs->psize != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, + vs_outputs->psize); + } + + /* Colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (vs_outputs->color[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, + vs_outputs->color[i]); + } + } - memcpy(r300->vertex_element, - elements, - sizeof(struct pipe_vertex_element) * count); - r300->vertex_element_count = count; + /* XXX Back-face colors. */ + + /* Texture coordinates. */ + gen_count = 0; + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->generic[i]); + gen_count++; + } + } + + /* Fog coordinates. */ + if (vs_outputs->fog != ATTR_UNUSED) { + r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, + vs_outputs->fog); + gen_count++; + } + + /* XXX magic */ + assert(gen_count <= 8); +} + +/* Update the PSC tables. */ +static void r300_vertex_psc(struct r300_vertex_element_state *velems) +{ + struct r300_vertex_stream_state *vstream = &velems->vertex_stream; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i; + + assert(velems->count <= 16); + + /* Vertex shaders have no semantics on their inputs, + * so PSC should just route stuff based on the vertex elements, + * and not on attrib information. */ + for (i = 0; i < velems->count; i++) { + format = velems->velem[i].src_format; + + type = r300_translate_vertex_data_type(format) | + (i << R300_DST_VEC_LOC_SHIFT); + swizzle = r300_translate_vertex_data_swizzle(format); + + if (i & 1) { + vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vstream->vap_prog_stream_cntl[i >> 1] |= type; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } + } + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vstream->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vstream->count = (i >> 1) + 1; +} + +/* Update the PSC tables for SW TCL, using Draw. */ +static void r300_swtcl_vertex_psc(struct r300_context *r300, + struct r300_vertex_element_state *velems) +{ + struct r300_vertex_stream_state *vstream = &velems->vertex_stream; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct vertex_info* vinfo = &r300->vertex_info; + uint16_t type, swizzle; + enum pipe_format format; + unsigned i, attrib_count; + int* vs_output_tab = vs->stream_loc_notcl; + + /* For each Draw attribute, route it to the fragment shader according + * to the vs_output_tab. */ + attrib_count = vinfo->num_attribs; + DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); + for (i = 0; i < attrib_count; i++) { + DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," + " vs_output_tab %d\n", vinfo->attrib[i].src_index, + vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, + vs_output_tab[i]); + } + + for (i = 0; i < attrib_count; i++) { + /* Make sure we have a proper destination for our attribute. */ + assert(vs_output_tab[i] != -1); + + format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + + /* Obtain the type of data in this attribute. */ + type = r300_translate_vertex_data_type(format) | + vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; + + /* Obtain the swizzle for this attribute. Note that the default + * swizzle in the hardware is not XYZW! */ + swizzle = r300_translate_vertex_data_swizzle(format); + + /* Add the attribute to the PSC table. */ + if (i & 1) { + vstream->vap_prog_stream_cntl[i >> 1] |= type << 16; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; + } else { + vstream->vap_prog_stream_cntl[i >> 1] |= type; + vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; + } + } + + /* Set the last vector in the PSC. */ + if (i) { + i -= 1; + } + vstream->vap_prog_stream_cntl[i >> 1] |= + (R300_LAST_VEC << (i & 1 ? 16 : 0)); + + vstream->count = (i >> 1) + 1; +} + +static void* r300_create_vertex_elements_state(struct pipe_context* pipe, + unsigned count, + const struct pipe_vertex_element* attribs) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_vertex_element_state *velems; + + assert(count <= PIPE_MAX_ATTRIBS); + velems = CALLOC_STRUCT(r300_vertex_element_state); + if (velems != NULL) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count); + + if (r300screen->caps->has_tcl) { + r300_vertex_psc(velems); + } else { + memset(&r300->vertex_info, 0, sizeof(struct vertex_info)); + r300_draw_emit_all_attribs(r300); + draw_compute_vertex_size(&r300->vertex_info); + r300_swtcl_vertex_psc(r300, velems); + } + } + return velems; +} + +static void r300_bind_vertex_elements_state(struct pipe_context *pipe, + void *state) +{ + struct r300_context *r300 = r300_context(pipe); + struct r300_vertex_element_state *velems = state; + + if (velems == NULL) { + return; + } + + r300->velems = velems; if (r300->draw) { draw_flush(r300->draw); - draw_set_vertex_elements(r300->draw, count, elements); + draw_set_vertex_elements(r300->draw, velems->count, velems->velem); } if (!r300_validate_aos(r300)) { @@ -1078,6 +1298,14 @@ static void r300_set_vertex_elements(struct pipe_context* pipe, assert(0); abort(); } + + UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state); + r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2; +} + +static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state) +{ + FREE(state); } static void* r300_create_vs_state(struct pipe_context* pipe, @@ -1085,64 +1313,71 @@ static void* r300_create_vs_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); - /* Copy state directly into shader. */ - vs->state = *shader; - vs->state.tokens = tgsi_dup_tokens(shader->tokens); - - tgsi_scan_shader(shader->tokens, &vs->info); + struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader); + r300_vertex_shader_common_init(vs, shader); - return (void*)vs; + if (r300_screen(pipe->screen)->caps->has_tcl) { + r300_translate_vertex_shader(r300, vs); } else { - return draw_create_vertex_shader(r300->draw, shader); + vs->draw_vs = draw_create_vertex_shader(r300->draw, shader); } + + return vs; } static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; - if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; + if (vs == NULL) { + r300->vs_state.state = NULL; + return; + } + if (vs == r300->vs_state.state) { + return; + } + r300->vs_state.state = vs; - if (vs == NULL) { - r300->vs = NULL; - return; - } else if (!vs->translated) { - r300_translate_vertex_shader(r300, vs); - } + // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + memcpy(r300->vap_output_state.state, &vs->vap_out, + sizeof(struct r300_vap_output_state)); + r300->vap_output_state.dirty = TRUE; - r300->vs = vs; - if (r300->fs) { - r300_vertex_shader_setup_wpos(r300); - } + /* The majority of the RS block bits is dependent on the vertex shader. */ + r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */ + + if (r300_screen(pipe->screen)->caps->has_tcl) { + r300->vs_state.dirty = TRUE; + r300->vs_state.size = vs->code.length + 9; - r300->vertex_format_state.dirty = TRUE; + r300->pvs_flush.dirty = TRUE; - r300->dirty_state |= - R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, - (struct draw_vertex_shader*)shader); + (struct draw_vertex_shader*)vs->draw_vs); } } static void r300_delete_vs_state(struct pipe_context* pipe, void* shader) { struct r300_context* r300 = r300_context(pipe); + struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; if (r300_screen(pipe->screen)->caps->has_tcl) { - struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader; - rc_constants_destroy(&vs->code.constants); - FREE((void*)vs->state.tokens); - FREE(shader); } else { draw_delete_vertex_shader(r300->draw, - (struct draw_vertex_shader*)shader); + (struct draw_vertex_shader*)vs->draw_vs); } + + FREE((void*)vs->state.tokens); + FREE(shader); } static void r300_set_constant_buffer(struct pipe_context *pipe, @@ -1193,8 +1428,12 @@ static void r300_set_constant_buffer(struct pipe_context *pipe, r300->shader_constants[shader].count = buf->size / (4 * sizeof(float)); pipe_buffer_unmap(pipe->screen, buf); - if (shader == PIPE_SHADER_VERTEX) - r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + if (shader == PIPE_SHADER_VERTEX) { + if (r300screen->caps->has_tcl) { + r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS; + r300->pvs_flush.dirty = TRUE; + } + } else if (shader == PIPE_SHADER_FRAGMENT) r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -1241,7 +1480,10 @@ void r300_init_state_functions(struct r300_context* r300) r300->context.set_viewport_state = r300_set_viewport_state; r300->context.set_vertex_buffers = r300_set_vertex_buffers; - r300->context.set_vertex_elements = r300_set_vertex_elements; + + r300->context.create_vertex_elements_state = r300_create_vertex_elements_state; + r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state; + r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state; r300->context.create_vs_state = r300_create_vs_state; r300->context.bind_vs_state = r300_bind_vs_state; diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 2cbce9210a7..6b9f61acd7b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -37,179 +37,6 @@ /* r300_state_derived: Various bits of state which are dependent upon * currently bound CSO data. */ -static void r300_draw_emit_attrib(struct r300_context* r300, - enum attrib_emit emit, - enum interp_mode interp, - int index) -{ - struct tgsi_shader_info* info = &r300->vs->info; - int output; - - output = draw_find_shader_output(r300->draw, - info->output_semantic_name[index], - info->output_semantic_index[index]); - draw_emit_vertex_attr( - (struct vertex_info*)r300->vertex_format_state.state, - emit, interp, output); -} - -static void r300_draw_emit_all_attribs(struct r300_context* r300) -{ - struct r300_shader_semantics* vs_outputs = &r300->vs->outputs; - int i, gen_count; - - /* Position. */ - if (vs_outputs->pos != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->pos); - } else { - assert(0); - } - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS, - vs_outputs->psize); - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR, - vs_outputs->color[i]); - } - } - - /* XXX Back-face colors. */ - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->generic[i]); - gen_count++; - } - } - - /* Fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE, - vs_outputs->fog); - gen_count++; - } - - /* XXX magic */ - assert(gen_count <= 8); -} - -/* Update the PSC tables. */ -static void r300_vertex_psc(struct r300_context* r300) -{ - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - uint16_t type, swizzle; - enum pipe_format format; - unsigned i; - int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - int* stream_tab; - - /* If TCL is bypassed, map vertex streams to equivalent VS output - * locations. */ - if (r300->tcl_bypass) { - stream_tab = r300->vs->stream_loc_notcl; - } else { - stream_tab = identity; - } - - /* Vertex shaders have no semantics on their inputs, - * so PSC should just route stuff based on the vertex elements, - * and not on attrib information. */ - DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements" - " in psc\n", - r300->vs->info.num_inputs, - r300->vertex_element_count); - - for (i = 0; i < r300->vertex_element_count; i++) { - format = r300->vertex_element[i].src_format; - - type = r300_translate_vertex_data_type(format) | - (stream_tab[i] << R300_DST_VEC_LOC_SHIFT); - swizzle = r300_translate_vertex_data_swizzle(format); - - if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; - } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; - } - } - - assert(i <= 15); - - /* Set the last vector in the PSC. */ - if (i) { - i -= 1; - } - vformat->vap_prog_stream_cntl[i >> 1] |= - (R300_LAST_VEC << (i & 1 ? 16 : 0)); -} - -/* Update the PSC tables for SW TCL, using Draw. */ -static void r300_swtcl_vertex_psc(struct r300_context* r300) -{ - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; - uint16_t type, swizzle; - enum pipe_format format; - unsigned i, attrib_count; - int* vs_output_tab = r300->vs->stream_loc_notcl; - - /* For each Draw attribute, route it to the fragment shader according - * to the vs_output_tab. */ - attrib_count = vinfo->num_attribs; - DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count); - for (i = 0; i < attrib_count; i++) { - DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d," - " vs_output_tab %d\n", vinfo->attrib[i].src_index, - vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit, - vs_output_tab[i]); - } - - for (i = 0; i < attrib_count; i++) { - /* Make sure we have a proper destination for our attribute. */ - assert(vs_output_tab[i] != -1); - - format = draw_translate_vinfo_format(vinfo->attrib[i].emit); - - /* Obtain the type of data in this attribute. */ - type = r300_translate_vertex_data_type(format) | - vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT; - - /* Obtain the swizzle for this attribute. Note that the default - * swizzle in the hardware is not XYZW! */ - swizzle = r300_translate_vertex_data_swizzle(format); - - /* Add the attribute to the PSC table. */ - if (i & 1) { - vformat->vap_prog_stream_cntl[i >> 1] |= type << 16; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16; - } else { - vformat->vap_prog_stream_cntl[i >> 1] |= type; - vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle; - } - } - - /* Set the last vector in the PSC. */ - if (i) { - i -= 1; - } - vformat->vap_prog_stream_cntl[i >> 1] |= - (R300_LAST_VEC << (i & 1 ? 16 : 0)); -} - static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr, boolean swizzle_0001) { @@ -416,33 +243,16 @@ static void r300_update_rs_block(struct r300_context* r300, /* Now, after all that, see if we actually need to update the state. */ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) { memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block)); - r300->rs_block_state.size = 5 + count; - r300->rs_block_state.dirty = TRUE; + r300->rs_block_state.size = 5 + count*2; } } /* Update the shader-dependant states. */ static void r300_update_derived_shader_state(struct r300_context* r300) { - struct r300_screen* r300screen = r300_screen(r300->context.screen); - struct r300_vertex_info *vformat = - (struct r300_vertex_info*)r300->vertex_format_state.state; - struct vertex_info* vinfo = &vformat->vinfo; + struct r300_vertex_shader* vs = r300->vs_state.state; - /* Mmm, delicious hax */ - memset(r300->vertex_format_state.state, 0, sizeof(struct r300_vertex_info)); - memcpy(vinfo->hwfmt, r300->vs->hwfmt, sizeof(uint)*4); - - r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs); - - if (r300screen->caps->has_tcl) { - r300_vertex_psc(r300); - } else { - r300_draw_emit_all_attribs(r300); - draw_compute_vertex_size( - (struct vertex_info*)r300->vertex_format_state.state); - r300_swtcl_vertex_psc(r300); - } + r300_update_rs_block(r300, &vs->outputs, &r300->fs->inputs); } static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa) @@ -516,14 +326,72 @@ static void r300_update_ztop(struct r300_context* r300) r300->ztop_state.dirty = TRUE; } +static void r300_merge_textures_and_samplers(struct r300_context* r300) +{ + struct r300_textures_state *state = + (struct r300_textures_state*)r300->textures_state.state; + struct r300_texture_sampler_state *texstate; + struct r300_sampler_state *sampler; + struct r300_texture *tex; + unsigned min_level, max_level, i, size; + unsigned count = MIN2(state->texture_count, state->sampler_count); + + state->tx_enable = 0; + size = 2; + + for (i = 0; i < count; i++) { + if (state->textures[i] && state->sampler_states[i]) { + state->tx_enable |= 1 << i; + + tex = state->textures[i]; + sampler = state->sampler_states[i]; + + texstate = &state->regs[i]; + memcpy(texstate->format, &tex->state, sizeof(uint32_t)*3); + texstate->filter[0] = sampler->filter0; + texstate->filter[1] = sampler->filter1; + texstate->border_color = sampler->border_color; + texstate->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile); + + /* to emulate 1D textures through 2D ones correctly */ + if (tex->tex.target == PIPE_TEXTURE_1D) { + texstate->filter[0] &= ~R300_TX_WRAP_T_MASK; + texstate->filter[0] |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE); + } + + if (tex->is_npot) { + /* NPOT textures don't support mip filter, unfortunately. + * This prevents incorrect rendering. */ + texstate->filter[0] &= ~R300_TX_MIN_FILTER_MIP_MASK; + } else { + /* determine min/max levels */ + /* the MAX_MIP level is the largest (finest) one */ + max_level = MIN2(sampler->max_lod, tex->tex.last_level); + min_level = MIN2(sampler->min_lod, max_level); + texstate->format[0] |= R300_TX_NUM_LEVELS(max_level); + texstate->filter[0] |= R300_TX_MAX_MIP_LEVEL(min_level); + } + + texstate->filter[0] |= i << 28; + + size += 16; + state->count = i+1; + } + } + + r300->textures_state.size = size; +} + void r300_update_derived_state(struct r300_context* r300) { - /* XXX */ - if (r300->dirty_state & - (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER) || - r300->vertex_format_state.dirty || r300->rs_state.dirty) { + if (r300->rs_block_state.dirty) { r300_update_derived_shader_state(r300); } + if (r300->textures_state.dirty) { + r300_merge_textures_and_samplers(r300); + } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index 0e1cb328d17..8485d4f8f94 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -327,6 +327,18 @@ static INLINE uint32_t r300_anisotropy(unsigned max_aniso) } } +static INLINE uint32_t r500_anisotropy(unsigned max_aniso) +{ + if (!max_aniso) { + return 0; + } + max_aniso -= 1; + + // Map the range [0, 15] to [0, 63]. + return R500_TX_MAX_ANISO(MIN2((unsigned)(max_aniso*4.2001), 63)) | + R500_TX_ANISO_HIGH_QUALITY;; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) @@ -348,44 +360,16 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) return 0; } -/* Utility function to count the number of components in RGBAZS formats. - * XXX should go to util or p_format.h */ -static INLINE unsigned pf_component_count(enum pipe_format format) { - unsigned count = 0; - - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) { - count++; - } - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) { - count++; - } - - return count; -} - /* Translate pipe_formats into PSC vertex types. */ static INLINE uint16_t r300_translate_vertex_data_type(enum pipe_format format) { uint32_t result = 0; const struct util_format_description *desc; - unsigned components = pf_component_count(format); + unsigned components = util_format_get_nr_components(format); desc = util_format_description(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); assert(0); @@ -454,36 +438,19 @@ r300_translate_vertex_data_type(enum pipe_format format) { static INLINE uint16_t r300_translate_vertex_data_swizzle(enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); - unsigned swizzle[4], i; assert(format); - if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH && - desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) { + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { debug_printf("r300: Bad format %s in %s:%d\n", util_format_name(format), __FUNCTION__, __LINE__); return 0; } - /* Swizzles for 8bits formats are in the reversed order, not sure why. */ - if (desc->channel[0].size == 8) { - for (i = 0; i < 4; i++) { - if (desc->swizzle[i] <= 3) { - swizzle[i] = 3 - desc->swizzle[i]; - } else { - swizzle[i] = desc->swizzle[i]; - } - } - } else { - for (i = 0; i < 4; i++) { - swizzle[i] = desc->swizzle[i]; - } - } - - return ((swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | + return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | + (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | + (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | + (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) | (0xf << R300_WRITE_ENA_SHIFT)); } diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 97927acf1b4..4a2c68269b1 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -38,7 +38,8 @@ struct pipe_viewport_state r300_viewport_identity = { * * Note that eventually this should be empty, but it's useful for development * and general unduplication of code. */ -void r300_emit_invariant_state(struct r300_context* r300, void* state) +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state) { struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h index 5d1a9636545..83d031c7fe9 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.h +++ b/src/gallium/drivers/r300/r300_state_invariant.h @@ -25,6 +25,7 @@ struct r300_context; -void r300_emit_invariant_state(struct r300_context* r300, void* state); +void r300_emit_invariant_state(struct r300_context* r300, + unsigned size, void* state); #endif /* R300_STATE_INVARIANT_H */ diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index ed2be06254a..7c7656068bb 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -70,6 +70,12 @@ static uint32_t r300_translate_texformat(enum pipe_format format) R300_TX_FORMAT_B_SHIFT, R300_TX_FORMAT_A_SHIFT }; + const uint32_t swizzle[4] = { + R300_TX_FORMAT_X, + R300_TX_FORMAT_Y, + R300_TX_FORMAT_Z, + R300_TX_FORMAT_W + }; const uint32_t sign_bit[4] = { R300_TX_FORMAT_SIGNED_X, R300_TX_FORMAT_SIGNED_Y, @@ -86,8 +92,8 @@ static uint32_t r300_translate_texformat(enum pipe_format format) switch (format) { case PIPE_FORMAT_Z16_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, X16); - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP); default: return ~0; /* Unsupported. */ @@ -98,9 +104,9 @@ static uint32_t r300_translate_texformat(enum pipe_format format) result |= R300_TX_FORMAT_YUV_TO_RGB; switch (format) { - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result; - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result; default: return ~0; /* Unsupported/unknown. */ @@ -119,16 +125,16 @@ static uint32_t r300_translate_texformat(enum pipe_format format) switch (desc->swizzle[i]) { case UTIL_FORMAT_SWIZZLE_X: case UTIL_FORMAT_SWIZZLE_NONE: - result |= R300_TX_FORMAT_X << swizzle_shift[i]; + result |= swizzle[0] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Y: - result |= R300_TX_FORMAT_Y << swizzle_shift[i]; + result |= swizzle[1] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_Z: - result |= R300_TX_FORMAT_Z << swizzle_shift[i]; + result |= swizzle[2] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_W: - result |= R300_TX_FORMAT_W << swizzle_shift[i]; + result |= swizzle[3] << swizzle_shift[i]; break; case UTIL_FORMAT_SWIZZLE_0: result |= R300_TX_FORMAT_ZERO << swizzle_shift[i]; @@ -142,7 +148,7 @@ static uint32_t r300_translate_texformat(enum pipe_format format) } /* Compressed formats. */ - if (desc->layout == UTIL_FORMAT_LAYOUT_DXT) { + if (desc->layout == UTIL_FORMAT_LAYOUT_COMPRESSED) { switch (format) { case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: @@ -302,33 +308,30 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) return R300_COLOR_FORMAT_I8; /* 16-bit buffers. */ - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return R300_COLOR_FORMAT_RGB565; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return R300_COLOR_FORMAT_ARGB1555; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return R300_COLOR_FORMAT_ARGB4444; /* 32-bit buffers. */ - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_SRGB: case PIPE_FORMAT_B8G8R8A8_UNORM: case PIPE_FORMAT_B8G8R8A8_SRGB: case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_B8G8R8X8_SRGB: - case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_SRGB: + case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8X8_SRGB: - case PIPE_FORMAT_R8G8B8X8_SNORM: - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - case PIPE_FORMAT_X8UB8UG8SR8S_NORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_SRGB: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: return R300_COLOR_FORMAT_ARGB8888; - case PIPE_FORMAT_A2B10G10R10_UNORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */ /* 64-bit buffers. */ @@ -345,9 +348,9 @@ static uint32_t r300_translate_colorformat(enum pipe_format format) #endif /* YUV buffers. */ - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_UYVY: return R300_COLOR_FORMAT_YVYU; - case PIPE_FORMAT_YCBCR_REV: + case PIPE_FORMAT_YUYV: return R300_COLOR_FORMAT_VYUY; default: return ~0; /* Unsupported. */ @@ -362,9 +365,9 @@ static uint32_t r300_translate_zsformat(enum pipe_format format) case PIPE_FORMAT_Z16_UNORM: return R300_DEPTHFORMAT_16BIT_INT_Z; /* 24-bit depth, ignored stencil */ - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: /* 24-bit depth, 8-bit stencil */ - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; default: return ~0; /* Unsupported. */ @@ -431,42 +434,39 @@ static uint32_t r300_translate_out_fmt(enum pipe_format format) return modifier | R300_C2_SEL_R; /* ARGB 32-bit outputs. */ - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_A8R8G8B8_SRGB: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_SRGB: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8A8_SRGB: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B8G8R8X8_SRGB: return modifier | R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A; /* BGRA 32-bit outputs. */ - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_B8G8R8A8_SRGB: - case PIPE_FORMAT_B8G8R8X8_UNORM: - case PIPE_FORMAT_B8G8R8X8_SRGB: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_R | R300_C2_SEL_G | R300_C3_SEL_B; /* RGBA 32-bit outputs. */ - case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_A8B8G8R8_UNORM: case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_SRGB: - case PIPE_FORMAT_R8G8B8X8_UNORM: - case PIPE_FORMAT_R8G8B8X8_SRGB: - case PIPE_FORMAT_R8G8B8X8_SNORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8B8G8R8_SRGB: return modifier | R300_C0_SEL_A | R300_C1_SEL_B | R300_C2_SEL_G | R300_C3_SEL_R; /* ABGR 32-bit outputs. */ - case PIPE_FORMAT_A8B8G8R8_SNORM: - case PIPE_FORMAT_X8B8G8R8_SNORM: - case PIPE_FORMAT_X8UB8UG8SR8S_NORM: - case PIPE_FORMAT_A2B10G10R10_UNORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_R10G10B10A2_UNORM: /* RGBA high precision outputs (same swizzles as ABGR low precision) */ case PIPE_FORMAT_R16G16B16A16_UNORM: case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -499,7 +499,7 @@ boolean r300_is_sampler_format_supported(enum pipe_format format) static void r300_setup_texture_state(struct r300_screen* screen, struct r300_texture* tex) { - struct r300_texture_state* state = &tex->state; + struct r300_texture_format_state* state = &tex->state; struct pipe_texture *pt = &tex->tex; unsigned i; boolean is_r500 = screen->caps->is_r500; @@ -617,18 +617,23 @@ static unsigned r300_texture_get_tile_size(struct r300_texture* tex, /* Return true if macrotiling should be enabled on the miplevel. */ static boolean r300_texture_macro_switch(struct r300_texture *tex, unsigned level, - boolean rv350_mode) + boolean rv350_mode, + int dim) { - unsigned tile_width, width; + unsigned tile, texdim; - tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH, TRUE); - width = u_minify(tex->tex.width0, level); + tile = r300_texture_get_tile_size(tex, dim, TRUE); + if (dim == TILE_WIDTH) { + texdim = u_minify(tex->tex.width0, level); + } else { + texdim = u_minify(tex->tex.height0, level); + } /* See TX_FILTER1_n.MACRO_SWITCH. */ if (rv350_mode) { - return width >= tile_width; + return texdim >= tile; } else { - return width > tile_width; + return texdim > tile; } } @@ -692,9 +697,10 @@ static void r300_setup_miptree(struct r300_screen* screen, for (i = 0; i <= base->last_level; i++) { /* Let's see if this miplevel can be macrotiled. */ - tex->mip_macrotile[i] = (tex->macrotile == R300_BUFFER_TILED && - r300_texture_macro_switch(tex, i, rv350_mode)) ? - R300_BUFFER_TILED : R300_BUFFER_LINEAR; + tex->mip_macrotile[i] = + (tex->macrotile == R300_BUFFER_TILED && + r300_texture_macro_switch(tex, i, rv350_mode, TILE_WIDTH)) ? + R300_BUFFER_TILED : R300_BUFFER_LINEAR; stride = r300_texture_get_stride(screen, tex, i); nblocksy = r300_texture_get_nblocksy(tex, i); @@ -724,14 +730,50 @@ static void r300_setup_flags(struct r300_texture* tex) !util_is_power_of_two(tex->tex.height0); } +static void r300_setup_tiling(struct pipe_screen *screen, + struct r300_texture *tex) +{ + enum pipe_format format = tex->tex.format; + boolean rv350_mode = r300_screen(screen)->caps->family >= CHIP_FAMILY_RV350; + + if (util_format_is_compressed(format)) { + return; + } + + if (tex->tex.width0 == 1 || + tex->tex.height0 == 1) { + return; + } + + /* Set microtiling. */ + switch (util_format_get_blocksize(format)) { + case 1: + case 4: + tex->microtile = R300_BUFFER_TILED; + break; + + /* XXX Square-tiling doesn't work with kernel older than 2.6.34, + * XXX need to check the DRM version */ + /*case 2: + case 8: + tex->microtile = R300_BUFFER_SQUARETILED; + break;*/ + } + + /* Set macrotiling. */ + if (r300_texture_macro_switch(tex, 0, rv350_mode, TILE_WIDTH) && + r300_texture_macro_switch(tex, 0, rv350_mode, TILE_HEIGHT)) { + tex->macrotile = R300_BUFFER_TILED; + } +} + /* Create a new texture. */ -static struct pipe_texture* - r300_texture_create(struct pipe_screen* screen, - const struct pipe_texture* template) +static struct pipe_texture* r300_texture_create(struct pipe_screen* screen, + const struct pipe_texture* template) { struct r300_texture* tex = CALLOC_STRUCT(r300_texture); struct r300_screen* rscreen = r300_screen(screen); - struct radeon_winsys* winsys = (struct radeon_winsys*)screen->winsys; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; if (!tex) { return NULL; @@ -742,16 +784,19 @@ static struct pipe_texture* tex->tex.screen = screen; r300_setup_flags(tex); + if (!(template->tex_usage & R300_TEXTURE_USAGE_TRANSFER)) { + r300_setup_tiling(screen, tex); + } r300_setup_miptree(rscreen, tex); r300_setup_texture_state(rscreen, tex); - tex->buffer = screen->buffer_create(screen, 2048, - PIPE_BUFFER_USAGE_PIXEL, - tex->size); - winsys->buffer_set_tiling(winsys, tex->buffer, - tex->pitch[0], - tex->microtile != R300_BUFFER_LINEAR, - tex->macrotile != R300_BUFFER_LINEAR); + tex->buffer = rws->buffer_create(rws, 2048, + PIPE_BUFFER_USAGE_PIXEL, + tex->size); + rws->buffer_set_tiling(rws, tex->buffer, + tex->pitch[0], + tex->microtile != R300_BUFFER_LINEAR, + tex->macrotile != R300_BUFFER_LINEAR); if (!tex->buffer) { FREE(tex); @@ -764,9 +809,9 @@ static struct pipe_texture* static void r300_texture_destroy(struct pipe_texture* texture) { struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys; - pipe_buffer_reference(&tex->buffer, NULL); - + rws->buffer_reference(rws, &tex->buffer, NULL); FREE(tex); } @@ -806,14 +851,17 @@ static void r300_tex_surface_destroy(struct pipe_surface* s) FREE(s); } + static struct pipe_texture* - r300_texture_blanket(struct pipe_screen* screen, - const struct pipe_texture* base, - const unsigned* stride, - struct pipe_buffer* buffer) + r300_texture_from_handle(struct pipe_screen* screen, + const struct pipe_texture* base, + struct winsys_handle *whandle) { - struct r300_texture* tex; + struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys; struct r300_screen* rscreen = r300_screen(screen); + struct r300_winsys_buffer *buffer; + struct r300_texture* tex; + unsigned stride; /* Support only 2D textures without mipmaps */ if (base->target != PIPE_TEXTURE_2D || @@ -822,6 +870,11 @@ static struct pipe_texture* return NULL; } + buffer = rws->buffer_from_handle(rws, screen, whandle, &stride); + if (!buffer) { + return NULL; + } + tex = CALLOC_STRUCT(r300_texture); if (!tex) { return NULL; @@ -831,17 +884,38 @@ static struct pipe_texture* pipe_reference_init(&tex->tex.reference, 1); tex->tex.screen = screen; - tex->stride_override = *stride; - tex->pitch[0] = *stride / util_format_get_blocksize(base->format); + tex->stride_override = stride; + tex->pitch[0] = stride / util_format_get_blocksize(base->format); r300_setup_flags(tex); r300_setup_texture_state(rscreen, tex); - pipe_buffer_reference(&tex->buffer, buffer); + /* one ref already taken */ + tex->buffer = buffer; return (struct pipe_texture*)tex; } +static boolean + r300_texture_get_handle(struct pipe_screen* screen, + struct pipe_texture *texture, + struct winsys_handle *whandle) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct r300_texture* tex = (struct r300_texture*)texture; + unsigned stride; + + if (!tex) { + return FALSE; + } + + stride = r300_texture_get_stride(r300_screen(screen), tex, 0); + + rws->buffer_get_handle(rws, tex->buffer, stride, whandle); + + return TRUE; +} + static struct pipe_video_surface * r300_video_surface_create(struct pipe_screen *screen, enum pipe_video_chroma_format chroma_format, @@ -865,7 +939,7 @@ r300_video_surface_create(struct pipe_screen *screen, memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.format = PIPE_FORMAT_B8G8R8X8_UNORM; template.last_level = 0; template.width0 = util_next_power_of_two(width); template.height0 = util_next_power_of_two(height); @@ -893,10 +967,11 @@ static void r300_video_surface_destroy(struct pipe_video_surface *vsfc) void r300_init_screen_texture_functions(struct pipe_screen* screen) { screen->texture_create = r300_texture_create; + screen->texture_from_handle = r300_texture_from_handle; + screen->texture_get_handle = r300_texture_get_handle; screen->texture_destroy = r300_texture_destroy; screen->get_tex_surface = r300_get_tex_surface; screen->tex_surface_destroy = r300_tex_surface_destroy; - screen->texture_blanket = r300_texture_blanket; screen->video_surface_create = r300_video_surface_create; screen->video_surface_destroy= r300_video_surface_destroy; @@ -904,19 +979,23 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen) boolean r300_get_texture_buffer(struct pipe_screen* screen, struct pipe_texture* texture, - struct pipe_buffer** buffer, + struct r300_winsys_buffer** buffer, unsigned* stride) { struct r300_texture* tex = (struct r300_texture*)texture; + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys; + struct r300_winsys_buffer *buf; + if (!tex) { return FALSE; } - pipe_buffer_reference(buffer, tex->buffer); + rws->buffer_reference(rws, &buf, tex->buffer); if (stride) { *stride = r300_texture_get_stride(r300_screen(screen), tex, 0); } + *buffer = buf; return TRUE; } diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h index 46a5fb6188b..60c7fa83420 100644 --- a/src/gallium/drivers/r300/r300_texture.h +++ b/src/gallium/drivers/r300/r300_texture.h @@ -60,13 +60,11 @@ r300_video_surface(struct pipe_video_surface *pvs) return (struct r300_video_surface *)pvs; } -#ifndef R300_WINSYS_H - +/* Used internally for texture_is_referenced() + */ boolean r300_get_texture_buffer(struct pipe_screen* screen, - struct pipe_texture* texture, - struct pipe_buffer** buffer, + struct pipe_texture *texture, + struct r300_winsys_buffer** buffer, unsigned* stride); -#endif /* R300_WINSYS_H */ - #endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c new file mode 100644 index 00000000000..987a040698f --- /dev/null +++ b/src/gallium/drivers/r300/r300_transfer.c @@ -0,0 +1,280 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_context.h" +#include "r300_transfer.h" +#include "r300_texture.h" +#include "r300_screen.h" + +#include "r300_winsys.h" + +#include "util/u_memory.h" +#include "util/u_format.h" + +struct r300_transfer { + /* Parent class */ + struct pipe_transfer transfer; + + /* Pipe context. */ + struct pipe_context *ctx; + + /* Parameters of get_tex_transfer. */ + unsigned x, y, level, zslice, face; + + /* Offset from start of buffer. */ + unsigned offset; + + /* Detiled texture. */ + struct r300_texture *detiled_texture; + + /* Transfer and format flags. */ + unsigned buffer_usage, render_target_usage; +}; + +/* Convenience cast wrapper. */ +static INLINE struct r300_transfer* +r300_transfer(struct pipe_transfer* transfer) +{ + return (struct r300_transfer*)transfer; +} + +/* Copy from a tiled texture to a detiled one. */ +static void r300_copy_from_tiled_texture(struct pipe_context *ctx, + struct r300_transfer *r300transfer) +{ + struct pipe_screen *screen = ctx->screen; + struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; + struct pipe_texture *tex = transfer->texture; + struct pipe_surface *src, *dst; + + src = screen->get_tex_surface(screen, tex, r300transfer->face, + r300transfer->level, r300transfer->zslice, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_PIXEL); + + dst = screen->get_tex_surface(screen, &r300transfer->detiled_texture->tex, + 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_PIXEL | + r300transfer->buffer_usage); + + ctx->surface_copy(ctx, dst, 0, 0, src, r300transfer->x, r300transfer->y, + transfer->width, transfer->height); + + pipe_surface_reference(&src, NULL); + pipe_surface_reference(&dst, NULL); +} + +/* Copy a detiled texture to a tiled one. */ +static void r300_copy_into_tiled_texture(struct pipe_context *ctx, + struct r300_transfer *r300transfer) +{ + struct pipe_screen *screen = ctx->screen; + struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer; + struct pipe_texture *tex = transfer->texture; + struct pipe_surface *src, *dst; + + src = screen->get_tex_surface(screen, &r300transfer->detiled_texture->tex, + 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_PIXEL); + + dst = screen->get_tex_surface(screen, tex, r300transfer->face, + r300transfer->level, r300transfer->zslice, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_PIXEL); + + /* XXX this flush prevents the following DRM error from occuring: + * [drm:radeon_cs_ioctl] *ERROR* Failed to parse relocation ! + * Reproducible with perf/copytex. */ + ctx->flush(ctx, 0, NULL); + + ctx->surface_copy(ctx, dst, r300transfer->x, r300transfer->y, src, 0, 0, + transfer->width, transfer->height); + + /* XXX this flush fixes a few piglit tests (e.g. glean/pixelFormats). */ + ctx->flush(ctx, 0, NULL); + + pipe_surface_reference(&src, NULL); + pipe_surface_reference(&dst, NULL); +} + +static struct pipe_transfer* +r300_get_tex_transfer(struct pipe_context *ctx, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_texture *tex = (struct r300_texture *)texture; + struct r300_screen *r300screen = r300_screen(ctx->screen); + struct r300_transfer *trans; + struct pipe_texture template; + + trans = CALLOC_STRUCT(r300_transfer); + if (trans) { + /* Initialize the transfer object. */ + pipe_texture_reference(&trans->transfer.texture, texture); + trans->transfer.usage = usage; + trans->transfer.width = w; + trans->transfer.height = h; + trans->ctx = ctx; + trans->x = x; + trans->y = y; + trans->level = level; + trans->zslice = zslice; + trans->face = face; + + /* If the texture is tiled, we must create a temporary detiled texture + * for this transfer. */ + if (tex->microtile || tex->macrotile) { + trans->buffer_usage = pipe_transfer_buffer_flags(&trans->transfer); + trans->render_target_usage = + util_format_is_depth_or_stencil(texture->format) ? + PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET; + + template.target = PIPE_TEXTURE_2D; + template.format = texture->format; + template.width0 = w; + template.height0 = h; + template.depth0 = 0; + template.last_level = 0; + template.nr_samples = 0; + template.tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | + R300_TEXTURE_USAGE_TRANSFER; + + /* For texture reading, the temporary (detiled) texture is used as + * a render target when blitting from a tiled texture. */ + if (usage & PIPE_TRANSFER_READ) { + template.tex_usage |= trans->render_target_usage; + } + /* For texture writing, the temporary texture is used as a sampler + * when blitting into a tiled texture. */ + if (usage & PIPE_TRANSFER_WRITE) { + template.tex_usage |= PIPE_TEXTURE_USAGE_SAMPLER; + } + + /* Create the temporary texture. */ + trans->detiled_texture = (struct r300_texture*) + ctx->screen->texture_create(ctx->screen, + &template); + + assert(!trans->detiled_texture->microtile && + !trans->detiled_texture->macrotile); + + /* Set the stride. + * Parameters x, y, level, zslice, and face remain zero. */ + trans->transfer.stride = + r300_texture_get_stride(r300screen, trans->detiled_texture, 0); + + if (usage & PIPE_TRANSFER_READ) { + /* We cannot map a tiled texture directly because the data is + * in a different order, therefore we do detiling using a blit. */ + r300_copy_from_tiled_texture(ctx, trans); + } + } else { + trans->transfer.x = x; + trans->transfer.y = y; + trans->transfer.stride = + r300_texture_get_stride(r300screen, tex, level); + trans->transfer.level = level; + trans->transfer.zslice = zslice; + trans->transfer.face = face; + trans->offset = r300_texture_get_offset(tex, level, zslice, face); + } + } + return &trans->transfer; +} + +static void r300_tex_transfer_destroy(struct pipe_context *ctx, + struct pipe_transfer *trans) +{ + struct r300_transfer *r300transfer = r300_transfer(trans); + + if (r300transfer->detiled_texture) { + if (trans->usage & PIPE_TRANSFER_WRITE) { + r300_copy_into_tiled_texture(r300transfer->ctx, r300transfer); + } + + pipe_texture_reference( + (struct pipe_texture**)&r300transfer->detiled_texture, NULL); + } + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + +static void* r300_transfer_map(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct r300_transfer *r300transfer = r300_transfer(transfer); + struct r300_texture *tex = (struct r300_texture*)transfer->texture; + char *map; + enum pipe_format format = tex->tex.format; + + if (r300transfer->detiled_texture) { + /* The detiled texture is of the same size as the region being mapped + * (no offset needed). */ + return rws->buffer_map(rws, + r300transfer->detiled_texture->buffer, + pipe_transfer_buffer_flags(transfer)); + } else { + /* Tiling is disabled. */ + map = rws->buffer_map(rws, tex->buffer, + pipe_transfer_buffer_flags(transfer)); + + if (!map) { + return NULL; + } + + return map + r300_transfer(transfer)->offset + + transfer->y / util_format_get_blockheight(format) * transfer->stride + + transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); + } +} + +static void r300_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) +{ + struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys; + struct r300_transfer *r300transfer = r300_transfer(transfer); + struct r300_texture *tex = (struct r300_texture*)transfer->texture; + + if (r300transfer->detiled_texture) { + rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer); + } else { + rws->buffer_unmap(rws, tex->buffer); + } +} + + +void r300_init_transfer_functions( struct r300_context *r300ctx ) +{ + struct pipe_context *ctx = &r300ctx->context; + + ctx->get_tex_transfer = r300_get_tex_transfer; + ctx->tex_transfer_destroy = r300_tex_transfer_destroy; + ctx->transfer_map = r300_transfer_map; + ctx->transfer_unmap = r300_transfer_unmap; +} diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h new file mode 100644 index 00000000000..79baf6d0480 --- /dev/null +++ b/src/gallium/drivers/r300/r300_transfer.h @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2010 Marek Olšák <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TRANSFER +#define R300_TRANSFER + +#include "pipe/p_screen.h" + +struct r300_context; + +void r300_init_transfer_functions(struct r300_context *r300ctx); + +#endif diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index a6786c321c6..bd6b95dccba 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -34,8 +34,6 @@ #include "radeon_compiler.h" -#include "util/u_math.h" - /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -89,95 +87,41 @@ static void r300_shader_read_vs_outputs( assert(0); } } + + /* WPOS is a straight copy of POSITION and it's always emitted. */ + vs_outputs->wpos = i; } -static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) +/* This function sets up: + * - VAP mapping, which maps VS registers to output semantics and + * at the same time it indicates which attributes are enabled and should + * be rasterized. + * - Stream mapping to VS outputs if TCL is not present. */ +static void r300_init_vs_output_mapping(struct r300_vertex_shader* vs) { struct r300_shader_semantics* vs_outputs = &vs->outputs; - uint32_t* hwfmt = vs->hwfmt; - int i, gen_count; + struct r300_vap_output_state *vap_out = &vs->vap_out; + int *stream_loc = vs->stream_loc_notcl; + int i, gen_count, tabi = 0; boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || vs_outputs->bcolor[1] != ATTR_UNUSED; - /* Do the actual vertex_info setup. - * - * vertex_info has four uints of hardware-specific data in it. - * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL - * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM - * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 - * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ - - hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ + vap_out->vap_vtx_state_cntl = 0x5555; /* XXX this is classic Mesa bonghits */ /* Position. */ if (vs_outputs->pos != ATTR_UNUSED) { - hwfmt[1] |= R300_INPUT_CNTL_POS; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + stream_loc[tabi++] = 0; } else { assert(0); } /* Point size. */ if (vs_outputs->psize != ATTR_UNUSED) { - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - } - - /* Colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || - vs_outputs->color[1] != ATTR_UNUSED) { - hwfmt[1] |= R300_INPUT_CNTL_COLOR; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; - } - } - - /* Back-face colors. */ - if (any_bcolor_used) { - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - hwfmt[1] |= R300_INPUT_CNTL_COLOR; - hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); - } - } - - /* Texture coordinates. */ - gen_count = 0; - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->generic[i] != ATTR_UNUSED) { - hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - hwfmt[3] |= (4 << (3 * gen_count)); - gen_count++; - } - } - - /* Fog coordinates. */ - if (vs_outputs->fog != ATTR_UNUSED) { - hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count); - hwfmt[3] |= (4 << (3 * gen_count)); - gen_count++; - } - - /* XXX magic */ - assert(gen_count <= 8); + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; - /* WPOS. */ - vs->wpos_tex_output = gen_count; -} - -/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed - * or isn't present. */ -static void r300_stream_locations_notcl( - struct r300_shader_semantics* vs_outputs, - int* stream_loc) -{ - int i, tabi = 0, gen_count; - boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || - vs_outputs->bcolor[1] != ATTR_UNUSED; - - /* Position. */ - stream_loc[tabi++] = 0; - - /* Point size. */ - if (vs_outputs->psize != ATTR_UNUSED) { stream_loc[tabi++] = 1; } @@ -185,6 +129,9 @@ static void r300_stream_locations_notcl( for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used || vs_outputs->color[1] != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; + stream_loc[tabi++] = 2 + i; } } @@ -192,6 +139,9 @@ static void r300_stream_locations_notcl( /* Back-face colors. */ if (any_bcolor_used) { for (i = 0; i < ATTR_COLOR_COUNT; i++) { + vap_out->vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR; + vap_out->vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + stream_loc[tabi++] = 4 + i; } } @@ -200,6 +150,9 @@ static void r300_stream_locations_notcl( gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { if (vs_outputs->generic[i] != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); + assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -208,17 +161,22 @@ static void r300_stream_locations_notcl( /* Fog coordinates. */ if (vs_outputs->fog != ATTR_UNUSED) { + vap_out->vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << gen_count); + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * gen_count)); + assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; } + /* XXX magic */ + assert(gen_count <= 8); + /* WPOS. */ - if (vs_outputs->wpos != ATTR_UNUSED) { - assert(tabi < 16); - stream_loc[tabi++] = 6 + gen_count; - gen_count++; - } + vs->wpos_tex_output = gen_count; + + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -294,26 +252,16 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) } } -static void r300_insert_wpos(struct r300_vertex_program_compiler* c, - struct r300_shader_semantics* outputs) +void r300_vertex_shader_common_init(struct r300_vertex_shader *vs, + const struct pipe_shader_state *shader) { - int i, lastOutput = 0; + /* Copy state directly into shader. */ + vs->state = *shader; + vs->state.tokens = tgsi_dup_tokens(shader->tokens); + tgsi_scan_shader(shader->tokens, &vs->info); - /* Find the max output index. */ - lastOutput = MAX2(lastOutput, outputs->psize); - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - lastOutput = MAX2(lastOutput, outputs->color[i]); - lastOutput = MAX2(lastOutput, outputs->bcolor[i]); - } - for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - lastOutput = MAX2(lastOutput, outputs->generic[i]); - } - lastOutput = MAX2(lastOutput, outputs->fog); - - /* Set WPOS after the last output. */ - lastOutput++; - rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ - outputs->wpos = lastOutput; + r300_shader_read_vs_outputs(&vs->info, &vs->outputs); + r300_init_vs_output_mapping(vs); } void r300_translate_vertex_shader(struct r300_context* r300, @@ -322,9 +270,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_program_compiler compiler; struct tgsi_to_rc ttr; - /* Initialize. */ - r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - /* Setup the compiler */ rc_init(&compiler.Base); @@ -348,10 +293,7 @@ void r300_translate_vertex_shader(struct r300_context* r300, compiler.SetHwInputOutput = &set_vertex_inputs_outputs; /* Insert the WPOS output. */ - r300_insert_wpos(&compiler, &vs->outputs); - - r300_shader_vap_output_fmt(vs); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + rc_copy_output(&compiler.Base, 0, vs->outputs.wpos); /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); @@ -363,30 +305,29 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* And, finally... */ rc_destroy(&compiler.Base); - vs->translated = TRUE; } boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) { - struct r300_vertex_shader* vs = r300->vs; - int tex_output = r300->vs->wpos_tex_output; + struct r300_vertex_shader* vs = r300->vs_state.state; + struct r300_vap_output_state *vap_out = &vs->vap_out; + int tex_output = vs->wpos_tex_output; uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; - uint32_t* hwfmt = vs->hwfmt; if (r300->fs->inputs.wpos != ATTR_UNUSED) { /* Enable WPOS in VAP. */ - if (!(hwfmt[1] & tex_fmt)) { - hwfmt[1] |= tex_fmt; - hwfmt[3] |= (4 << (3 * tex_output)); + if (!(vap_out->vap_vsm_vtx_assm & tex_fmt)) { + vap_out->vap_vsm_vtx_assm |= tex_fmt; + vap_out->vap_out_vtx_fmt[1] |= (4 << (3 * tex_output)); assert(tex_output < 8); return TRUE; } } else { /* Disable WPOS in VAP. */ - if (hwfmt[1] & tex_fmt) { - hwfmt[1] &= ~tex_fmt; - hwfmt[3] &= ~(4 << (3 * tex_output)); + if (vap_out->vap_vsm_vtx_assm & tex_fmt) { + vap_out->vap_vsm_vtx_assm &= ~tex_fmt; + vap_out->vap_out_vtx_fmt[1] &= ~(4 << (3 * tex_output)); return TRUE; } } diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 18cfeee3cd4..f6f0b86b683 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -28,6 +28,7 @@ #include "tgsi/tgsi_scan.h" #include "radeon_code.h" +#include "r300_context.h" #include "r300_shader_semantics.h" struct r300_context; @@ -38,7 +39,7 @@ struct r300_vertex_shader { struct tgsi_shader_info info; struct r300_shader_semantics outputs; - uint hwfmt[4]; + struct r300_vap_output_state vap_out; /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; @@ -46,13 +47,17 @@ struct r300_vertex_shader { /* Output stream location for WPOS. */ int wpos_tex_output; - /* Has this shader been translated yet? */ - boolean translated; - + /* HWTCL-specific. */ /* Machine code (if translated) */ struct r300_vertex_program_code code; + + /* SWTCL-specific. */ + void *draw_vs; }; +void r300_vertex_shader_common_init(struct r300_vertex_shader *vs, + const struct pipe_shader_state *shader); + void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index 40fb8a95ca5..e5183a8239c 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -23,10 +23,6 @@ #ifndef R300_WINSYS_H #define R300_WINSYS_H -#ifdef __cplusplus -extern "C" { -#endif - /* The public interface header for the r300 pipe driver. * Any winsys hosting this pipe needs to implement r300_winsys and then * call r300_create_screen to start things. */ @@ -34,19 +30,146 @@ extern "C" { #include "pipe/p_defines.h" #include "pipe/p_state.h" -struct radeon_winsys; +struct r300_winsys_screen; /* Creates a new r300 screen. */ -struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys); +struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws); + +struct r300_winsys_buffer; boolean r300_get_texture_buffer(struct pipe_screen* screen, struct pipe_texture* texture, - struct pipe_buffer** buffer, - unsigned* stride); + struct r300_winsys_buffer** buffer, + unsigned *stride); + +enum r300_value_id { + R300_VID_PCI_ID, + R300_VID_GB_PIPES, + R300_VID_Z_PIPES, +}; + +#define R300_USAGE_FLAG_DONT_SYNC (1 << 17) + +struct r300_winsys_screen { + void (*destroy)(struct r300_winsys_screen *ws); + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws, + unsigned alignment, + unsigned usage, + unsigned size); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf, + unsigned usage); + + void (*buffer_unmap)( struct r300_winsys_screen *ws, + struct r300_winsys_buffer *buf ); + + void (*buffer_destroy)( struct r300_winsys_buffer *buf ); + + + void (*buffer_reference)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer **pdst, + struct r300_winsys_buffer *src); + + boolean (*buffer_references)(struct r300_winsys_buffer *a, + struct r300_winsys_buffer *b); + + void (*buffer_flush_range)(struct r300_winsys_screen *rws, + struct r300_winsys_buffer *buf, + unsigned offset, + unsigned length); + + /* Add a pipe_buffer to the list of buffer objects to validate. */ + boolean (*add_buffer)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd); + + + /* Revalidate all currently setup pipe_buffers. + * Returns TRUE if a flush is required. */ + boolean (*validate)(struct r300_winsys_screen* winsys); + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct r300_winsys_screen* winsys, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct r300_winsys_screen* winsys, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buf, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct r300_winsys_screen* winsys, + const char* file, + const char* function, + int line); + + /* Flush the CS. */ + void (*flush_cs)(struct r300_winsys_screen* winsys); + + /* winsys flush - callback from winsys when flush required */ + void (*set_flush_cb)(struct r300_winsys_screen *winsys, + void (*flush_cb)(void *), void *data); + + void (*reset_bos)(struct r300_winsys_screen *winsys); + + void (*buffer_set_tiling)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + uint32_t pitch, + boolean microtiled, + boolean macrotiled); + + uint32_t (*get_value)(struct r300_winsys_screen *winsys, + enum r300_value_id vid); + + struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys, + struct pipe_screen *screen, + struct winsys_handle *whandle, + unsigned *stride); + boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer, + unsigned stride, + struct winsys_handle *whandle); + + boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys, + struct r300_winsys_buffer *buffer); + + +}; -#ifdef __cplusplus -} -#endif +struct r300_winsys_screen * +r300_winsys_screen(struct pipe_screen *screen); #endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index e4ac49fa85f..239655d628b 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -6,7 +6,9 @@ LIBNAME = softpipe C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ + sp_buffer.c \ sp_clear.c \ + sp_fence.c \ sp_flush.c \ sp_query.c \ sp_context.c \ @@ -32,7 +34,6 @@ C_SOURCES = \ sp_tex_tile_cache.c \ sp_tile_cache.c \ sp_surface.c \ - sp_video_context.c \ - sp_winsys.c + sp_video_context.c include ../../Makefile.template diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 3042e556c64..9949a53adfd 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -7,9 +7,11 @@ softpipe = env.ConvenienceLibrary( source = [ 'sp_fs_exec.c', 'sp_fs_sse.c', + 'sp_buffer.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', + 'sp_fence.c', 'sp_flush.c', 'sp_prim_vbuf.c', 'sp_setup.c', @@ -33,8 +35,7 @@ softpipe = env.ConvenienceLibrary( 'sp_tex_tile_cache.c', 'sp_texture.c', 'sp_tile_cache.c', - 'sp_video_context.c', - 'sp_winsys.c' + 'sp_video_context.c' ]) Export('softpipe') diff --git a/src/gallium/drivers/softpipe/sp_buffer.c b/src/gallium/drivers/softpipe/sp_buffer.c new file mode 100644 index 00000000000..8f390250869 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_buffer.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "sp_screen.h" +#include "sp_buffer.h" + + +static void * +softpipe_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned flags) +{ + struct softpipe_buffer *softpipe_buf = softpipe_buffer(buf); + return softpipe_buf->data; +} + + +static void +softpipe_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ +} + + +static void +softpipe_buffer_destroy(struct pipe_buffer *buf) +{ + struct softpipe_buffer *sbuf = softpipe_buffer(buf); + + if (!sbuf->userBuffer) + align_free(sbuf->data); + + FREE(sbuf); +} + + +static struct pipe_buffer * +softpipe_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct softpipe_buffer *buffer = CALLOC_STRUCT(softpipe_buffer); + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = screen; + buffer->base.alignment = MAX2(alignment, 16); + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +softpipe_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct softpipe_buffer *buffer; + + buffer = CALLOC_STRUCT(softpipe_buffer); + if(!buffer) + return NULL; + + pipe_reference_init(&buffer->base.reference, 1); + buffer->base.screen = screen; + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +void +softpipe_init_screen_buffer_funcs(struct pipe_screen *screen) +{ + screen->buffer_create = softpipe_buffer_create; + screen->user_buffer_create = softpipe_user_buffer_create; + screen->buffer_map = softpipe_buffer_map; + screen->buffer_unmap = softpipe_buffer_unmap; + screen->buffer_destroy = softpipe_buffer_destroy; +} diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_buffer.h index 6e3920c49b2..9d8e56a176b 100644 --- a/src/gallium/drivers/softpipe/sp_winsys.h +++ b/src/gallium/drivers/softpipe/sp_buffer.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2009 VMware, Inc. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,64 +10,46 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ -/* This is the interface that softpipe requires any window system - * hosting it to implement. This is the only include file in softpipe - * which is public. - */ +#ifndef SP_BUFFER_H +#define SP_BUFFER_H +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" -#ifndef SP_WINSYS_H -#define SP_WINSYS_H -#ifdef __cplusplus -extern "C" { -#endif +struct softpipe_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? */ + void *data; +}; -#include "pipe/p_defines.h" -struct pipe_screen; -struct pipe_winsys; -struct pipe_context; -struct pipe_texture; -struct pipe_buffer; - - - -/** - * Create a softpipe screen that uses the - * given winsys for allocating buffers. - */ -struct pipe_screen *softpipe_create_screen( struct pipe_winsys * ); - -/** - * Create a softpipe screen that uses - * regular malloc to create all its buffers. - */ -struct pipe_screen *softpipe_create_screen_malloc(void); +/** Cast wrapper */ +static INLINE struct softpipe_buffer * +softpipe_buffer( struct pipe_buffer *buf ) +{ + return (struct softpipe_buffer *)buf; +} -boolean -softpipe_get_texture_buffer( struct pipe_texture *texture, - struct pipe_buffer **buf, - unsigned *stride ); +void +softpipe_init_screen_buffer_funcs(struct pipe_screen *screen); -#ifdef __cplusplus -} -#endif -#endif /* SP_WINSYS_H */ +#endif /* SP_BUFFER_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index ddc35bcd629..de92a0cd2c7 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -44,6 +44,7 @@ #include "sp_surface.h" #include "sp_tile_cache.h" #include "sp_tex_tile_cache.h" +#include "sp_texture.h" #include "sp_query.h" @@ -210,7 +211,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE ); - softpipe->pipe.winsys = screen->winsys; + softpipe->pipe.winsys = NULL; softpipe->pipe.screen = screen; softpipe->pipe.destroy = softpipe_destroy; softpipe->pipe.priv = priv; @@ -245,6 +246,10 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.bind_gs_state = softpipe_bind_gs_state; softpipe->pipe.delete_gs_state = softpipe_delete_gs_state; + softpipe->pipe.create_vertex_elements_state = softpipe_create_vertex_elements_state; + softpipe->pipe.bind_vertex_elements_state = softpipe_bind_vertex_elements_state; + softpipe->pipe.delete_vertex_elements_state = softpipe_delete_vertex_elements_state; + softpipe->pipe.set_blend_color = softpipe_set_blend_color; softpipe->pipe.set_stencil_ref = softpipe_set_stencil_ref; softpipe->pipe.set_clip_state = softpipe_set_clip_state; @@ -257,7 +262,6 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; - softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; @@ -272,6 +276,7 @@ softpipe_create_context( struct pipe_screen *screen, softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced; softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( &softpipe->pipe ); softpipe->pipe.render_condition = softpipe_render_condition; @@ -280,13 +285,13 @@ softpipe_create_context( struct pipe_screen *screen, * Must be before quad stage setup! */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); - softpipe->zsbuf_cache = sp_create_tile_cache( screen ); + softpipe->cbuf_cache[i] = sp_create_tile_cache( &softpipe->pipe ); + softpipe->zsbuf_cache = sp_create_tile_cache( &softpipe->pipe ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen ); + softpipe->tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) { - softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen); + softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache( &softpipe->pipe ); } /* setup quad rendering stages */ diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 95def72c541..9a8158e6a22 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -45,6 +45,7 @@ struct softpipe_tile_cache; struct softpipe_tex_tile_cache; struct sp_fragment_shader; struct sp_vertex_shader; +struct sp_velems_state; struct softpipe_context { @@ -59,6 +60,7 @@ struct softpipe_context { struct sp_fragment_shader *fs; struct sp_vertex_shader *vs; struct sp_geometry_shader *gs; + struct sp_velems_state *velems; /** Other rendering state */ struct pipe_blend_color blend_color; @@ -72,13 +74,11 @@ struct softpipe_context { struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned num_samplers; unsigned num_textures; unsigned num_vertex_samplers; unsigned num_vertex_textures; - unsigned num_vertex_elements; unsigned num_vertex_buffers; unsigned dirty; /**< Mask of SP_NEW_x flags */ @@ -93,7 +93,7 @@ struct softpipe_context { ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; /** Mapped constant buffers */ - void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; + const void *mapped_constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; /** Vertex format */ struct vertex_info vertex_info; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index b2acc36bf7a..7b77eb239fd 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -33,90 +33,19 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "util/u_simple_screen.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "sp_context.h" #include "sp_query.h" #include "sp_state.h" +#include "sp_buffer.h" #include "draw/draw_context.h" -static void -softpipe_map_constant_buffers(struct softpipe_context *sp) -{ - struct pipe_winsys *ws = sp->pipe.winsys; - uint i; - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - uint j; - - for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) { - if (sp->constants[i][j] && sp->constants[i][j]->size) { - sp->mapped_constants[i][j] = ws->buffer_map(ws, - sp->constants[i][j], - PIPE_BUFFER_USAGE_CPU_READ); - } - } - } - - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - if (sp->constants[PIPE_SHADER_VERTEX][i]) { - draw_set_mapped_constant_buffer(sp->draw, - PIPE_SHADER_VERTEX, - i, - sp->mapped_constants[PIPE_SHADER_VERTEX][i], - sp->constants[PIPE_SHADER_VERTEX][i]->size); - } - if (sp->constants[PIPE_SHADER_GEOMETRY][i]) { - draw_set_mapped_constant_buffer(sp->draw, - PIPE_SHADER_GEOMETRY, - i, - sp->mapped_constants[PIPE_SHADER_GEOMETRY][i], - sp->constants[PIPE_SHADER_GEOMETRY][i]->size); - } - } -} - - -static void -softpipe_unmap_constant_buffers(struct softpipe_context *sp) -{ - struct pipe_winsys *ws = sp->pipe.winsys; - uint i; - /* really need to flush all prims since the vert/frag shaders const buffers - * are going away now. - */ - draw_flush(sp->draw); - - for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { - draw_set_mapped_constant_buffer(sp->draw, - PIPE_SHADER_VERTEX, - i, - NULL, - 0); - draw_set_mapped_constant_buffer(sp->draw, - PIPE_SHADER_GEOMETRY, - i, - NULL, - 0); - } - - for (i = 0; i < PIPE_SHADER_TYPES; i++) { - uint j; - - for (j = 0; j < PIPE_MAX_CONSTANT_BUFFERS; j++) { - if (sp->constants[i][j] && sp->constants[i][j]->size) { - ws->buffer_unmap(ws, sp->constants[i][j]); - } - sp->mapped_constants[i][j] = NULL; - } - } -} /** @@ -261,25 +190,16 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, } softpipe_map_transfers(sp); - softpipe_map_constant_buffers(sp); /* Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf; - - buf = pipe_buffer_map(pipe->screen, - sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *buf = softpipe_buffer(sp->vertex_buffer[i].buffer)->data; draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes; - - mapped_indexes = pipe_buffer_map(pipe->screen, - indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + void *mapped_indexes = softpipe_buffer(indexBuffer)->data; draw_set_mapped_element_buffer_range(draw, indexSize, minIndex, @@ -300,15 +220,18 @@ softpipe_draw_range_elements_instanced(struct pipe_context *pipe, /* unmap vertex/index buffers - will cause draw module to flush */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe_buffer_unmap(pipe->screen, indexBuffer); } - /* Note: leave drawing surfaces mapped */ - softpipe_unmap_constant_buffers(sp); + /* + * TODO: Flush only when a user vertex/index buffer is present + * (or even better, modify draw module to do this + * internally when this condition is seen?) + */ + draw_flush(draw); + /* Note: leave drawing surfaces mapped */ sp->dirty_render_cache = TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c new file mode 100644 index 00000000000..66c52141132 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fence.c @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#include "pipe/p_screen.h" +#include "util/u_debug.h" +#include "sp_fence.h" + + +static void +softpipe_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + assert(!*ptr); + assert(!fence); +} + + +static int +softpipe_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + assert(!fence); + return 0; +} + + +static int +softpipe_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + assert(!fence); + return 0; +} + + +void +softpipe_init_screen_fence_funcs(struct pipe_screen *screen) +{ + screen->fence_reference = softpipe_fence_reference; + screen->fence_finish = softpipe_fence_finish; + screen->fence_signalled = softpipe_fence_signalled; +} diff --git a/src/gallium/drivers/softpipe/sp_fence.h b/src/gallium/drivers/softpipe/sp_fence.h new file mode 100644 index 00000000000..39c33243bd5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fence.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + + +#ifndef SP_FENCE_H_ +#define SP_FENCE_H_ + + +struct pipe_screen; + + +void +softpipe_init_screen_fence_funcs(struct pipe_screen *screen); + + +#endif /* SP_FENCE_H_ */ diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e8952bf4fb8..3d76af4d8cb 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -93,9 +93,9 @@ softpipe_flush( struct pipe_context *pipe, static unsigned frame_no = 1; static char filename[256]; util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + debug_dump_surface_bmp(softpipe, filename, softpipe->framebuffer.cbufs[0]); util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); - debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + debug_dump_surface_bmp(softpipe, filename, softpipe->framebuffer.zsbuf); ++frame_no; } #endif diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 27fa126b7c3..67e2c8f8bc4 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -145,8 +145,13 @@ exec_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_COLOR: { uint cbuf = sem_index[i]; + + assert(sizeof(quad->output.color[cbuf]) == + sizeof(machine->Outputs[i])); + + /* copy float[4][4] result */ memcpy(quad->output.color[cbuf], - &machine->Outputs[i].xyzw[0].f[0], + &machine->Outputs[i], sizeof(quad->output.color[0]) ); } break; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index acee2136706..daa158df7c4 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -156,8 +156,13 @@ fs_sse_run( const struct sp_fragment_shader *base, case TGSI_SEMANTIC_COLOR: { uint cbuf = sem_index[i]; + + assert(sizeof(quad->output.color[cbuf]) == + sizeof(machine->Outputs[i])); + + /* copy float[4][4] result */ memcpy(quad->output.color[cbuf], - &machine->Outputs[i].xyzw[0].f[0], + &machine->Outputs[i], sizeof(quad->output.color[0]) ); } break; diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 98c08eaffaf..6749243ab41 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -264,57 +264,29 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) break; case PIPE_PRIM_QUADS: - if (softpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - sp_setup_tri( setup_ctx, + for (i = 3; i < nr; i += 4) { + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - sp_setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - } } break; case PIPE_PRIM_QUAD_STRIP: - if (softpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-1], stride), - get_vert(vertex_buffer, indices[i-3], stride)); - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, indices[i-2], stride), - get_vert(vertex_buffer, indices[i-0], stride), - get_vert(vertex_buffer, indices[i-3], stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - sp_setup_tri( setup_ctx, + for (i = 3; i < nr; i += 2) { + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-2], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - sp_setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, indices[i-1], stride), get_vert(vertex_buffer, indices[i-3], stride), get_vert(vertex_buffer, indices[i-0], stride) ); - } } break; @@ -448,56 +420,28 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) break; case PIPE_PRIM_QUADS: - if (softpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 4) { - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 4) { - sp_setup_tri( setup_ctx, + for (i = 3; i < nr; i += 4) { + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - sp_setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-0, stride) ); - } } break; case PIPE_PRIM_QUAD_STRIP: - if (softpipe->rasterizer->flatshade_first) { - for (i = 3; i < nr; i += 2) { - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-1, stride), - get_vert(vertex_buffer, i-3, stride) ); - sp_setup_tri( setup_ctx, - get_vert(vertex_buffer, i-2, stride), - get_vert(vertex_buffer, i-0, stride), - get_vert(vertex_buffer, i-3, stride) ); - } - } - else { - for (i = 3; i < nr; i += 2) { - sp_setup_tri( setup_ctx, + for (i = 3; i < nr; i += 2) { + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-2, stride), get_vert(vertex_buffer, i-0, stride) ); - sp_setup_tri( setup_ctx, + sp_setup_tri( setup_ctx, get_vert(vertex_buffer, i-1, stride), get_vert(vertex_buffer, i-3, stride), get_vert(vertex_buffer, i-0, stride) ); - } } break; diff --git a/src/gallium/drivers/softpipe/sp_public.h b/src/gallium/drivers/softpipe/sp_public.h new file mode 100644 index 00000000000..62d0903d87a --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_public.h @@ -0,0 +1,10 @@ +#ifndef SP_PUBLIC_H +#define SP_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +struct pipe_screen * +softpipe_create_screen(struct sw_winsys *winsys); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index dab95654430..4815a0d49f1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -73,8 +73,8 @@ get_depth_stencil_values( struct depth_data *data, data->bzzzz[j] = tile->data.depth32[y][x]; } break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); @@ -82,8 +82,8 @@ get_depth_stencil_values( struct depth_data *data, data->stencilVals[j] = tile->data.depth32[y][x] >> 24; } break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); @@ -146,8 +146,8 @@ convert_quad_depth( struct depth_data *data, } } break; - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: { float scale = (float) ((1 << 24) - 1); @@ -156,8 +156,8 @@ convert_quad_depth( struct depth_data *data, } } break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: { float scale = (float) ((1 << 24) - 1); @@ -189,7 +189,7 @@ write_depth_stencil_values( struct depth_data *data, tile->data.depth16[y][x] = (ushort) data->bzzzz[j]; } break; - case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z32_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); @@ -197,21 +197,21 @@ write_depth_stencil_values( struct depth_data *data, tile->data.depth32[y][x] = data->bzzzz[j]; } break; - case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j]; } break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j]; } break; - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { int x = quad->input.x0 % TILE_SIZE + (j & 1); int y = quad->input.y0 % TILE_SIZE + (j >> 1); diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 87415f43404..d62bfa3d633 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,15 +27,17 @@ #include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "util/u_simple_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" +#include "state_tracker/sw_winsys.h" + #include "sp_texture.h" -#include "sp_winsys.h" #include "sp_screen.h" #include "sp_context.h" +#include "sp_buffer.h" +#include "sp_fence.h" +#include "sp_public.h" static const char * @@ -83,11 +85,11 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_TEXTURE_SHADOW_MAP: return 1; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 13; /* max 4Kx4K */ + return SP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 9; /* max 256x256x256 */ + return SP_MAX_TEXTURE_3D_LEVELS; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 13; /* max 4Kx4K */ + return SP_MAX_TEXTURE_2D_LEVELS; case PIPE_CAP_TGSI_CONT_SUPPORTED: return 1; case PIPE_CAP_BLEND_EQUATION_SEPARATE: @@ -145,6 +147,8 @@ softpipe_is_format_supported( struct pipe_screen *screen, unsigned tex_usage, unsigned geom_flags ) { + struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + assert(target == PIPE_TEXTURE_1D || target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_3D || @@ -152,29 +156,39 @@ softpipe_is_format_supported( struct pipe_screen *screen, switch(format) { case PIPE_FORMAT_L16_UNORM: - case PIPE_FORMAT_YCBCR_REV: - case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_YUYV: + case PIPE_FORMAT_UYVY: case PIPE_FORMAT_DXT1_RGB: case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_B6UG5SR5S_NORM: - case PIPE_FORMAT_X8UB8UG8SR8S_NORM: - case PIPE_FORMAT_A8B8G8R8_SNORM: + case PIPE_FORMAT_R5SG5SB6U_NORM: + case PIPE_FORMAT_R8SG8SB8UX8U_NORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: case PIPE_FORMAT_NONE: return FALSE; default: - return TRUE; + break; } + + if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if(!winsys->is_displaytarget_format_supported(winsys, format)) + return FALSE; + } + + /* XXX: this is often a lie. Pull in logic from llvmpipe to fix. + */ + return TRUE; } static void softpipe_destroy_screen( struct pipe_screen *screen ) { - struct pipe_winsys *winsys = screen->winsys; + struct softpipe_screen *sp_screen = softpipe_screen(screen); + struct sw_winsys *winsys = sp_screen->winsys; if(winsys->destroy) winsys->destroy(winsys); @@ -183,21 +197,37 @@ softpipe_destroy_screen( struct pipe_screen *screen ) } +/* This is often overriden by the co-state tracker. + */ +static void +softpipe_flush_frontbuffer(struct pipe_screen *_screen, + struct pipe_surface *surface, + void *context_private) +{ + struct softpipe_screen *screen = softpipe_screen(_screen); + struct sw_winsys *winsys = screen->winsys; + struct softpipe_texture *texture = softpipe_texture(surface->texture); + + assert(texture->dt); + if (texture->dt) + winsys->displaytarget_display(winsys, texture->dt, context_private); +} /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no softpipe_screen). */ struct pipe_screen * -softpipe_create_screen(struct pipe_winsys *winsys) +softpipe_create_screen(struct sw_winsys *winsys) { struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); if (!screen) return NULL; - screen->base.winsys = winsys; + screen->winsys = winsys; + screen->base.winsys = NULL; screen->base.destroy = softpipe_destroy_screen; screen->base.get_name = softpipe_get_name; @@ -206,9 +236,11 @@ softpipe_create_screen(struct pipe_winsys *winsys) screen->base.get_paramf = softpipe_get_paramf; screen->base.is_format_supported = softpipe_is_format_supported; screen->base.context_create = softpipe_create_context; + screen->base.flush_frontbuffer = softpipe_flush_frontbuffer; softpipe_init_screen_texture_funcs(&screen->base); - u_simple_screen_init(&screen->base); + softpipe_init_screen_buffer_funcs(&screen->base); + softpipe_init_screen_fence_funcs(&screen->base); return &screen->base; } diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h index 3d4bfd3e840..f741454c9e5 100644 --- a/src/gallium/drivers/softpipe/sp_screen.h +++ b/src/gallium/drivers/softpipe/sp_screen.h @@ -35,10 +35,13 @@ #include "pipe/p_defines.h" +struct sw_winsys; struct softpipe_screen { struct pipe_screen base; + struct sw_winsys *winsys; + /* Increments whenever textures are modified. Contexts can track * this. */ @@ -55,4 +58,5 @@ softpipe_screen( struct pipe_screen *pipe ) } + #endif /* SP_SCREEN_H */ diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 4370bbeaee2..6b01c0f4d72 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -100,6 +100,11 @@ struct sp_geometry_shader { struct draw_geometry_shader *draw_data; }; +struct sp_velems_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; + void * softpipe_create_blend_state(struct pipe_context *, @@ -160,8 +165,14 @@ void *softpipe_create_gs_state(struct pipe_context *, void softpipe_bind_gs_state(struct pipe_context *, void *); void softpipe_delete_gs_state(struct pipe_context *, void *); +void *softpipe_create_vertex_elements_state(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); +void softpipe_bind_vertex_elements_state(struct pipe_context *, void *); +void softpipe_delete_vertex_elements_state(struct pipe_context *, void *); + void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); + const struct pipe_poly_stipple * ); void softpipe_set_scissor_state( struct pipe_context *, const struct pipe_scissor_state * ); @@ -178,10 +189,6 @@ softpipe_set_vertex_sampler_textures(struct pipe_context *, void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); -void softpipe_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); - void softpipe_set_vertex_buffers(struct pipe_context *, unsigned count, const struct pipe_vertex_buffer *); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index c88e2137510..2b089c28316 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -28,6 +28,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_fs.h" +#include "sp_buffer.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -163,26 +164,33 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) FREE( state ); } - - void softpipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - struct pipe_buffer *buf) + struct pipe_buffer *constants) { struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned size = constants ? constants->size : 0; + const void *data = constants ? softpipe_buffer(constants)->data : NULL; assert(shader < PIPE_SHADER_TYPES); - assert(index < PIPE_MAX_CONSTANT_BUFFERS); + assert(index == 0); draw_flush(softpipe->draw); /* note: reference counting */ - pipe_buffer_reference(&softpipe->constants[shader][index], buf); + pipe_buffer_reference(&softpipe->constants[shader][index], constants); + if(shader == PIPE_SHADER_VERTEX) { + draw_set_mapped_constant_buffer(softpipe->draw, PIPE_SHADER_VERTEX, index, + data, size); + } + + softpipe->mapped_constants[shader][index] = data; softpipe->dirty |= SP_NEW_CONSTANTS; } + void * softpipe_create_gs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index b491d92ed15..462f4d2655e 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -32,27 +32,45 @@ #include "sp_context.h" #include "sp_state.h" +#include "util/u_memory.h" #include "draw/draw_context.h" +void * +softpipe_create_vertex_elements_state(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct sp_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct sp_velems_state *) MALLOC(sizeof(struct sp_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} + void -softpipe_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) +softpipe_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) { struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_velems_state *sp_velems = (struct sp_velems_state *) velems; - assert(count <= PIPE_MAX_ATTRIBS); - - memcpy(softpipe->vertex_element, attribs, - count * sizeof(struct pipe_vertex_element)); - softpipe->num_vertex_elements = count; + softpipe->velems = sp_velems; softpipe->dirty |= SP_NEW_VERTEX; - draw_set_vertex_elements(softpipe->draw, count, attribs); + if (sp_velems) + draw_set_vertex_elements(softpipe->draw, sp_velems->count, sp_velems->velem); } +void +softpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + FREE( velems ); +} void softpipe_set_vertex_buffers(struct pipe_context *pipe, diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index ecd6b39863c..fa9e19b282b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -55,7 +55,7 @@ static INLINE float frac(float f) { - return f - util_ifloor(f); + return f - floorf(f); } @@ -201,11 +201,9 @@ wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4]) const float max = 1.0F - min; for (ch = 0; ch < 4; ch++) { const int flr = util_ifloor(s[ch]); - float u; + float u = frac(s[ch]); if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; + u = 1.0F - u; if (u < min) icoord[ch] = 0; else if (u > max) @@ -358,11 +356,9 @@ wrap_linear_mirror_repeat(const float s[4], unsigned size, uint ch; for (ch = 0; ch < 4; ch++) { const int flr = util_ifloor(s[ch]); - float u; + float u = frac(s[ch]); if (flr & 1) - u = 1.0F - (s[ch] - (float) flr); - else - u = s[ch] - (float) flr; + u = 1.0F - u; u = u * size - 0.5F; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; @@ -441,8 +437,7 @@ wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size, /** - * For RECT textures / unnormalized texcoords - * Only a subset of wrap modes supported. + * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords. */ static void wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4]) @@ -456,7 +451,7 @@ wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4]) /** - * Handles clamp_to_edge and clamp_to_border: + * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords. */ static void wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, @@ -464,14 +459,27 @@ wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size, { uint ch; for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], -0.5F, (float) size + 0.5F) ); + } +} + + +/** + * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords. + */ +static void +wrap_nearest_unorm_clamp_to_edge(const float s[4], unsigned size, + int icoord[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); } } /** - * For RECT textures / unnormalized texcoords. - * Only a subset of wrap modes supported. + * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords. */ static void wrap_linear_unorm_clamp(const float s[4], unsigned size, @@ -488,13 +496,36 @@ wrap_linear_unorm_clamp(const float s[4], unsigned size, } +/** + * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords. + */ static void wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size, int icoord0[4], int icoord1[4], float w[4]) { uint ch; for (ch = 0; ch < 4; ch++) { - float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + float u = CLAMP(s[ch], -0.5F, (float) size + 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = frac(u); + } +} + + +/** + * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords. + */ +static void +wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) +{ + uint ch; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], +0.5F, (float) size - 0.5F); u -= 0.5F; icoord0[ch] = util_ifloor(u); icoord1[ch] = icoord0[ch] + 1; @@ -1583,7 +1614,6 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler); unsigned j; float ssss[4], tttt[4]; - unsigned face; /* major axis @@ -1597,7 +1627,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz */ - /* First choose the cube face. + /* Choose the cube face and compute new s/t coords for the 2D face. + * * Use the same cube face for all four pixels in the quad. * * This isn't ideal, but if we want to use a different cube face @@ -1616,82 +1647,34 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); if (arx >= ary && arx >= arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - } - else { - face = PIPE_TEX_FACE_NEG_X; + float sign = (rx >= 0.0F) ? 1.0F : -1.0F; + uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X; + for (j = 0; j < QUAD_SIZE; j++) { + const float ima = -0.5F / fabsf(s[j]); + ssss[j] = sign * p[j] * ima + 0.5F; + tttt[j] = t[j] * ima + 0.5F; + samp->faces[j] = face; } } else if (ary >= arx && ary >= arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - } - else { - face = PIPE_TEX_FACE_NEG_Y; + float sign = (ry >= 0.0F) ? 1.0F : -1.0F; + uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y; + for (j = 0; j < QUAD_SIZE; j++) { + const float ima = -0.5F / fabsf(t[j]); + ssss[j] = -s[j] * ima + 0.5F; + tttt[j] = sign * -p[j] * ima + 0.5F; + samp->faces[j] = face; } } else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; + float sign = (rz >= 0.0F) ? 1.0F : -1.0F; + uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z; + for (j = 0; j < QUAD_SIZE; j++) { + const float ima = -0.5 / fabsf(p[j]); + ssss[j] = sign * -s[j] * ima + 0.5F; + tttt[j] = t[j] * ima + 0.5F; + samp->faces[j] = face; } - else { - face = PIPE_TEX_FACE_NEG_Z; - } - } - } - - /* Now compute the 2D _face_ texture coords from the - * 3D _cube_ texture coords. - */ - for (j = 0; j < QUAD_SIZE; j++) { - const float rx = s[j], ry = t[j], rz = p[j]; - const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); - float sc, tc, ma; - - switch (face) { - case PIPE_TEX_FACE_POS_X: - sc = -rz; - tc = -ry; - ma = arx; - break; - case PIPE_TEX_FACE_NEG_X: - sc = rz; - tc = -ry; - ma = arx; - break; - case PIPE_TEX_FACE_POS_Y: - sc = rx; - tc = rz; - ma = ary; - break; - case PIPE_TEX_FACE_NEG_Y: - sc = rx; - tc = -rz; - ma = ary; - break; - case PIPE_TEX_FACE_POS_Z: - sc = rx; - tc = -ry; - ma = arz; - break; - case PIPE_TEX_FACE_NEG_Z: - sc = -rx; - tc = -ry; - ma = arz; - break; - default: - assert(0 && "bad cube face"); - sc = 0.0F; - tc = 0.0F; - ma = 0.0F; - } - - { - const float ima = 1.0 / ma; - ssss[j] = ( sc * ima + 1.0F ) * 0.5F; - tttt[j] = ( tc * ima + 1.0F ) * 0.5F; - samp->faces[j] = face; } } @@ -1711,6 +1694,7 @@ get_nearest_unorm_wrap(unsigned mode) case PIPE_TEX_WRAP_CLAMP: return wrap_nearest_unorm_clamp; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_nearest_unorm_clamp_to_edge; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return wrap_nearest_unorm_clamp_to_border; default: @@ -1754,6 +1738,7 @@ get_linear_unorm_wrap(unsigned mode) case PIPE_TEX_WRAP_CLAMP: return wrap_linear_unorm_clamp; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return wrap_linear_unorm_clamp_to_edge; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return wrap_linear_unorm_clamp_to_border; default: diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c index a0b95c88846..e3a5e37ce44 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c @@ -43,14 +43,14 @@ struct softpipe_tex_tile_cache * -sp_create_tex_tile_cache( struct pipe_screen *screen ) +sp_create_tex_tile_cache( struct pipe_context *pipe ) { struct softpipe_tex_tile_cache *tc; uint pos; tc = CALLOC_STRUCT( softpipe_tex_tile_cache ); if (tc) { - tc->screen = screen; + tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].addr.bits.invalid = 1; } @@ -63,19 +63,16 @@ sp_create_tex_tile_cache( struct pipe_screen *screen ) void sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc) { - struct pipe_screen *screen; uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ } if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer); } if (tc->tex_trans) { - screen = tc->tex_trans->texture->screen; - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); } FREE( tc ); @@ -88,7 +85,7 @@ void sp_tex_tile_cache_map_transfers(struct softpipe_tex_tile_cache *tc) { if (tc->tex_trans && !tc->tex_trans_map) - tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); + tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); } @@ -96,7 +93,7 @@ void sp_tex_tile_cache_unmap_transfers(struct softpipe_tex_tile_cache *tc) { if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } } @@ -133,14 +130,12 @@ sp_tex_tile_cache_set_texture(struct softpipe_tex_tile_cache *tc, pipe_texture_reference(&tc->texture, texture); if (tc->tex_trans) { - struct pipe_screen *screen = tc->tex_trans->texture->screen; - if (tc->tex_trans_map) { - screen->transfer_unmap(screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); tc->tex_trans = NULL; } @@ -204,7 +199,6 @@ const struct softpipe_tex_cached_tile * sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, union tex_tile_address addr ) { - struct pipe_screen *screen = tc->screen; struct softpipe_tex_cached_tile *tile; tile = tc->entries + tex_cache_pos( addr ); @@ -232,16 +226,16 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, if (tc->tex_trans) { if (tc->tex_trans_map) { - tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans); tc->tex_trans_map = NULL; } - screen->tex_transfer_destroy(tc->tex_trans); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->tex_trans); tc->tex_trans = NULL; } tc->tex_trans = - screen->get_tex_transfer(screen, tc->texture, + tc->pipe->get_tex_transfer(tc->pipe, tc->texture, addr.bits.face, addr.bits.level, addr.bits.z, @@ -249,7 +243,7 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, u_minify(tc->texture->width0, addr.bits.level), u_minify(tc->texture->height0, addr.bits.level)); - tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); + tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans); tc->tex_face = addr.bits.face; tc->tex_level = addr.bits.level; @@ -257,7 +251,8 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, } /* get tile from the transfer (view into texture) */ - pipe_get_tile_rgba(tc->tex_trans, + pipe_get_tile_rgba(tc->pipe, + tc->tex_trans, addr.bits.x * TILE_SIZE, addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h index ac6886a3df1..b1163972587 100644 --- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h @@ -70,7 +70,7 @@ struct softpipe_tex_cached_tile struct softpipe_tex_tile_cache { - struct pipe_screen *screen; + struct pipe_context *pipe; struct pipe_transfer *transfer; void *transfer_map; @@ -88,7 +88,7 @@ struct softpipe_tex_tile_cache extern struct softpipe_tex_tile_cache * -sp_create_tex_tile_cache( struct pipe_screen *screen ); +sp_create_tex_tile_cache( struct pipe_context *pipe ); extern void sp_destroy_tex_tile_cache(struct softpipe_tex_tile_cache *tc); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 371c4e20251..2aff6118f41 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -40,7 +40,8 @@ #include "sp_context.h" #include "sp_texture.h" #include "sp_screen.h" -#include "sp_winsys.h" + +#include "state_tracker/sw_winsys.h" /** @@ -72,11 +73,9 @@ softpipe_texture_layout(struct pipe_screen *screen, depth = u_minify(depth, 1); } - spt->buffer = screen->buffer_create(screen, 32, - PIPE_BUFFER_USAGE_PIXEL, - buffer_size); + spt->data = align_malloc(buffer_size, 16); - return spt->buffer != NULL; + return spt->data != NULL; } @@ -87,19 +86,18 @@ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) { - unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE | - PIPE_BUFFER_USAGE_GPU_READ_WRITE); - unsigned tex_usage = spt->base.tex_usage; - - spt->buffer = screen->surface_buffer_create( screen, - spt->base.width0, - spt->base.height0, - spt->base.format, - usage, - tex_usage, - &spt->stride[0]); - - return spt->buffer != NULL; + struct sw_winsys *winsys = softpipe_screen(screen)->winsys; + + /* Round up the surface size to a multiple of the tile size? + */ + spt->dt = winsys->displaytarget_create(winsys, + spt->base.format, + spt->base.width0, + spt->base.height0, + 16, + &spt->stride[0] ); + + return spt->dt != NULL; } @@ -123,7 +121,8 @@ softpipe_texture_create(struct pipe_screen *screen, util_is_power_of_two(template->depth0)); if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { + PIPE_TEXTURE_USAGE_SCANOUT | + PIPE_TEXTURE_USAGE_SHARED)) { if (!softpipe_displaytarget_layout(screen, spt)) goto fail; } @@ -140,46 +139,24 @@ softpipe_texture_create(struct pipe_screen *screen, } -/** - * Create a new pipe_texture which wraps an existing buffer. - */ -static struct pipe_texture * -softpipe_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) -{ - struct softpipe_texture *spt; - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; - } - - spt = CALLOC_STRUCT(softpipe_texture); - if (!spt) - return NULL; - - spt->base = *base; - pipe_reference_init(&spt->base.reference, 1); - spt->base.screen = screen; - spt->stride[0] = stride[0]; - - pipe_buffer_reference(&spt->buffer, buffer); - - return &spt->base; -} static void softpipe_texture_destroy(struct pipe_texture *pt) { + struct softpipe_screen *screen = softpipe_screen(pt->screen); struct softpipe_texture *spt = softpipe_texture(pt); - pipe_buffer_reference(&spt->buffer, NULL); + if (spt->dt) { + /* display target */ + struct sw_winsys *winsys = screen->winsys; + winsys->displaytarget_destroy(winsys, spt->dt); + } + else { + /* regular texture */ + align_free(spt->data); + } + FREE(spt); } @@ -278,7 +255,7 @@ softpipe_tex_surface_destroy(struct pipe_surface *surf) * \param height height of region to read/write */ static struct pipe_transfer * -softpipe_get_tex_transfer(struct pipe_screen *screen, +softpipe_get_tex_transfer(struct pipe_context *pipe, struct pipe_texture *texture, unsigned face, unsigned level, unsigned zslice, enum pipe_transfer_usage usage, @@ -332,7 +309,8 @@ softpipe_get_tex_transfer(struct pipe_screen *screen, * softpipe_get_tex_transfer(). */ static void -softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) +softpipe_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is @@ -348,7 +326,7 @@ softpipe_tex_transfer_destroy(struct pipe_transfer *transfer) * Create memory mapping for given pipe_transfer object. */ static void * -softpipe_transfer_map( struct pipe_screen *screen, +softpipe_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { ubyte *map, *xfer_map; @@ -359,9 +337,20 @@ softpipe_transfer_map( struct pipe_screen *screen, spt = softpipe_texture(transfer->texture); format = transfer->texture->format; - map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer)); - if (map == NULL) - return NULL; + if (spt->dt) { + /* display target */ + struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; + + map = winsys->displaytarget_map(winsys, spt->dt, + pipe_transfer_buffer_flags(transfer)); + if (map == NULL) + return NULL; + } + else { + map = spt->data; + if (map == NULL) + return NULL; + } /* May want to different things here depending on read/write nature * of the map: @@ -370,7 +359,7 @@ softpipe_transfer_map( struct pipe_screen *screen, /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. */ - softpipe_screen(screen)->timestamp++; + softpipe_screen(pipe->screen)->timestamp++; } xfer_map = map + softpipe_transfer(transfer)->offset + @@ -385,7 +374,7 @@ softpipe_transfer_map( struct pipe_screen *screen, * Unmap memory mapping for given pipe_transfer object. */ static void -softpipe_transfer_unmap(struct pipe_screen *screen, +softpipe_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { struct softpipe_texture *spt; @@ -393,7 +382,11 @@ softpipe_transfer_unmap(struct pipe_screen *screen, assert(transfer->texture); spt = softpipe_texture(transfer->texture); - pipe_buffer_unmap( screen, spt->buffer ); + if (spt->dt) { + /* display target */ + struct sw_winsys *winsys = softpipe_screen(pipe->screen)->winsys; + winsys->displaytarget_unmap(winsys, spt->dt); + } if (transfer->usage & PIPE_TRANSFER_WRITE) { /* Mark the texture as dirty to expire the tile caches. */ @@ -426,7 +419,7 @@ softpipe_video_surface_create(struct pipe_screen *screen, memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_X8R8G8B8_UNORM; + template.format = PIPE_FORMAT_B8G8R8X8_UNORM; template.last_level = 0; /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */ template.width0 = util_next_power_of_two(width); @@ -455,43 +448,26 @@ softpipe_video_surface_destroy(struct pipe_video_surface *vsfc) void +softpipe_init_texture_funcs(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = softpipe_get_tex_transfer; + pipe->tex_transfer_destroy = softpipe_tex_transfer_destroy; + pipe->transfer_map = softpipe_transfer_map; + pipe->transfer_unmap = softpipe_transfer_unmap; +} + +void softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; - screen->texture_blanket = softpipe_texture_blanket; screen->texture_destroy = softpipe_texture_destroy; screen->get_tex_surface = softpipe_get_tex_surface; screen->tex_surface_destroy = softpipe_tex_surface_destroy; - screen->get_tex_transfer = softpipe_get_tex_transfer; - screen->tex_transfer_destroy = softpipe_tex_transfer_destroy; - screen->transfer_map = softpipe_transfer_map; - screen->transfer_unmap = softpipe_transfer_unmap; - screen->video_surface_create = softpipe_video_surface_create; screen->video_surface_destroy = softpipe_video_surface_destroy; } -/** - * Return pipe_buffer handle and stride for given texture object. - * XXX used for??? - */ -boolean -softpipe_get_texture_buffer( struct pipe_texture *texture, - struct pipe_buffer **buf, - unsigned *stride ) -{ - struct softpipe_texture *tex = (struct softpipe_texture *) texture; - - if (!tex) - return FALSE; - - pipe_buffer_reference(buf, tex->buffer); - if (stride) - *stride = tex->stride[0]; - - return TRUE; -} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 2ef64e1e7c3..c0e6ba8a869 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -33,6 +33,10 @@ #include "pipe/p_video_state.h" +#define SP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K */ +#define SP_MAX_TEXTURE_3D_LEVELS 9 /* 512 x 512 x 512 */ + + struct pipe_context; struct pipe_screen; struct softpipe_context; @@ -42,12 +46,19 @@ struct softpipe_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[SP_MAX_TEXTURE_2D_LEVELS]; + unsigned stride[SP_MAX_TEXTURE_2D_LEVELS]; - /* The data is held here: + /** + * Display target, for textures with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET + * usage. */ - struct pipe_buffer *buffer; + struct sw_displaytarget *dt; + + /** + * Malloc'ed data for regular textures, or a mapping to dt above. + */ + void *data; /* True if texture images are power-of-two in all dimensions: */ @@ -96,5 +107,8 @@ softpipe_video_surface(struct pipe_video_surface *pvs) extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); +void +softpipe_init_texture_funcs(struct pipe_context *pipe); + #endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 1b50bd7ffe5..1c3c2667d73 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -79,20 +79,20 @@ clear_clear_flag(uint *bitvec, union tile_address addr) struct softpipe_tile_cache * -sp_create_tile_cache( struct pipe_screen *screen ) +sp_create_tile_cache( struct pipe_context *pipe ) { struct softpipe_tile_cache *tc; uint pos; int maxLevels, maxTexSize; /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */ - maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); + maxLevels = pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS); maxTexSize = 1 << (maxLevels - 1); assert(MAX_WIDTH >= maxTexSize); tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { - tc->screen = screen; + tc->pipe = pipe; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].addr.bits.invalid = 1; } @@ -115,15 +115,13 @@ sp_create_tile_cache( struct pipe_screen *screen ) void sp_destroy_tile_cache(struct softpipe_tile_cache *tc) { - struct pipe_screen *screen; uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ } if (tc->transfer) { - screen = tc->transfer->texture->screen; - screen->tex_transfer_destroy(tc->transfer); + tc->pipe->tex_transfer_destroy(tc->pipe, tc->transfer); } FREE( tc ); @@ -137,35 +135,33 @@ void sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, struct pipe_surface *ps) { - if (tc->transfer) { - struct pipe_screen *screen = tc->transfer->texture->screen; + struct pipe_context *pipe = tc->pipe; + if (tc->transfer) { if (ps == tc->surface) return; if (tc->transfer_map) { - screen->transfer_unmap(screen, tc->transfer); + pipe->transfer_unmap(pipe, tc->transfer); tc->transfer_map = NULL; } - screen->tex_transfer_destroy(tc->transfer); + pipe->tex_transfer_destroy(pipe, tc->transfer); tc->transfer = NULL; } tc->surface = ps; if (ps) { - struct pipe_screen *screen = ps->texture->screen; - - tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, + tc->transfer = pipe->get_tex_transfer(pipe, ps->texture, ps->face, ps->level, ps->zslice, PIPE_TRANSFER_READ_WRITE, 0, 0, ps->width, ps->height); - tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || - ps->format == PIPE_FORMAT_X8Z24_UNORM || - ps->format == PIPE_FORMAT_Z24S8_UNORM || + tc->depth_stencil = (ps->format == PIPE_FORMAT_Z24S8_UNORM || ps->format == PIPE_FORMAT_Z24X8_UNORM || + ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_X8Z24_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || ps->format == PIPE_FORMAT_Z32_UNORM || ps->format == PIPE_FORMAT_S8_UNORM); @@ -187,7 +183,7 @@ void sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer && !tc->transfer_map) - tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); + tc->transfer_map = tc->pipe->transfer_map(tc->pipe, tc->transfer); } @@ -195,7 +191,7 @@ void sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) { if (tc->transfer_map) { - tc->screen->transfer_unmap(tc->screen, tc->transfer); + tc->pipe->transfer_unmap(tc->pipe, tc->transfer); tc->transfer_map = NULL; } } @@ -295,17 +291,19 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc) union tile_address addr = tile_address(x, y); if (is_clear_flag_set(tc->clear_flags, addr)) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, + pt, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); - /* do this? */ - clear_clear_flag(tc->clear_flags, addr); - numCleared++; } } } + + /* reset all clear flags to zero */ + memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); + #if 0 debug_printf("num cleared: %u\n", numCleared); #endif @@ -328,14 +326,14 @@ sp_flush_tile_cache(struct softpipe_tile_cache *tc) struct softpipe_cached_tile *tile = tc->entries + pos; if (!tile->addr.bits.invalid) { if (tc->depth_stencil) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pt, + pipe_put_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, @@ -378,14 +376,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, if (tile->addr.bits.invalid == 0) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { - pipe_put_tile_raw(pt, + pipe_put_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pt, + pipe_put_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, @@ -408,14 +406,14 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc, else { /* get new tile data from transfer */ if (tc->depth_stencil) { - pipe_get_tile_raw(pt, + pipe_get_tile_raw(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_get_tile_rgba(pt, + pipe_get_tile_rgba(tc->pipe, pt, tile->addr.bits.x * TILE_SIZE, tile->addr.bits.y * TILE_SIZE, TILE_SIZE, TILE_SIZE, diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index a12092702a6..753d8c0daac 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -80,7 +80,7 @@ struct softpipe_cached_tile struct softpipe_tile_cache { - struct pipe_screen *screen; + struct pipe_context *pipe; struct pipe_surface *surface; /**< the surface we're caching */ struct pipe_transfer *transfer; void *transfer_map; @@ -98,7 +98,7 @@ struct softpipe_tile_cache extern struct softpipe_tile_cache * -sp_create_tile_cache( struct pipe_screen *screen ); +sp_create_tile_cache( struct pipe_context *pipe ); extern void sp_destroy_tile_cache(struct softpipe_tile_cache *tc); diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c index 7a3a6361670..242aaac4665 100644 --- a/src/gallium/drivers/softpipe/sp_video_context.c +++ b/src/gallium/drivers/softpipe/sp_video_context.c @@ -25,10 +25,12 @@ * **************************************************************************/ +#include "util/u_inlines.h" +#include "util/u_memory.h" + #include "sp_video_context.h" -#include <util/u_inlines.h> -#include <util/u_memory.h> -#include "softpipe/sp_texture.h" +#include "sp_texture.h" + static void sp_mpeg12_destroy(struct pipe_video_context *vpipe) @@ -174,7 +176,6 @@ init_pipe_state(struct sp_mpeg12_context *ctx) rast.line_stipple_factor = 0; rast.line_stipple_pattern = 0; rast.line_last_pixel = 0; - rast.bypass_vs_clip_and_viewport = 0; rast.line_width = 1; rast.point_smooth = 0; rast.point_quad_rasterization = 0; diff --git a/src/gallium/drivers/softpipe/sp_winsys.c b/src/gallium/drivers/softpipe/sp_winsys.c deleted file mode 100644 index f6598927d35..00000000000 --- a/src/gallium/drivers/softpipe/sp_winsys.c +++ /dev/null @@ -1,245 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ - -/** - * @file - * Malloc softpipe winsys. Uses malloc for all memory allocations. - * - * @author Keith Whitwell - * @author Brian Paul - * @author Jose Fonseca - */ - - -#include "util/u_simple_screen.h"/* port to just p_screen */ -#include "pipe/p_format.h" -#include "pipe/p_context.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "softpipe/sp_winsys.h" - - -struct st_softpipe_buffer -{ - struct pipe_buffer base; - boolean userBuffer; /** Is this a user-space buffer? */ - void *data; - void *mapped; -}; - - -/** Cast wrapper */ -static INLINE struct st_softpipe_buffer * -st_softpipe_buffer( struct pipe_buffer *buf ) -{ - return (struct st_softpipe_buffer *)buf; -} - - -static void * -st_softpipe_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = st_softpipe_buf->data; - return st_softpipe_buf->mapped; -} - - -static void -st_softpipe_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); - st_softpipe_buf->mapped = NULL; -} - - -static void -st_softpipe_buffer_destroy(struct pipe_buffer *buf) -{ - struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); - - if (oldBuf->data) { - if (!oldBuf->userBuffer) - align_free(oldBuf->data); - - oldBuf->data = NULL; - } - - FREE(oldBuf); -} - - -static void -st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ -} - - - -static const char * -st_softpipe_get_name(struct pipe_winsys *winsys) -{ - return "softpipe"; -} - - -static struct pipe_buffer * -st_softpipe_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size) -{ - struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - - buffer->data = align_malloc(size, alignment); - - return &buffer->base; -} - - -/** - * Create buffer which wraps user-space data. - */ -static struct pipe_buffer * -st_softpipe_user_buffer_create(struct pipe_winsys *winsys, - void *ptr, - unsigned bytes) -{ - struct st_softpipe_buffer *buffer; - - buffer = CALLOC_STRUCT(st_softpipe_buffer); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.size = bytes; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_buffer * -st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *stride) -{ - const unsigned alignment = 64; - unsigned nblocksy; - - nblocksy = util_format_get_nblocksy(format, height); - *stride = align(util_format_get_stride(format, width), alignment); - - return winsys->buffer_create(winsys, alignment, - usage, - *stride * nblocksy); -} - - -static void -st_softpipe_fence_reference(struct pipe_winsys *winsys, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ -} - - -static int -st_softpipe_fence_signalled(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static int -st_softpipe_fence_finish(struct pipe_winsys *winsys, - struct pipe_fence_handle *fence, - unsigned flag) -{ - return 0; -} - - -static void -st_softpipe_destroy(struct pipe_winsys *winsys) -{ - FREE(winsys); -} - - -struct pipe_screen * -softpipe_create_screen_malloc(void) -{ - static struct pipe_winsys *winsys; - struct pipe_screen *screen; - - winsys = CALLOC_STRUCT(pipe_winsys); - if(!winsys) - return NULL; - - winsys->destroy = st_softpipe_destroy; - - winsys->buffer_create = st_softpipe_buffer_create; - winsys->user_buffer_create = st_softpipe_user_buffer_create; - winsys->buffer_map = st_softpipe_buffer_map; - winsys->buffer_unmap = st_softpipe_buffer_unmap; - winsys->buffer_destroy = st_softpipe_buffer_destroy; - - winsys->surface_buffer_create = st_softpipe_surface_buffer_create; - - winsys->fence_reference = st_softpipe_fence_reference; - winsys->fence_signalled = st_softpipe_fence_signalled; - winsys->fence_finish = st_softpipe_fence_finish; - - winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; - winsys->get_name = st_softpipe_get_name; - - screen = softpipe_create_screen(winsys); - if(!screen) - st_softpipe_destroy(winsys); - - return screen; -} diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c index d499ae6acc9..adb7840182b 100644 --- a/src/gallium/drivers/svga/svga_context.c +++ b/src/gallium/drivers/svga/svga_context.c @@ -164,6 +164,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen, svga_init_constbuffer_functions(svga); svga_init_query_functions(svga); + svga_init_texture_functions(&svga->pipe); + /* debug */ svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE); svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE); diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index 03302e2a6ec..791d30edc0e 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -169,6 +169,11 @@ struct svga_sampler_state { unsigned view_max_lod; }; +struct svga_velems_state { + unsigned count; + struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; +}; + /* Use to calculate differences between state emitted to hardware and * current driver-calculated state. */ @@ -178,13 +183,13 @@ struct svga_state const struct svga_depth_stencil_state *depth; const struct svga_rasterizer_state *rast; const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct svga_velems_state *velems; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */ struct svga_fragment_shader *fs; struct svga_vertex_shader *vs; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS]; struct pipe_buffer *cb[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; @@ -204,7 +209,6 @@ struct svga_state unsigned num_samplers; unsigned num_textures; - unsigned num_vertex_elements; unsigned num_vertex_buffers; unsigned reduced_prim; diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c index 409b3b41cbc..8483a3fad74 100644 --- a/src/gallium/drivers/svga/svga_pipe_clear.c +++ b/src/gallium/drivers/svga/svga_pipe_clear.c @@ -54,7 +54,7 @@ try_clear(struct svga_context *svga, if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) { flags |= SVGA3D_CLEAR_COLOR; - util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); + util_pack_color(rgba, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); rect.w = fb->cbufs[0]->width; rect.h = fb->cbufs[0]->height; @@ -63,7 +63,7 @@ try_clear(struct svga_context *svga, if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fb->zsbuf) { flags |= SVGA3D_CLEAR_DEPTH; - if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) + if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_S8Z24_UNORM) flags |= SVGA3D_CLEAR_STENCIL; rect.w = MAX2(rect.w, fb->zsbuf->width); diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c index 49b43bebc29..95bf0e6f91b 100644 --- a/src/gallium/drivers/svga/svga_pipe_misc.c +++ b/src/gallium/drivers/svga/svga_pipe_misc.c @@ -118,10 +118,10 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe, case PIPE_FORMAT_Z16_UNORM: svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16; break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8; break; case PIPE_FORMAT_Z32_UNORM: diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c index 224c4f4c183..1a8ef296cac 100644 --- a/src/gallium/drivers/svga/svga_pipe_sampler.c +++ b/src/gallium/drivers/svga/svga_pipe_sampler.c @@ -155,7 +155,7 @@ static void svga_bind_sampler_states(struct pipe_context *pipe, /* Check for no-op */ if (num == svga->curr.num_samplers && !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) { - debug_printf("sampler noop\n"); + if (0) debug_printf("sampler noop\n"); return; } @@ -201,7 +201,7 @@ static void svga_set_sampler_textures(struct pipe_context *pipe, if (!texture[i]) continue; - if (texture[i]->format == PIPE_FORMAT_A8R8G8B8_SRGB) + if (texture[i]->format == PIPE_FORMAT_B8G8R8A8_SRGB) flag_srgb |= 1 << i; if (texture[i]->target == PIPE_TEXTURE_1D) diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 836b8441da2..1715a47fc62 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -26,6 +26,7 @@ #include "util/u_inlines.h" #include "pipe/p_defines.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "svga_screen.h" @@ -64,20 +65,37 @@ static void svga_set_vertex_buffers(struct pipe_context *pipe, svga->dirty |= SVGA_NEW_VBUFFER; } -static void svga_set_vertex_elements(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *elements) + +static void * +svga_create_vertex_elements_state(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) { - struct svga_context *svga = svga_context(pipe); - unsigned i; + struct svga_velems_state *velems; + assert(count <= PIPE_MAX_ATTRIBS); + velems = (struct svga_velems_state *) MALLOC(sizeof(struct svga_velems_state)); + if (velems) { + velems->count = count; + memcpy(velems->velem, attribs, sizeof(*attribs) * count); + } + return velems; +} - for (i = 0; i < count; i++) - svga->curr.ve[i] = elements[i]; +static void svga_bind_vertex_elements_state(struct pipe_context *pipe, + void *velems) +{ + struct svga_context *svga = svga_context(pipe); + struct svga_velems_state *svga_velems = (struct svga_velems_state *) velems; - svga->curr.num_vertex_elements = count; + svga->curr.velems = svga_velems; svga->dirty |= SVGA_NEW_VELEMENT; } +static void svga_delete_vertex_elements_state(struct pipe_context *pipe, + void *velems) +{ + FREE(velems); +} void svga_cleanup_vertex_state( struct svga_context *svga ) { @@ -91,7 +109,9 @@ void svga_cleanup_vertex_state( struct svga_context *svga ) void svga_init_vertex_functions( struct svga_context *svga ) { svga->pipe.set_vertex_buffers = svga_set_vertex_buffers; - svga->pipe.set_vertex_elements = svga_set_vertex_elements; + svga->pipe.create_vertex_elements_state = svga_create_vertex_elements_state; + svga->pipe.bind_vertex_elements_state = svga_bind_vertex_elements_state; + svga->pipe.delete_vertex_elements_state = svga_delete_vertex_elements_state; } diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index 414ac52e1f6..6022c38cfca 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -196,23 +196,23 @@ svga_translate_format_cap(enum pipe_format format) { switch(format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8; - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4; case PIPE_FORMAT_Z16_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_Z_D16; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8; - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8; case PIPE_FORMAT_A8_UNORM: @@ -254,8 +254,8 @@ svga_is_format_supported( struct pipe_screen *screen, /* Often unsupported/problematic. This means we end up with the same * visuals for all virtual hardware implementations. */ - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return FALSE; /* Simulate ability to render into compressed textures */ diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c index 12f3531a1df..4a058eda885 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.c +++ b/src/gallium/drivers/svga/svga_screen_texture.c @@ -41,8 +41,6 @@ #include "svga_debug.h" #include "svga_screen_buffer.h" -#include <util/u_string.h> - /* XXX: This isn't a real hardware flag, but just a hack for kernel to * know about primary surfaces. Find a better way to accomplish this. @@ -59,21 +57,21 @@ svga_translate_format(enum pipe_format format) { switch(format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_A8R8G8B8; - case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: return SVGA3D_X8R8G8B8; /* Required for GL2.1: */ - case PIPE_FORMAT_A8R8G8B8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: return SVGA3D_A8R8G8B8; - case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: return SVGA3D_R5G6B5; - case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: return SVGA3D_A1R5G5B5; - case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: return SVGA3D_A4R4G4B4; @@ -83,9 +81,9 @@ svga_translate_format(enum pipe_format format) */ case PIPE_FORMAT_Z16_UNORM: return SVGA3D_Z_D16; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: return SVGA3D_Z_D24S8; - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return SVGA3D_Z_D24X8; case PIPE_FORMAT_A8_UNORM: @@ -111,13 +109,13 @@ SVGA3dSurfaceFormat svga_translate_format_render(enum pipe_format format) { switch(format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_X8R8G8B8_UNORM: - case PIPE_FORMAT_A1R5G5B5_UNORM: - case PIPE_FORMAT_A4R4G4B4_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + case PIPE_FORMAT_B5G6R5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_Z32_UNORM: case PIPE_FORMAT_Z16_UNORM: case PIPE_FORMAT_L8_UNORM: @@ -315,7 +313,11 @@ svga_texture_create(struct pipe_screen *screen, tex->key.cachable = 0; } - if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) { + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SHARED) { + tex->key.cachable = 0; + } + + if(templat->tex_usage & PIPE_TEXTURE_USAGE_SCANOUT) { tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT; tex->key.cachable = 0; } @@ -355,80 +357,18 @@ error1: } -static struct pipe_texture * -svga_texture_blanket(struct pipe_screen * screen, - const struct pipe_texture *base, - const unsigned *stride, - struct pipe_buffer *buffer) -{ - struct svga_texture *tex; - struct svga_buffer *sbuf = svga_buffer(buffer); - struct svga_winsys_screen *sws = svga_winsys_screen(screen); - assert(screen); - - /* Only supports one type */ - if (base->target != PIPE_TEXTURE_2D || - base->last_level != 0 || - base->depth0 != 1) { - return NULL; - } - - /** - * We currently can't do texture blanket on - * SVGA3D_BUFFER. Need to blit to a temporary surface? - */ - - assert(sbuf->handle); - if (!sbuf->handle) - return NULL; - - if (svga_translate_format(base->format) != sbuf->key.format) { - unsigned f1 = svga_translate_format(base->format); - unsigned f2 = sbuf->key.format; - - /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */ - if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) || - (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) || - (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) { - debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2); - return NULL; - } - } - - tex = CALLOC_STRUCT(svga_texture); - if (!tex) - return NULL; - - tex->base = *base; - - - if (sbuf->key.format == 1) - tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; - else if (sbuf->key.format == 2) - tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM; - - pipe_reference_init(&tex->base.reference, 1); - tex->base.screen = screen; - - SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle); - - /* We don't own this storage, so don't try to cache it. - */ - assert(sbuf->key.cachable == 0); - tex->key.cachable = 0; - sws->surface_reference(sws, &tex->handle, sbuf->handle); - return &tex->base; -} -struct pipe_texture * -svga_screen_texture_wrap_surface(struct pipe_screen *screen, - struct pipe_texture *base, - enum SVGA3dSurfaceFormat format, - struct svga_winsys_surface *srf) +static struct pipe_texture * +svga_screen_texture_from_handle(struct pipe_screen *screen, + const struct pipe_texture *base, + struct winsys_handle *whandle) { + struct svga_winsys_screen *sws = svga_winsys_screen(screen); + struct svga_winsys_surface *srf; struct svga_texture *tex; + enum SVGA3dSurfaceFormat format = 0; assert(screen); /* Only supports one type */ @@ -438,6 +378,8 @@ svga_screen_texture_wrap_surface(struct pipe_screen *screen, return NULL; } + srf = sws->surface_from_handle(sws, whandle, &format); + if (!srf) return NULL; @@ -462,9 +404,9 @@ svga_screen_texture_wrap_surface(struct pipe_screen *screen, if (format == 1) - tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM; + tex->base.format = PIPE_FORMAT_B8G8R8X8_UNORM; else if (format == 2) - tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM; + tex->base.format = PIPE_FORMAT_B8G8R8A8_UNORM; pipe_reference_init(&tex->base.reference, 1); tex->base.screen = screen; @@ -478,6 +420,22 @@ svga_screen_texture_wrap_surface(struct pipe_screen *screen, } +static boolean +svga_screen_texture_get_handle(struct pipe_screen *screen, + struct pipe_texture *texture, + struct winsys_handle *whandle) +{ + struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); + unsigned stride; + + assert(svga_texture(texture)->key.cachable == 0); + svga_texture(texture)->key.cachable = 0; + stride = util_format_get_nblocksx(texture->format, texture->width0) * + util_format_get_blocksize(texture->format); + return sws->surface_get_handle(sws, svga_texture(texture)->handle, stride, whandle); +} + + static void svga_texture_destroy(struct pipe_texture *pt) { @@ -823,15 +781,17 @@ svga_surface_needs_propagation(struct pipe_surface *surf) return s->dirty && s->handle != tex->handle; } - +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static struct pipe_transfer * -svga_get_tex_transfer(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, unsigned zslice, - enum pipe_transfer_usage usage, unsigned x, unsigned y, - unsigned w, unsigned h) +svga_get_tex_transfer(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st; unsigned nblocksx = util_format_get_nblocksx(texture->format, w); @@ -899,11 +859,14 @@ no_hwbuf: } +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static void * -svga_transfer_map( struct pipe_screen *screen, +svga_transfer_map( struct pipe_context *pipe, struct pipe_transfer *transfer ) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -917,11 +880,14 @@ svga_transfer_map( struct pipe_screen *screen, } +/* XXX: Still implementing this as if it was a screen function, but + * can now modify it to queue transfers on the context. + */ static void -svga_transfer_unmap(struct pipe_screen *screen, +svga_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer) { - struct svga_screen *ss = svga_screen(screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -931,10 +897,11 @@ svga_transfer_unmap(struct pipe_screen *screen, static void -svga_tex_transfer_destroy(struct pipe_transfer *transfer) +svga_tex_transfer_destroy(struct pipe_context *pipe, + struct pipe_transfer *transfer) { struct svga_texture *tex = svga_texture(transfer->texture); - struct svga_screen *ss = svga_screen(transfer->texture->screen); + struct svga_screen *ss = svga_screen(pipe->screen); struct svga_winsys_screen *sws = ss->sws; struct svga_transfer *st = svga_transfer(transfer); @@ -951,18 +918,26 @@ svga_tex_transfer_destroy(struct pipe_transfer *transfer) FREE(st); } + +void +svga_init_texture_functions(struct pipe_context *pipe) +{ + pipe->get_tex_transfer = svga_get_tex_transfer; + pipe->transfer_map = svga_transfer_map; + pipe->transfer_unmap = svga_transfer_unmap; + pipe->tex_transfer_destroy = svga_tex_transfer_destroy; +} + + void svga_screen_init_texture_functions(struct pipe_screen *screen) { screen->texture_create = svga_texture_create; + screen->texture_from_handle = svga_screen_texture_from_handle; + screen->texture_get_handle = svga_screen_texture_get_handle; screen->texture_destroy = svga_texture_destroy; screen->get_tex_surface = svga_get_tex_surface; screen->tex_surface_destroy = svga_tex_surface_destroy; - screen->texture_blanket = svga_texture_blanket; - screen->get_tex_transfer = svga_get_tex_transfer; - screen->transfer_map = svga_transfer_map; - screen->transfer_unmap = svga_transfer_unmap; - screen->tex_transfer_destroy = svga_tex_transfer_destroy; } /*********************************************************************** @@ -1120,33 +1095,3 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v) pipe_texture_reference(&v->texture, NULL); FREE(v); } - -boolean -svga_screen_buffer_from_texture(struct pipe_texture *texture, - struct pipe_buffer **buffer, - unsigned *stride) -{ - struct svga_texture *stex = svga_texture(texture); - - *buffer = svga_screen_buffer_wrap_surface - (texture->screen, - svga_translate_format(texture->format), - stex->handle); - - *stride = util_format_get_stride(texture->format, texture->width0); - - return *buffer != NULL; -} - - -struct svga_winsys_surface * -svga_screen_texture_get_winsys_surface(struct pipe_texture *texture) -{ - struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen); - struct svga_winsys_surface *vsurf = NULL; - - assert(svga_texture(texture)->key.cachable == 0); - svga_texture(texture)->key.cachable = 0; - sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle); - return vsurf; -} diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h index 24c1f78ca55..96d035b12d8 100644 --- a/src/gallium/drivers/svga/svga_screen_texture.h +++ b/src/gallium/drivers/svga/svga_screen_texture.h @@ -78,7 +78,7 @@ struct svga_texture { struct pipe_texture base; - boolean defined[6][PIPE_MAX_TEXTURE_LEVELS]; + boolean defined[6][SVGA_MAX_TEXTURE_LEVELS]; struct svga_sampler_view *cached_view; @@ -186,6 +186,9 @@ svga_surface_needs_propagation(struct pipe_surface *surf); extern void svga_screen_init_texture_functions(struct pipe_screen *screen); +void +svga_init_texture_functions(struct pipe_context *pipe); + enum SVGA3dSurfaceFormat svga_translate_format(enum pipe_format format); diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c index b4cafb8f219..b710914acda 100644 --- a/src/gallium/drivers/svga/svga_state_framebuffer.c +++ b/src/gallium/drivers/svga/svga_state_framebuffer.c @@ -70,7 +70,7 @@ static int emit_framebuffer( struct svga_context *svga, return ret; if (curr->zsbuf && - curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) { + curr->zsbuf->format == PIPE_FORMAT_S8Z24_UNORM) { ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf); if (ret != PIPE_OK) return ret; @@ -120,174 +120,153 @@ static int emit_viewport( struct svga_context *svga, float fb_width = svga->curr.framebuffer.width; float fb_height = svga->curr.framebuffer.height; - memset( &prescale, 0, sizeof(prescale) ); - - if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) { - - /* Avoid POSITIONT as it has a non trivial implementation outside the D3D - * API. Always generate a vertex shader. - */ - rect.x = 0; - rect.y = 0; - rect.w = svga->curr.framebuffer.width; - rect.h = svga->curr.framebuffer.height; - - prescale.scale[0] = 2.0 / (float)rect.w; - prescale.scale[1] = - 2.0 / (float)rect.h; - prescale.scale[2] = 1.0; - prescale.scale[3] = 1.0; - prescale.translate[0] = -1.0f; - prescale.translate[1] = 1.0f; - prescale.translate[2] = 0; - prescale.translate[3] = 0; - prescale.enabled = TRUE; - } else { - - /* Examine gallium viewport transformation and produce a screen - * rectangle and possibly vertex shader pre-transformation to - * get the same results. - */ - float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; - float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; - float fw = viewport->scale[0] * 2; - float fh = flip * viewport->scale[1] * 2; - - SVGA_DBG(DEBUG_VIEWPORT, - "\ninitial %f,%f %fx%f\n", - fx, - fy, - fw, - fh); - - prescale.scale[0] = 1.0; - prescale.scale[1] = 1.0; - prescale.scale[2] = 1.0; - prescale.scale[3] = 1.0; - prescale.translate[0] = 0; - prescale.translate[1] = 0; - prescale.translate[2] = 0; - prescale.translate[3] = 0; - prescale.enabled = TRUE; - - - - if (fw < 0) { - prescale.scale[0] *= -1.0; - prescale.translate[0] += -fw; - fw = -fw; - fx = viewport->scale[0] * 1.0 + viewport->translate[0]; - } + float fx = viewport->scale[0] * -1.0 + viewport->translate[0]; + float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1]; + float fw = viewport->scale[0] * 2; + float fh = flip * viewport->scale[1] * 2; - if (fh < 0) { - prescale.scale[1] *= -1.0; - prescale.translate[1] += -fh; - fh = -fh; - fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; - } + memset( &prescale, 0, sizeof(prescale) ); - if (fx < 0) { - prescale.translate[0] += fx; - prescale.scale[0] *= fw / (fw + fx); - fw += fx; - fx = 0; - } + /* Examine gallium viewport transformation and produce a screen + * rectangle and possibly vertex shader pre-transformation to + * get the same results. + */ - if (fy < 0) { - prescale.translate[1] += fy; - prescale.scale[1] *= fh / (fh + fy); - fh += fy; - fy = 0; - } + SVGA_DBG(DEBUG_VIEWPORT, + "\ninitial %f,%f %fx%f\n", + fx, + fy, + fw, + fh); + + prescale.scale[0] = 1.0; + prescale.scale[1] = 1.0; + prescale.scale[2] = 1.0; + prescale.scale[3] = 1.0; + prescale.translate[0] = 0; + prescale.translate[1] = 0; + prescale.translate[2] = 0; + prescale.translate[3] = 0; + prescale.enabled = TRUE; + + + + if (fw < 0) { + prescale.scale[0] *= -1.0; + prescale.translate[0] += -fw; + fw = -fw; + fx = viewport->scale[0] * 1.0 + viewport->translate[0]; + } - if (fx + fw > fb_width) { - prescale.scale[0] *= fw / (fb_width - fx); - prescale.translate[0] -= fx * (fw / (fb_width - fx)); - prescale.translate[0] += fx; - fw = fb_width - fx; - - } + if (fh < 0) { + prescale.scale[1] *= -1.0; + prescale.translate[1] += -fh; + fh = -fh; + fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1]; + } - if (fy + fh > fb_height) { - prescale.scale[1] *= fh / (fb_height - fy); - prescale.translate[1] -= fy * (fh / (fb_height - fy)); - prescale.translate[1] += fy; - fh = fb_height - fy; - } + if (fx < 0) { + prescale.translate[0] += fx; + prescale.scale[0] *= fw / (fw + fx); + fw += fx; + fx = 0; + } - if (fw < 0 || fh < 0) { - fw = fh = fx = fy = 0; - degenerate = TRUE; - goto out; - } + if (fy < 0) { + prescale.translate[1] += fy; + prescale.scale[1] *= fh / (fh + fy); + fh += fy; + fy = 0; + } + if (fx + fw > fb_width) { + prescale.scale[0] *= fw / (fb_width - fx); + prescale.translate[0] -= fx * (fw / (fb_width - fx)); + prescale.translate[0] += fx; + fw = fb_width - fx; + + } - /* D3D viewport is integer space. Convert fx,fy,etc. to - * integers. - * - * TODO: adjust pretranslate correct for any subpixel error - * introduced converting to integers. - */ - rect.x = fx; - rect.y = fy; - rect.w = fw; - rect.h = fh; + if (fy + fh > fb_height) { + prescale.scale[1] *= fh / (fb_height - fy); + prescale.translate[1] -= fy * (fh / (fb_height - fy)); + prescale.translate[1] += fy; + fh = fb_height - fy; + } - SVGA_DBG(DEBUG_VIEWPORT, - "viewport error %f,%f %fx%f\n", - fabs((float)rect.x - fx), - fabs((float)rect.y - fy), - fabs((float)rect.w - fw), - fabs((float)rect.h - fh)); + if (fw < 0 || fh < 0) { + fw = fh = fx = fy = 0; + degenerate = TRUE; + goto out; + } - SVGA_DBG(DEBUG_VIEWPORT, - "viewport %d,%d %dx%d\n", - rect.x, - rect.y, - rect.w, - rect.h); - - /* Finally, to get GL rasterization rules, need to tweak the - * screen-space coordinates slightly relative to D3D which is - * what hardware implements natively. - */ - if (svga->curr.rast->templ.gl_rasterization_rules) { - float adjust_x = 0.0; - float adjust_y = 0.0; - - switch (svga->curr.reduced_prim) { - case PIPE_PRIM_LINES: - adjust_x = -0.5; - adjust_y = 0; - break; - case PIPE_PRIM_POINTS: - case PIPE_PRIM_TRIANGLES: - adjust_x = -0.375; - adjust_y = -0.5; - break; - } - - prescale.translate[0] += adjust_x; - prescale.translate[1] += adjust_y; - prescale.translate[2] = 0.5; /* D3D clip space */ - prescale.scale[2] = 0.5; /* D3D clip space */ + /* D3D viewport is integer space. Convert fx,fy,etc. to + * integers. + * + * TODO: adjust pretranslate correct for any subpixel error + * introduced converting to integers. + */ + rect.x = fx; + rect.y = fy; + rect.w = fw; + rect.h = fh; + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport error %f,%f %fx%f\n", + fabs((float)rect.x - fx), + fabs((float)rect.y - fy), + fabs((float)rect.w - fw), + fabs((float)rect.h - fh)); + + SVGA_DBG(DEBUG_VIEWPORT, + "viewport %d,%d %dx%d\n", + rect.x, + rect.y, + rect.w, + rect.h); + + + /* Finally, to get GL rasterization rules, need to tweak the + * screen-space coordinates slightly relative to D3D which is + * what hardware implements natively. + */ + if (svga->curr.rast->templ.gl_rasterization_rules) { + float adjust_x = 0.0; + float adjust_y = 0.0; + + switch (svga->curr.reduced_prim) { + case PIPE_PRIM_LINES: + adjust_x = -0.5; + adjust_y = 0; + break; + case PIPE_PRIM_POINTS: + case PIPE_PRIM_TRIANGLES: + adjust_x = -0.375; + adjust_y = -0.5; + break; } + prescale.translate[0] += adjust_x; + prescale.translate[1] += adjust_y; + prescale.translate[2] = 0.5; /* D3D clip space */ + prescale.scale[2] = 0.5; /* D3D clip space */ + } - range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; - range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; - /* D3D (and by implication SVGA) doesn't like dealing with zmax - * less than zmin. Detect that case, flip the depth range and - * invert our z-scale factor to achieve the same effect. - */ - if (range_min > range_max) { - float range_tmp; - range_tmp = range_min; - range_min = range_max; - range_max = range_tmp; - prescale.scale[2] = -prescale.scale[2]; - } + range_min = viewport->scale[2] * -1.0 + viewport->translate[2]; + range_max = viewport->scale[2] * 1.0 + viewport->translate[2]; + + /* D3D (and by implication SVGA) doesn't like dealing with zmax + * less than zmin. Detect that case, flip the depth range and + * invert our z-scale factor to achieve the same effect. + */ + if (range_min > range_max) { + float range_tmp; + range_tmp = range_min; + range_min = range_max; + range_max = range_tmp; + prescale.scale[2] = -prescale.scale[2]; } if (prescale.enabled) { diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c index dd13a89d24d..dfaab53aef4 100644 --- a/src/gallium/drivers/svga/svga_state_need_swtnl.c +++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c @@ -43,7 +43,7 @@ svga_translate_vertex_format(enum pipe_format format) case PIPE_FORMAT_R32G32_FLOAT: return SVGA3D_DECLTYPE_FLOAT2; case PIPE_FORMAT_R32G32B32_FLOAT: return SVGA3D_DECLTYPE_FLOAT3; case PIPE_FORMAT_R32G32B32A32_FLOAT: return SVGA3D_DECLTYPE_FLOAT4; - case PIPE_FORMAT_B8G8R8A8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; + case PIPE_FORMAT_A8R8G8B8_UNORM: return SVGA3D_DECLTYPE_D3DCOLOR; case PIPE_FORMAT_R8G8B8A8_USCALED: return SVGA3D_DECLTYPE_UBYTE4; case PIPE_FORMAT_R16G16_SSCALED: return SVGA3D_DECLTYPE_SHORT2; case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4; @@ -76,8 +76,13 @@ static int update_need_swvfetch( struct svga_context *svga, unsigned i; boolean need_swvfetch = FALSE; - for (i = 0; i < svga->curr.num_vertex_elements; i++) { - svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format); + if (!svga->curr.velems) { + /* No vertex elements bound. */ + return 0; + } + + for (i = 0; i < svga->curr.velems->count; i++) { + svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.velems->velem[i].src_format); if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) { need_swvfetch = TRUE; break; @@ -129,8 +134,7 @@ static int update_need_pipeline( struct svga_context *svga, /* SVGA_NEW_CLIP */ - if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport && - svga->curr.clip.nr) { + if (svga->curr.clip.nr) { SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__); need_pipeline = TRUE; } diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c index 107cc403b4d..b7195d246bc 100644 --- a/src/gallium/drivers/svga/svga_state_rss.c +++ b/src/gallium/drivers/svga/svga_state_rss.c @@ -191,15 +191,24 @@ static int emit_rss( struct svga_context *svga, EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail ); } - if (dirty & SVGA_NEW_RAST) + if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE)) { const struct svga_rasterizer_state *curr = svga->curr.rast; + unsigned cullmode = curr->cullmode; /* Shademode: still need to rearrange index list to move * flat-shading PV first vertex. */ EMIT_RS( svga, curr->shademode, SHADEMODE, fail ); - EMIT_RS( svga, curr->cullmode, CULLMODE, fail ); + + /* Don't do culling while the software pipeline is active. It + * does it for us, and additionally introduces potentially + * back-facing triangles. + */ + if (svga->state.sw.need_pipeline) + cullmode = SVGA3D_FACE_NONE; + + EMIT_RS( svga, cullmode, CULLMODE, fail ); EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail ); EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail ); EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail ); diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index ded903170b5..f531e223048 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ -95,17 +95,17 @@ upload_user_buffers( struct svga_context *svga ) static int emit_hw_vs_vdecl( struct svga_context *svga, unsigned dirty ) { - const struct pipe_vertex_element *ve = svga->curr.ve; + const struct pipe_vertex_element *ve = svga->curr.velems->velem; SVGA3dVertexDecl decl; unsigned i; - assert(svga->curr.num_vertex_elements >= + assert(svga->curr.velems->count >= svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]); svga_hwtnl_reset_vdecl( svga->hwtnl, - svga->curr.num_vertex_elements ); + svga->curr.velems->count ); - for (i = 0; i < svga->curr.num_vertex_elements; i++) { + for (i = 0; i < svga->curr.velems->count; i++) { const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index]; unsigned usage, index; diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c index d7999fe53d2..781f7bf5339 100644 --- a/src/gallium/drivers/svga/svga_state_vs.c +++ b/src/gallium/drivers/svga/svga_state_vs.c @@ -186,8 +186,8 @@ static int update_zero_stride( struct svga_context *svga, svga->curr.zero_stride_vertex_elements = 0; svga->curr.num_zero_stride_vertex_elements = 0; - for (i = 0; i < svga->curr.num_vertex_elements; i++) { - const struct pipe_vertex_element *vel = &svga->curr.ve[i]; + for (i = 0; i < svga->curr.velems->count; i++) { + const struct pipe_vertex_element *vel = &svga->curr.velems->velem[i]; const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[ vel->vertex_buffer_index]; if (vbuffer->stride == 0) { diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c index 35f36a828fd..246d34e649e 100644 --- a/src/gallium/drivers/svga/svga_swtnl_state.c +++ b/src/gallium/drivers/svga/svga_swtnl_state.c @@ -99,8 +99,8 @@ static int update_swtnl_draw( struct svga_context *svga, if (dirty & SVGA_NEW_VELEMENT) draw_set_vertex_elements(svga->swtnl.draw, - svga->curr.num_vertex_elements, - svga->curr.ve ); + svga->curr.velems->count, + svga->curr.velems->velem ); if (dirty & SVGA_NEW_CLIP) draw_set_clip_state(svga->swtnl.draw, diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h index b4e3af0eafc..d4bb176f9a8 100644 --- a/src/gallium/drivers/svga/svga_winsys.h +++ b/src/gallium/drivers/svga/svga_winsys.h @@ -51,6 +51,7 @@ struct pipe_context; struct pipe_fence_handle; struct pipe_texture; struct svga_region; +struct winsys_handle; #define SVGA_BUFFER_USAGE_PINNED (PIPE_BUFFER_USAGE_CUSTOM << 0) @@ -187,6 +188,25 @@ struct svga_winsys_screen uint32 numMipLevels); /** + * Creates a surface from a winsys handle. + * Used to implement pipe_screen::texture_from_handle. + */ + struct svga_winsys_surface * + (*surface_from_handle)(struct svga_winsys_screen *sws, + struct winsys_handle *whandle, + SVGA3dSurfaceFormat *format); + + /** + * Get a winsys_handle from a surface. + * Used to implement pipe_screen::texture_get_handle. + */ + boolean + (*surface_get_handle)(struct svga_winsys_screen *sws, + struct svga_winsys_surface *surface, + unsigned stride, + struct winsys_handle *whandle); + + /** * Whether this surface is sitting in a validate list */ boolean @@ -284,19 +304,6 @@ svga_screen_buffer_wrap_surface(struct pipe_screen *screen, struct svga_winsys_surface *srf); struct svga_winsys_surface * -svga_screen_texture_get_winsys_surface(struct pipe_texture *texture); -struct svga_winsys_surface * svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer); -boolean -svga_screen_buffer_from_texture(struct pipe_texture *texture, - struct pipe_buffer **buffer, - unsigned *stride); - -struct pipe_texture * -svga_screen_texture_wrap_surface(struct pipe_screen *screen, - struct pipe_texture *base, - enum SVGA3dSurfaceFormat format, - struct svga_winsys_surface *srf); - #endif /* SVGA_WINSYS_H_ */ diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index 705ca29e8f5..4ee1bf2c353 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -360,7 +360,9 @@ dump_dstreg(struct sh_dstreg dstreg, union { struct sh_reg reg; struct sh_dstreg dstreg; - } u = { { 0 } }; + } u; + + memset(&u, 0, sizeof(u)); assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index df40fbade6c..b7e6bbac68e 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -27,7 +27,9 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_format.h" +#include "pipe/p_format.h" #include "pipe/p_screen.h" #include "tr_dump.h" @@ -773,6 +775,70 @@ trace_context_delete_vs_state(struct pipe_context *_pipe, } +static INLINE void * +trace_context_create_vertex_elements_state(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_vertex_elements_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_elements); + + trace_dump_arg_begin("elements"); + trace_dump_struct_array(vertex_element, elements, num_elements); + trace_dump_arg_end(); + + result = pipe->create_vertex_elements_state(pipe, num_elements, elements); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_vertex_elements_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_vertex_elements_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_vertex_elements_state(pipe, state); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_verte_elements_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_vertex_elements_state(pipe, state); + + trace_dump_call_end(); +} + + static INLINE void trace_context_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *state) @@ -1048,29 +1114,6 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, static INLINE void -trace_context_set_vertex_elements(struct pipe_context *_pipe, - unsigned num_elements, - const struct pipe_vertex_element *elements) -{ - struct trace_context *tr_ctx = trace_context(_pipe); - struct pipe_context *pipe = tr_ctx->pipe; - - trace_dump_call_begin("pipe_context", "set_vertex_elements"); - - trace_dump_arg(ptr, pipe); - trace_dump_arg(uint, num_elements); - - trace_dump_arg_begin("elements"); - trace_dump_struct_array(vertex_element, elements, num_elements); - trace_dump_arg_end(); - - pipe->set_vertex_elements(pipe, num_elements, elements); - - trace_dump_call_end(); -} - - -static INLINE void trace_context_surface_copy(struct pipe_context *_pipe, struct pipe_surface *dest, unsigned destx, unsigned desty, @@ -1242,6 +1285,136 @@ trace_is_buffer_referenced( struct pipe_context *_pipe, return referenced; } + +/******************************************************************** + * transfer + */ + + +static struct pipe_transfer * +trace_context_get_tex_transfer(struct pipe_context *_context, + struct pipe_texture *_texture, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_texture *tr_tex = trace_texture(_texture); + struct pipe_context *context = tr_context->pipe; + struct pipe_texture *texture = tr_tex->texture; + struct pipe_transfer *result = NULL; + + assert(texture->screen == context->screen); + + trace_dump_call_begin("pipe_context", "get_tex_transfer"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); + + trace_dump_arg(uint, x); + trace_dump_arg(uint, y); + trace_dump_arg(uint, w); + trace_dump_arg(uint, h); + + result = context->get_tex_transfer(context, texture, face, level, zslice, usage, + x, y, w, h); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + if (result) + result = trace_transfer_create(tr_context, tr_tex, result); + + return result; +} + + +static void +trace_context_tex_transfer_destroy(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + + trace_dump_call_begin("pipe_context", "tex_transfer_destroy"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, transfer); + + trace_dump_call_end(); + + trace_transfer_destroy(tr_context, tr_trans); +} + + +static void * +trace_context_transfer_map(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_context = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + void *map; + + map = context->transfer_map(context, transfer); + if(map) { + if(transfer->usage & PIPE_TRANSFER_WRITE) { + assert(!tr_trans->map); + tr_trans->map = map; + } + } + + return map; +} + + +static void +trace_context_transfer_unmap(struct pipe_context *_context, + struct pipe_transfer *_transfer) +{ + struct trace_context *tr_ctx = trace_context(_context); + struct trace_transfer *tr_trans = trace_transfer(_transfer); + struct pipe_context *context = tr_ctx->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; + + if(tr_trans->map) { + size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; + + trace_dump_call_begin("pipe_context", "transfer_write"); + + trace_dump_arg(ptr, context); + + trace_dump_arg(ptr, transfer); + + trace_dump_arg_begin("stride"); + trace_dump_uint(transfer->stride); + trace_dump_arg_end(); + + trace_dump_arg_begin("data"); + trace_dump_bytes(tr_trans->map, size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + tr_trans->map = NULL; + } + + context->transfer_unmap(context, transfer); +} + static const struct debug_named_value rbug_blocker_flags[] = { {"before", 1}, {"after", 2}, @@ -1303,6 +1476,9 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.create_vs_state = trace_context_create_vs_state; tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; + tr_ctx->base.create_vertex_elements_state = trace_context_create_vertex_elements_state; + tr_ctx->base.bind_vertex_elements_state = trace_context_bind_vertex_elements_state; + tr_ctx->base.delete_vertex_elements_state = trace_context_delete_vertex_elements_state; tr_ctx->base.set_blend_color = trace_context_set_blend_color; tr_ctx->base.set_stencil_ref = trace_context_set_stencil_ref; tr_ctx->base.set_clip_state = trace_context_set_clip_state; @@ -1314,7 +1490,6 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures; tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures; tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; - tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; if (pipe->surface_copy) tr_ctx->base.surface_copy = trace_context_surface_copy; if (pipe->surface_fill) @@ -1324,6 +1499,11 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.is_texture_referenced = trace_is_texture_referenced; tr_ctx->base.is_buffer_referenced = trace_is_buffer_referenced; + tr_ctx->base.get_tex_transfer = trace_context_get_tex_transfer; + tr_ctx->base.tex_transfer_destroy = trace_context_tex_transfer_destroy; + tr_ctx->base.transfer_map = trace_context_transfer_map; + tr_ctx->base.transfer_unmap = trace_context_transfer_unmap; + tr_ctx->pipe = pipe; trace_screen_add_to_list(tr_scr, contexts, tr_ctx); diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c index b8adde77f10..eaa47df4066 100644 --- a/src/gallium/drivers/trace/tr_drm.c +++ b/src/gallium/drivers/trace/tr_drm.c @@ -28,11 +28,9 @@ #include "state_tracker/drm_api.h" #include "util/u_memory.h" -#include "trace/tr_drm.h" -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#include "trace/tr_buffer.h" -#include "trace/tr_texture.h" +#include "tr_drm.h" +#include "tr_screen.h" +#include "tr_public.h" struct trace_drm_api { @@ -62,69 +60,8 @@ trace_drm_create_screen(struct drm_api *_api, int fd, screen = api->create_screen(api, fd, arg); - return trace_screen_create(screen); -} - - -static struct pipe_texture * -trace_drm_texture_from_shared_handle(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *templ, - const char *name, - unsigned stride, - unsigned handle) -{ - struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct pipe_screen *screen = tr_screen->screen; - struct drm_api *api = tr_api->api; - struct pipe_texture *result; - - /* TODO trace call */ - - result = api->texture_from_shared_handle(api, screen, templ, name, stride, handle); - - result = trace_texture_create(trace_screen(_screen), result); - - return result; -} - -static boolean -trace_drm_shared_handle_from_texture(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned *stride, - unsigned *handle) -{ - struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_texture *tr_texture = trace_texture(_texture); - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct pipe_screen *screen = tr_screen->screen; - struct pipe_texture *texture = tr_texture->texture; - struct drm_api *api = tr_api->api; - - /* TODO trace call */ - - return api->shared_handle_from_texture(api, screen, texture, stride, handle); -} -static boolean -trace_drm_local_handle_from_texture(struct drm_api *_api, - struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned *stride, - unsigned *handle) -{ - struct trace_screen *tr_screen = trace_screen(_screen); - struct trace_texture *tr_texture = trace_texture(_texture); - struct trace_drm_api *tr_api = trace_drm_api(_api); - struct pipe_screen *screen = tr_screen->screen; - struct pipe_texture *texture = tr_texture->texture; - struct drm_api *api = tr_api->api; - - /* TODO trace call */ - - return api->local_handle_from_texture(api, screen, texture, stride, handle); + return trace_screen_create(screen); } static void @@ -158,9 +95,6 @@ trace_drm_create(struct drm_api *api) tr_api->base.name = api->name; tr_api->base.driver_name = api->driver_name; tr_api->base.create_screen = trace_drm_create_screen; - tr_api->base.texture_from_shared_handle = trace_drm_texture_from_shared_handle; - tr_api->base.shared_handle_from_texture = trace_drm_shared_handle_from_texture; - tr_api->base.local_handle_from_texture = trace_drm_local_handle_from_texture; tr_api->base.destroy = trace_drm_destroy; tr_api->api = api; diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 6da186a6555..f82dd01c697 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -112,7 +112,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) trace_dump_member(uint, state, line_stipple_factor); trace_dump_member(uint, state, line_stipple_pattern); trace_dump_member(bool, state, line_last_pixel); - trace_dump_member(bool, state, bypass_vs_clip_and_viewport); trace_dump_member(bool, state, flatshade_first); trace_dump_member(bool, state, gl_rasterization_rules); @@ -480,7 +479,6 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_member(uint, state, src_offset); trace_dump_member(uint, state, vertex_buffer_index); - trace_dump_member(uint, state, nr_components); trace_dump_member(format, state, src_format); diff --git a/src/gallium/drivers/trace/tr_public.h b/src/gallium/drivers/trace/tr_public.h new file mode 100644 index 00000000000..62e217097d0 --- /dev/null +++ b/src/gallium/drivers/trace/tr_public.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +#ifndef TR_PUBLIC_H +#define TR_PUBLIC_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen; +struct pipe_context; + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen); + +#ifdef __cplusplus +} +#endif + +#endif /* TR_PUBLIC_H */ diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c index a43adac6940..f4f17566fd3 100644 --- a/src/gallium/drivers/trace/tr_rbug.c +++ b/src/gallium/drivers/trace/tr_rbug.c @@ -219,7 +219,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, struct trace_texture *tr_tex = NULL; struct tr_list *ptr; - struct pipe_screen *screen = tr_scr->screen; + struct pipe_context *context = tr_scr->private_context; struct pipe_texture *tex; struct pipe_transfer *t; @@ -239,12 +239,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, } tex = tr_tex->texture; - t = screen->get_tex_transfer(tr_scr->screen, tex, - gptr->face, gptr->level, gptr->zslice, - PIPE_TRANSFER_READ, - gptr->x, gptr->y, gptr->w, gptr->h); + t = context->get_tex_transfer(context, tex, + gptr->face, gptr->level, gptr->zslice, + PIPE_TRANSFER_READ, + gptr->x, gptr->y, gptr->w, gptr->h); - map = screen->transfer_map(screen, t); + map = context->transfer_map(context, t); rbug_send_texture_read_reply(tr_rbug->con, serial, t->texture->format, @@ -256,8 +256,8 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header, t->stride, NULL); - screen->transfer_unmap(screen, t); - screen->tex_transfer_destroy(t); + context->transfer_unmap(context, t); + context->tex_transfer_destroy(context, t); pipe_mutex_unlock(tr_scr->list_mutex); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 388d83eb5c2..25990bdac7f 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -35,6 +35,7 @@ #include "tr_texture.h" #include "tr_context.h" #include "tr_screen.h" +#include "tr_public.h" #include "util/u_inlines.h" #include "pipe/p_format.h" @@ -236,39 +237,41 @@ trace_screen_texture_create(struct pipe_screen *_screen, return result; } - static struct pipe_texture * -trace_screen_texture_blanket(struct pipe_screen *_screen, - const struct pipe_texture *templat, - const unsigned *ppitch, - struct pipe_buffer *_buffer) +trace_screen_texture_from_handle(struct pipe_screen *_screen, + const struct pipe_texture *templ, + struct winsys_handle *handle) { - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_buffer *tr_buf = trace_buffer(_buffer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_buffer *buffer = tr_buf->buffer; - unsigned pitch = *ppitch; + struct trace_screen *tr_screen = trace_screen(_screen); + struct pipe_screen *screen = tr_screen->screen; struct pipe_texture *result; - trace_dump_call_begin("pipe_screen", "texture_blanket"); + /* TODO trace call */ - trace_dump_arg(ptr, screen); - trace_dump_arg(template, templat); - trace_dump_arg(uint, pitch); - trace_dump_arg(ptr, buffer); + result = screen->texture_from_handle(screen, templ, handle); - result = screen->texture_blanket(screen, templat, ppitch, buffer); + result = trace_texture_create(trace_screen(_screen), result); - trace_dump_ret(ptr, result); + return result; +} - trace_dump_call_end(); +static boolean +trace_screen_texture_get_handle(struct pipe_screen *_screen, + struct pipe_texture *_texture, + struct winsys_handle *handle) +{ + struct trace_screen *tr_screen = trace_screen(_screen); + struct trace_texture *tr_texture = trace_texture(_texture); + struct pipe_screen *screen = tr_screen->screen; + struct pipe_texture *texture = tr_texture->texture; - result = trace_texture_create(tr_scr, result); + /* TODO trace call */ - return result; + return screen->texture_get_handle(screen, texture, handle); } + static void trace_screen_texture_destroy(struct pipe_texture *_texture) { @@ -350,133 +353,7 @@ trace_screen_tex_surface_destroy(struct pipe_surface *_surface) } -/******************************************************************** - * transfer - */ - - -static struct pipe_transfer * -trace_screen_get_tex_transfer(struct pipe_screen *_screen, - struct pipe_texture *_texture, - unsigned face, unsigned level, - unsigned zslice, - enum pipe_transfer_usage usage, - unsigned x, unsigned y, unsigned w, unsigned h) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_texture *tr_tex = trace_texture(_texture); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_texture *texture = tr_tex->texture; - struct pipe_transfer *result = NULL; - - assert(texture->screen == screen); - - trace_dump_call_begin("pipe_screen", "get_tex_transfer"); - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, texture); - trace_dump_arg(uint, face); - trace_dump_arg(uint, level); - trace_dump_arg(uint, zslice); - trace_dump_arg(uint, usage); - - trace_dump_arg(uint, x); - trace_dump_arg(uint, y); - trace_dump_arg(uint, w); - trace_dump_arg(uint, h); - - result = screen->get_tex_transfer(screen, texture, face, level, zslice, usage, - x, y, w, h); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - if (result) - result = trace_transfer_create(tr_tex, result); - - return result; -} - - -static void -trace_screen_tex_transfer_destroy(struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_transfer->texture->screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - - trace_dump_call_begin("pipe_screen", "tex_transfer_destroy"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, transfer); - - trace_dump_call_end(); - - trace_transfer_destroy(tr_trans); -} - - -static void * -trace_screen_transfer_map(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - void *map; - - map = screen->transfer_map(screen, transfer); - if(map) { - if(transfer->usage & PIPE_TRANSFER_WRITE) { - assert(!tr_trans->map); - tr_trans->map = map; - } - } - - return map; -} - - -static void -trace_screen_transfer_unmap(struct pipe_screen *_screen, - struct pipe_transfer *_transfer) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct trace_transfer *tr_trans = trace_transfer(_transfer); - struct pipe_screen *screen = tr_scr->screen; - struct pipe_transfer *transfer = tr_trans->transfer; - - if(tr_trans->map) { - size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride; - - trace_dump_call_begin("pipe_screen", "transfer_write"); - - trace_dump_arg(ptr, screen); - - trace_dump_arg(ptr, transfer); - - trace_dump_arg_begin("stride"); - trace_dump_uint(transfer->stride); - trace_dump_arg_end(); - - trace_dump_arg_begin("data"); - trace_dump_bytes(tr_trans->map, size); - trace_dump_arg_end(); - - trace_dump_arg_begin("size"); - trace_dump_uint(size); - trace_dump_arg_end(); - - trace_dump_call_end(); - - tr_trans->map = NULL; - } - - screen->transfer_unmap(screen, transfer); -} /******************************************************************** @@ -484,45 +361,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen, */ -static struct pipe_buffer * -trace_screen_surface_buffer_create(struct pipe_screen *_screen, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned tex_usage, - unsigned *pstride) -{ - struct trace_screen *tr_scr = trace_screen(_screen); - struct pipe_screen *screen = tr_scr->screen; - unsigned stride; - struct pipe_buffer *result; - - trace_dump_call_begin("pipe_screen", "surface_buffer_create"); - - trace_dump_arg(ptr, screen); - trace_dump_arg(uint, width); - trace_dump_arg(uint, height); - trace_dump_arg(format, format); - trace_dump_arg(uint, usage); - trace_dump_arg(uint, tex_usage); - - result = screen->surface_buffer_create(screen, - width, height, - format, - usage, - tex_usage, - pstride); - - stride = *pstride; - - trace_dump_arg(uint, stride); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - return trace_buffer_create(tr_scr, result); -} static struct pipe_buffer * @@ -931,17 +770,13 @@ trace_screen_create(struct pipe_screen *screen) assert(screen->context_create); tr_scr->base.context_create = trace_screen_context_create; tr_scr->base.texture_create = trace_screen_texture_create; - tr_scr->base.texture_blanket = trace_screen_texture_blanket; + tr_scr->base.texture_from_handle = trace_screen_texture_from_handle; + tr_scr->base.texture_get_handle = trace_screen_texture_get_handle; tr_scr->base.texture_destroy = trace_screen_texture_destroy; tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; tr_scr->base.tex_surface_destroy = trace_screen_tex_surface_destroy; - tr_scr->base.get_tex_transfer = trace_screen_get_tex_transfer; - tr_scr->base.tex_transfer_destroy = trace_screen_tex_transfer_destroy; - tr_scr->base.transfer_map = trace_screen_transfer_map; - tr_scr->base.transfer_unmap = trace_screen_transfer_unmap; tr_scr->base.buffer_create = trace_screen_buffer_create; tr_scr->base.user_buffer_create = trace_screen_user_buffer_create; - tr_scr->base.surface_buffer_create = trace_screen_surface_buffer_create; if (screen->buffer_map) tr_scr->base.buffer_map = trace_screen_buffer_map; if (screen->buffer_map_range) @@ -955,7 +790,11 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.fence_signalled = trace_screen_fence_signalled; tr_scr->base.fence_finish = trace_screen_fence_finish; tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer; + tr_scr->screen = screen; + tr_scr->private_context = screen->context_create(screen, NULL); + if (tr_scr->private_context == NULL) + goto error3; trace_dump_ret(ptr, screen); trace_dump_call_end(); @@ -965,10 +804,8 @@ trace_screen_create(struct pipe_screen *screen) return &tr_scr->base; -#if 0 error3: FREE(tr_scr); -#endif error2: trace_dump_ret(ptr, screen); trace_dump_call_end(); diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h index fe5a0fa1909..9bfbe72e2c0 100644 --- a/src/gallium/drivers/trace/tr_screen.h +++ b/src/gallium/drivers/trace/tr_screen.h @@ -56,6 +56,7 @@ struct trace_screen struct pipe_screen base; struct pipe_screen *screen; + struct pipe_context *private_context; /* remote debugger */ struct trace_rbug *rbug; @@ -99,9 +100,6 @@ trace_enabled(void); struct trace_screen * trace_screen(struct pipe_screen *screen); -struct pipe_screen * -trace_screen_create(struct pipe_screen *screen); - void trace_screen_user_buffer_update(struct pipe_screen *screen, struct pipe_buffer *buffer); diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 5321d68ec0c..d818e21bb82 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -31,6 +31,7 @@ #include "util/u_simple_list.h" #include "tr_screen.h" +#include "tr_context.h" #include "tr_texture.h" @@ -124,8 +125,9 @@ trace_surface_destroy(struct trace_surface *tr_surf) struct pipe_transfer * -trace_transfer_create(struct trace_texture *tr_tex, - struct pipe_transfer *transfer) +trace_transfer_create(struct trace_context *tr_ctx, + struct trace_texture *tr_tex, + struct pipe_transfer *transfer) { struct trace_screen *tr_scr = trace_screen(tr_tex->base.screen); struct trace_transfer *tr_trans; @@ -142,8 +144,9 @@ trace_transfer_create(struct trace_texture *tr_tex, memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer)); tr_trans->base.texture = NULL; - pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base); tr_trans->transfer = transfer; + + pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base); assert(tr_trans->base.texture == &tr_tex->base); trace_screen_add_to_list(tr_scr, transfers, tr_trans); @@ -151,21 +154,23 @@ trace_transfer_create(struct trace_texture *tr_tex, return &tr_trans->base; error: - transfer->texture->screen->tex_transfer_destroy(transfer); + tr_ctx->pipe->tex_transfer_destroy(tr_ctx->pipe, transfer); return NULL; } void -trace_transfer_destroy(struct trace_transfer *tr_trans) +trace_transfer_destroy(struct trace_context *tr_context, + struct trace_transfer *tr_trans) { - struct trace_screen *tr_scr = trace_screen(tr_trans->base.texture->screen); - struct pipe_screen *screen = tr_trans->transfer->texture->screen; + struct trace_screen *tr_scr = trace_screen(tr_context->base.screen); + struct pipe_context *context = tr_context->pipe; + struct pipe_transfer *transfer = tr_trans->transfer; trace_screen_remove_from_list(tr_scr, transfers, tr_trans); pipe_texture_reference(&tr_trans->base.texture, NULL); - screen->tex_transfer_destroy(tr_trans->transfer); + context->tex_transfer_destroy(context, transfer); FREE(tr_trans); } diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h index 395e523e73a..4dc95308a79 100644 --- a/src/gallium/drivers/trace/tr_texture.h +++ b/src/gallium/drivers/trace/tr_texture.h @@ -34,6 +34,7 @@ #include "tr_screen.h" +struct trace_context; struct trace_texture { @@ -60,6 +61,7 @@ struct trace_transfer struct pipe_transfer base; struct pipe_transfer *transfer; + struct pipe_context *pipe; struct tr_list list; @@ -112,11 +114,13 @@ void trace_surface_destroy(struct trace_surface *tr_surf); struct pipe_transfer * -trace_transfer_create(struct trace_texture *tr_tex, - struct pipe_transfer *transfer); +trace_transfer_create(struct trace_context *tr_ctx, + struct trace_texture *tr_tex, + struct pipe_transfer *transfer); void -trace_transfer_destroy(struct trace_transfer *tr_trans); +trace_transfer_destroy(struct trace_context *tr_ctx, + struct trace_transfer *tr_trans); #endif /* TR_TEXTURE_H_ */ |