diff options
Diffstat (limited to 'src/gallium/drivers')
73 files changed, 2681 insertions, 1665 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 6bace0bb11a..e989d8c2e5c 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -84,7 +84,7 @@ #define CELL_CMD_BATCH 5 #define CELL_CMD_RELEASE_VERTS 6 #define CELL_CMD_STATE_FRAMEBUFFER 10 -#define CELL_CMD_STATE_DEPTH_STENCIL 11 +#define CELL_CMD_STATE_FRAGMENT_OPS 11 #define CELL_CMD_STATE_SAMPLER 12 #define CELL_CMD_STATE_TEXTURE 13 #define CELL_CMD_STATE_VERTEX_INFO 14 @@ -92,9 +92,7 @@ #define CELL_CMD_STATE_UNIFORMS 16 #define CELL_CMD_STATE_VS_ARRAY_INFO 17 #define CELL_CMD_STATE_BIND_VS 18 -#define CELL_CMD_STATE_BLEND 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_STATE_LOGICOP 21 #define CELL_CMD_VS_EXECUTE 22 #define CELL_CMD_FLUSH_BUFFER_RANGE 23 @@ -106,30 +104,24 @@ #define CELL_BUFFER_STATUS_USED 20 +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_SYNC (1 << 1) -/** - */ -struct cell_command_depth_stencil_alpha_test { - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ - unsigned read_depth; /**< Flag: should depth be read? */ - unsigned read_stencil; /**< Flag: should stencil be read? */ -}; -/** - * Upload code to perform framebuffer blend operation - */ -struct cell_command_blend { - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ - unsigned read_fb; /**< Flag: should framebuffer be read? */ -}; +/** Max instructions for doing per-fragment operations */ +#define SPU_MAX_FRAGMENT_OPS_INSTS 64 -struct cell_command_logicop { - uint64_t base; /**< Effective address of code start. */ - unsigned size; /**< Size in bytes of SPE code. */ +/** + * Command to specify per-fragment operations state and generated code. 
+ */ +struct cell_command_fragment_ops +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; }; @@ -169,13 +161,15 @@ struct cell_array_info }; -struct cell_attribute_fetch_code { +struct cell_attribute_fetch_code +{ uint64_t base; uint size; }; -struct cell_buffer_range { +struct cell_buffer_range +{ uint64_t base; unsigned size; }; @@ -263,6 +257,7 @@ struct cell_init_info { unsigned id; unsigned num_spus; + unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ struct cell_command *cmd; /** Buffers for command batches, vertex/index data */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 0389a9554cf..8699f3f8ec2 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -5,7 +5,7 @@ TOP = ../../../../.. -include $(TOP)/configs/linux-cell +include $(TOP)/configs/current # This is the "top-level" cell PPU driver code, will get pulled into libGL.so @@ -25,9 +25,9 @@ SOURCES = \ cell_context.c \ cell_draw_arrays.c \ cell_flush.c \ + cell_gen_fragment.c \ cell_state_derived.c \ cell_state_emit.c \ - cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index f45e5f25b64..16882c01295 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -32,6 +32,13 @@ +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. 
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1] + */ uint cell_get_empty_buffer(struct cell_context *cell) { @@ -74,6 +81,11 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Flush the current batch buffer to the SPUs. + * An empty buffer will be found and set as the new current batch buffer + * for subsequent commands/data. + */ void cell_batch_flush(struct cell_context *cell) { @@ -93,11 +105,11 @@ cell_batch_flush(struct cell_context *cell) /* printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->batch_buffer[batch][0], size); + batch, &cell->buffer[batch][0], size); */ /* - * Build "BATCH" command and sent to all SPUs. + * Build "BATCH" command and send to all SPUs. */ cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); @@ -120,6 +132,9 @@ cell_batch_flush(struct cell_context *cell) } +/** + * Return the number of bytes free in the current batch buffer. + */ uint cell_batch_free_space(const struct cell_context *cell) { @@ -129,7 +144,9 @@ cell_batch_free_space(const struct cell_context *cell) /** - * Append data to current batch. + * Append data to the current batch buffer. + * \param data address of block of bytes to append + * \param bytes size of block of bytes */ void cell_batch_append(struct cell_context *cell, const void *data, uint bytes) @@ -165,6 +182,10 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes) } +/** + * Allocate space in the current batch buffer for 'bytes' space. + * \return address in batch buffer to put data + */ void * cell_batch_alloc(struct cell_context *cell, uint bytes) { @@ -172,6 +193,10 @@ cell_batch_alloc(struct cell_context *cell, uint bytes) } +/** + * Same as \sa cell_batch_alloc, but return an address at a particular + * alignment. 
+ */ void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment) @@ -215,3 +240,28 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, return pos; } + + +/** + * One-time init of batch buffers. + */ +void +cell_init_batch_buffers(struct cell_context *cell) +{ + uint spu, buf; + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h index a6eee0a8b18..f74dd600791 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -54,5 +54,8 @@ extern void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment); +extern void +cell_init_batch_buffers(struct cell_context *cell); + #endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index a421c95c8e8..c9c0c721bbe 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,6 +35,7 @@ #include <stdint.h> #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_pack_color.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" @@ -44,6 +45,27 @@ #include "cell_state.h" +/** + * Convert packed pixel from one format to another. 
+ */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Called via pipe->clear() + */ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) @@ -61,13 +83,21 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, PIPE_BUFFER_USAGE_GPU_WRITE); if (ps == cell->framebuffer.zsbuf) { + /* clear z/stencil buffer */ surfIndex = 1; } else { + /* clear color buffer */ surfIndex = 0; + + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } } + /* Build a CLEAR command and place it in the current batch buffer */ { struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 9ff4e86943b..71f1a3049d1 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -43,11 +43,11 @@ #include "draw/draw_private.h" #include "cell/common.h" +#include "cell_batch.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_flush.h" -#include "cell_render.h" #include "cell_state.h" #include "cell_surface.h" #include "cell_spu.h" @@ -85,12 +85,20 @@ cell_draw_create(struct cell_context *cell) } +#ifdef DEBUG +static const struct debug_named_value cell_debug_flags[] = { + {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {NULL, 0} +}; +#endif + + struct pipe_context * cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; - uint spu, buf; /* some 
fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -104,15 +112,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; - /* state setters */ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_vertex_elements = cell_set_vertex_elements; - - cell->pipe.draw_arrays = cell_draw_arrays; - cell->pipe.draw_elements = cell_draw_elements; - cell->pipe.draw_range_elements = cell_draw_range_elements; - cell->pipe.set_edgeflags = cell_set_edgeflags; - cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; @@ -122,20 +121,28 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.wait_query = cell_wait_query; #endif + cell_init_draw_functions(cell); cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_texture_functions(cell); + cell_init_vertex_functions(cell); cell->draw = cell_draw_create(cell); cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); /* convert all points/lines to tris for the time being */ draw_wide_point_threshold(cell->draw, 0.0); draw_wide_line_threshold(cell->draw, 0.0); + /* get env vars or read config file to get debug flags */ + cell->debug_flags = debug_get_flags_option("CELL_DEBUG", + cell_debug_flags, + 0 ); + /* * SPU stuff */ @@ -146,20 +153,7 @@ cell_create_context(struct pipe_screen *screen, cell_start_spus(cell); - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. 
- */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } + cell_init_batch_buffers(cell); return &cell->pipe; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index f1d1ca89a97..8cec9f45b2e 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -39,8 +39,13 @@ #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" + struct cell_vbuf_render; + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ struct cell_vertex_shader_state { struct pipe_shader_state shader; @@ -49,6 +54,9 @@ struct cell_vertex_shader_state }; +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ struct cell_fragment_shader_state { struct pipe_shader_state shader; @@ -57,7 +65,11 @@ struct cell_fragment_shader_state }; -struct cell_blend_state { +/** + * Cell blend state atom, subclass of pipe_blend_state. + */ +struct cell_blend_state +{ struct pipe_blend_state base; /** @@ -67,17 +79,24 @@ struct cell_blend_state { }; -struct cell_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state base; +/** + * Cell depth/stencil/alpha state atom, subclass of + * pipe_depth_stencil_alpha_state. + */ +struct cell_depth_stencil_alpha_state +{ + struct pipe_depth_stencil_alpha_state base; /** * Generated code to perform alpha, stencil, and depth testing on the SPE */ struct spe_function code; - }; +/** + * Per-context state, subclass of pipe_context. 
+ */ struct cell_context { struct pipe_context pipe; @@ -144,6 +163,8 @@ struct cell_context struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + + unsigned debug_flags; }; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index f02dffe1245..880d5353207 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -76,14 +77,6 @@ cell_unmap_constant_buffers(struct cell_context *sp) } -boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - return cell_draw_elements(pipe, NULL, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. @@ -92,7 +85,7 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? 
*/ -boolean +static boolean cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -116,7 +109,7 @@ cell_draw_range_elements(struct pipe_context *pipe, * Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf = pipe->winsys->buffer_map(pipe->winsys, + void *buf = pipe_buffer_map(pipe->screen, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); @@ -124,7 +117,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + void *mapped_indexes = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); @@ -143,11 +136,11 @@ cell_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); } /* Note: leave drawing surfaces mapped */ @@ -157,7 +150,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } -boolean +static boolean cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -170,10 +163,29 @@ cell_draw_elements(struct pipe_context *pipe, } +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + -void +static void cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) { struct cell_context *cell = cell_context(pipe); 
draw_set_edgeflags(cell->draw, edgeflags); } + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h index cd35ec17b4e..148873aa675 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -29,26 +29,8 @@ #define CELL_DRAW_ARRAYS_H -extern boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -extern boolean -cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - -extern boolean -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); - extern void -cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); +cell_init_draw_functions(struct cell_context *cell); #endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 3aaf3de6684..6596b720101 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -34,6 +34,9 @@ #include "draw/draw_context.h" +/** + * Called via pipe->flush() + */ void cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) @@ -50,16 +53,19 @@ cell_flush(struct pipe_context *pipe, unsigned flags, flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); - cell_flush_int(pipe, flags); + cell_flush_int(cell, flags); } -/** internal flush */ +/** + * Cell internal flush function. 
Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. + * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero + */ void -cell_flush_int(struct pipe_context *pipe, unsigned flags) +cell_flush_int(struct cell_context *cell, unsigned flags) { static boolean flushing = FALSE; /* recursion catcher */ - struct cell_context *cell = cell_context(pipe); uint i; ASSERT(!flushing); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index 8f0645c4293..509ae6239ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -36,7 +36,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); extern void -cell_flush_int(struct pipe_context *pipe, unsigned flags); +cell_flush_int(struct cell_context *cell, unsigned flags); extern void cell_flush_buffer_range(struct cell_context *cell, void *ptr, diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 00000000000..79a82ef72b5 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,862 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. 
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg register containing integer fragment Z values (in) + * \param ifbZ_reg register containing integer frame buffer Z values (in/out) + * \param zmask_reg register containing result of Z test/comparison (out) + */ +static void +gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, + int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ + ASSERT(dsa->depth.enabled); + + switch (dsa->depth.func) { + case PIPE_FUNC_EQUAL: + /* zmask = (ifragZ == ifbZ) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* zmask = (ifragZ == ifbZ) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GREATER: + /* zmask = (ifragZ > ifbZ) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LESS: + /* zmask = (ifbZ > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* zmask = (ifragZ > ifbZ) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* zmask = (ifbZ > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ + spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* mask unchanged */ + spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ + break; + + default: + ASSERT(0); + break; + } + + if (dsa->depth.writemask) {
+ /* + * If (ztest passed) { + * framebufferZ = fragmentZ; + * } + * OR, + * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ); + */ + spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + } +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask_reg, int fragA_reg) +{ + int ref_reg = spe_allocate_available_register(f); + int amask_reg = spe_allocate_available_register(f); + + ASSERT(dsa->alpha.enabled); + + if ((dsa->alpha.func != PIPE_FUNC_NEVER) && + (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + /* load/splat the alpha reference float value */ + spe_load_float(f, ref_reg, dsa->alpha.ref); + } + + /* emit code to do the alpha comparison, updating 'mask' */ + switch (dsa->alpha.func) { + case PIPE_FUNC_EQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GREATER: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LESS: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f,
mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ + break; + + case PIPE_FUNC_ALWAYS: + /* no-op, mask unchanged */ + break; + + default: + ASSERT(0); + break; + } + +#if OPTIMIZATIONS + /* if mask == {0,0,0,0} we're all done, return */ + { + /* re-use amask reg here */ + int tmp_reg = amask_reg; + /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ + spe_orx(f, tmp_reg, mask_reg); + /* if tmp[0] == 0 then return from function call */ + spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); + } +#endif + + spe_release_register(f, ref_reg); + spe_release_register(f, amask_reg); +} + + + +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. + * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int term2A_reg = spe_allocate_available_register(f); + + 
int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int one_reg = spe_allocate_available_register(f); + int tmp_reg = spe_allocate_available_register(f); + + ASSERT(blend->blend_enable); + + /* Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ + { + int mask_reg = spe_allocate_available_register(f); + + /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ + spe_fsmbi(f, mask_reg, 0x1111); + + /* XXX there may be more clever ways to implement the following code */ + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 8 */ + spe_roti(f, fbG_reg, fbG_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 16 */ + spe_roti(f, fbR_reg, fbR_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 16 */ + 
spe_roti(f, fbG_reg, fbG_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask_reg); + } + + + /* + * Compute Src RGB terms + */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term1R_reg); + spe_zero(f, term1G_reg); + spe_zero(f, term1B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1A_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2R_reg); + spe_zero(f, term2G_reg); + spe_zero(f, 
term2B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = fb * tmp */ + spe_fm(f, term2R_reg, fbR_reg, tmp_reg); + spe_fm(f, term2G_reg, fbG_reg, tmp_reg); + spe_fm(f, term2B_reg, fbB_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2A_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* termA = fbA * tmp */ + spe_fm(f, term2A_reg, fbA_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, 
term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, one_reg); + spe_release_register(f, tmp_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + +static void +gen_colormask(uint colormask, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + + +/** + * Generate code to pack a quad of float colors into four 32-bit integers. + * + * \param f SPE function to append instruction onto. 
+ * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to the least significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. 
+ * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rgba_reg, r_reg, g_reg); + spe_or(f, rgba_reg, rgba_reg, b_reg); + spe_or(f, rgba_reg, rgba_reg, a_reg); +} + + + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader. But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell the rendering context (in) + * \param f the generated function (out) + */ +void +gen_fragment_function(struct cell_context *cell, struct spe_function *f) +{ + const struct pipe_depth_stencil_alpha_state *dsa = + &cell->depth_stencil->base; + const struct pipe_blend_state *blend = &cell->blend->base; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ + const int x_reg = 3; /* uint */ + const int y_reg = 4; /* uint */ + const int color_tile_reg = 5; /* tile_t * */ + const int depth_tile_reg = 6; /* tile_t * */ + const int fragZ_reg = 7; /* vector float */ + const int fragR_reg = 8; /* vector float */ + const int fragG_reg = 9; /* vector float */ + const int fragB_reg = 10; /* vector float */ + const int fragA_reg = 11; /* vector float */ + const int mask_reg = 12; /* vector uint */ + + /* offset of quad from start of tile + * XXX assuming 4-byte pixels for color AND Z/stencil!!!! + */ + int quad_offset_reg; + + int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ + int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ + + spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, x_reg); + spe_allocate_register(f, y_reg); + spe_allocate_register(f, color_tile_reg); + spe_allocate_register(f, depth_tile_reg); + spe_allocate_register(f, fragZ_reg); + spe_allocate_register(f, fragR_reg); + spe_allocate_register(f, fragG_reg); + spe_allocate_register(f, fragB_reg); + spe_allocate_register(f, fragA_reg); + spe_allocate_register(f, mask_reg); + + quad_offset_reg = spe_allocate_available_register(f); + fbRGBA_reg = spe_allocate_available_register(f); + fbZS_reg = spe_allocate_available_register(f); + + /* compute offset of quad from start of tile, in bytes */ + { + int x2_reg = spe_allocate_available_register(f); + int y2_reg = spe_allocate_available_register(f); + + ASSERT(TILE_SIZE == 32); + + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ + spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ + spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ + + spe_release_register(f, x2_reg); + spe_release_register(f, y2_reg); + } + + + if (dsa->alpha.enabled) { + gen_alpha_test(dsa, f, mask_reg, fragA_reg); + } + + if (dsa->depth.enabled 
|| dsa->stencil[0].enabled) { + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + int fbZ_reg = spe_allocate_available_register(f); /* Z values */ + int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + + /* fetch quad of depth/stencil values from tile at (x,y) */ + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + if (dsa->depth.enabled) { + /* Extract Z bits from fbZS_reg into fbZ_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + int mask_reg = spe_allocate_available_register(f); + spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ + spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ + spe_release_register(f, mask_reg); + /* OK, fbZ_reg has four 24-bit Z values now */ + } + else { + /* XXX handle other z/stencil formats */ + ASSERT(0); + } + + /* Convert fragZ values from float[4] to uint[4] */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM || + zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* 24-bit Z values */ + int scale_reg = spe_allocate_available_register(f); + + /* scale_reg[0,1,2,3] = float(2^24-1) */ + spe_load_float(f, scale_reg, (float) 0xffffff); + + /* XXX these two instructions might be combined */ + spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ + + spe_release_register(f, scale_reg); + } + else { + /* XXX handle 16-bit Z format */ + ASSERT(0); + } + } + + if (dsa->stencil[0].enabled) { + /* Extract Stencil bits from fbZS_reg into fbS_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX extract with a shift */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* XXX 
extract with a mask */ + ASSERT(0); + } + } + + + if (dsa->stencil[0].enabled) { + /* XXX this may involve depth testing too */ + // gen_stencil_test(dsa, f, ... ); + ASSERT(0); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + + /* do we need to write Z and/or Stencil back into framebuffer? */ + write_depth_stencil = (dsa->depth.writemask | + dsa->stencil[0].write_mask | + dsa->stencil[1].write_mask); + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. + * fbS_reg has four 8-bit stencil values in bits [7..0]. + */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else { + /* bad zs_format */ + ASSERT(0); + } + + /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + spe_release_register(f, fbZ_reg); + spe_release_register(f, fbS_reg); + } + + + /* Get framebuffer quad/colors. We'll need these for blending, + * color masking, and to obey the quad/pixel mask. + * Load: fbRGBA_reg = memory[color_tile + quad_offset] + * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking + * we could skip this load. 
+ */ + spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + + + if (blend->blend_enable) { + gen_blend(blend, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); + } + + /* + * Write fragment colors to framebuffer/tile. + * This involves converting the fragment colors from float[4] to the + * tile's specific format and obeying the quad/pixel mask. + */ + { + int rgba_reg = spe_allocate_available_register(f); + + /* Pack four float colors as four 32-bit int colors */ + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); + + if (blend->logicop_enable) { + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); + } + + if (blend->colormask != 0xf) { + gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); + } + + + /* Mix fragment colors with framebuffer colors using the quad/pixel mask: + * if (mask[i]) + * rgba[i] = rgba[i]; + * else + * rgba[i] = framebuffer[i]; + */ + spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + + /* Store updated quad in tile: + * memory[color_tile + quad_offset] = rgba_reg; + */ + spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); + + spe_release_register(f, rgba_reg); + } + + printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + + spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ + + + spe_release_register(f, fbRGBA_reg); + spe_release_register(f, fbZS_reg); + spe_release_register(f, quad_offset_reg); +} + diff --git a/src/gallium/drivers/trace/tr_stream.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h index 6111174d6a8..0ea0fc690c8 100644 --- a/src/gallium/drivers/trace/tr_stream.h +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,38 +22,17 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Cross-platform sequential access stream abstraction. * - * These are really general purpose file access functions, and might one day - * be moved into the util module. 
- */ - -#ifndef TR_STREAM_H -#define TR_STREAM_H - - -#include "pipe/p_compiler.h" - - -struct trace_stream; + **************************************************************************/ -struct trace_stream * -trace_stream_create(const char *filename); +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size); -void -trace_stream_flush(struct trace_stream *stream); +extern void +gen_fragment_function(struct cell_context *cell, struct spe_function *f); -void -trace_stream_close(struct trace_stream *stream); +#endif /* CELL_GEN_FRAGMENT_H */ -#endif /* TR_STREAM_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index fe5437023b9..e04cf5f274a 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -34,6 +34,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" +#include "cell_flush.h" #include "cell_state.h" #include "cell_texture.h" #include "cell_state_per_fragment.h" @@ -130,8 +131,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) } -static void cell_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +cell_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct cell_context *cell = cell_context(pipe); @@ -310,8 +312,21 @@ cell_set_framebuffer_state(struct pipe_context *pipe, cell->zsbuf_map = NULL; } - /* update my state */ - cell->framebuffer = *fb; + /* Finish any pending rendering to the current surface before + * installing a new surface! 
+ */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + /* update my state + * (this is also where old surfaces will finally get freed) + */ + cell->framebuffer.width = fb->width; + cell->framebuffer.height = fb->height; + cell->framebuffer.num_cbufs = fb->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); /* map new surfaces */ if (csurf) diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 973c0b1aa12..9508227e298 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -26,6 +26,11 @@ **************************************************************************/ +/** + * Utility/wrappers for communicating with the SPUs. + */ + + #include <pthread.h> #include "cell_spu.h" @@ -40,6 +45,9 @@ helpful headers: */ +/** + * Cell/SPU info that's not per-context. + */ struct cell_global_info cell_global; @@ -74,7 +82,11 @@ wait_mbox_message(spe_context_ptr_t ctx) } -static void *cell_thread_function(void *arg) +/** + * Called by pthread_create() to spawn an SPU thread. + */ +static void * +cell_thread_function(void *arg) { struct cell_init_info *init = (struct cell_init_info *) arg; unsigned entry = SPE_DEFAULT_ENTRY; @@ -92,7 +104,10 @@ static void *cell_thread_function(void *arg) /** - * Create the SPU threads + * Create the SPU threads. This is done once during driver initialization. + * This involves setting up the "init" message which is sent to each SPU. + * The init message specifies an SPU id, total number of SPUs, location + * and number of batch buffers, etc. 
*/ void cell_start_spus(struct cell_context *cell) @@ -100,7 +115,6 @@ cell_start_spus(struct cell_context *cell) static boolean one_time_init = FALSE; uint i, j; - if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " "on Cell.\n"); @@ -120,6 +134,7 @@ cell_start_spus(struct cell_context *cell) for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].debug_flags = cell->debug_flags; cell_global.inits[i].cmd = &cell_global.command[i]; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; @@ -137,14 +152,17 @@ cell_start_spus(struct cell_context *cell) exit(1); } - pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, - &cell_global.inits[i]); + pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ + NULL, /* pthread attribs */ + &cell_thread_function, /* start routine */ + &cell_global.inits[i]); /* thread argument */ } } /** * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. 
*/ void cell_spu_exit(struct cell_context *cell) diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 82580ea35ab..a7771a55a31 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -48,19 +48,17 @@ #define CELL_NEW_VERTEX_INFO 0x8000 -void cell_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); +extern void +cell_update_derived( struct cell_context *softpipe ); -void cell_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); -void cell_update_derived( struct cell_context *softpipe ); +extern void +cell_init_shader_functions(struct cell_context *cell); -void -cell_init_shader_functions(struct cell_context *cell); +extern void +cell_init_vertex_functions(struct cell_context *cell); + #endif /* CELL_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 8ab938a02aa..efc4f78364b 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -35,21 +35,6 @@ #include "cell_state_emit.h" -static int -find_vs_output(const struct cell_vertex_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) - return i; - } - return -1; -} - - /** * Determine how to map vertex program outputs to fragment program inputs. 
* Basically, this will be used when computing the triangle interpolation @@ -58,7 +43,6 @@ find_vs_output(const struct cell_vertex_shader_state *vs, static void calculate_vertex_layout( struct cell_context *cell ) { - const struct cell_vertex_shader_state *vs = cell->vs; const struct cell_fragment_shader_state *fs = cell->fs; const enum interp_mode colorInterp = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -82,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -98,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -116,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); @@ -163,6 +147,9 @@ compute_cliprect(struct cell_context *sp) +/** + * Update derived state, send current state to SPUs prior to rendering. 
+ */ void cell_update_derived( struct cell_context *cell ) { if (cell->dirty & (CELL_NEW_RASTERIZER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9d88c1cf3d2..180b89c1f66 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "cell_context.h" +#include "cell_gen_fragment.h" #include "cell_state.h" #include "cell_state_emit.h" #include "cell_state_per_fragment.h" @@ -47,27 +48,13 @@ emit_state_cmd(struct cell_context *cell, uint cmd, } - +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ void cell_emit_state(struct cell_context *cell) { - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) { - struct cell_command_logicop logicop; - - if (cell->logic_op.store != NULL) { - spe_release_func(& cell->logic_op); - } - - cell_generate_logic_op(& cell->logic_op, - & cell->blend->base, - cell->framebuffer.cbufs[0]); - - logicop.base = (intptr_t) cell->logic_op.store; - logicop.size = 64 * 4; - emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop, - sizeof(logicop)); - } - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; struct pipe_surface *zbuf = cell->framebuffer.zsbuf; @@ -80,44 +67,33 @@ cell_emit_state(struct cell_context *cell) fb->depth_format = zbuf ? 
zbuf->format : PIPE_FORMAT_NONE; fb->width = cell->framebuffer.width; fb->height = cell->framebuffer.height; +#if 0 + printf("EMIT color format %s\n", pf_name(fb->color_format)); + printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif } - if (cell->dirty & CELL_NEW_BLEND) { - struct cell_command_blend blend; - if (cell->blend != NULL) { - blend.base = (intptr_t) cell->blend->code.store; - blend.size = (char *) cell->blend->code.csr - - (char *) cell->blend->code.store; - blend.read_fb = TRUE; - } else { - blend.base = 0; - blend.size = 0; - blend.read_fb = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend)); - } - - if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - struct cell_command_depth_stencil_alpha_test dsat; - - - if (cell->depth_stencil != NULL) { - dsat.base = (intptr_t) cell->depth_stencil->code.store; - dsat.size = (char *) cell->depth_stencil->code.csr - - (char *) cell->depth_stencil->code.store; - dsat.read_depth = TRUE; - dsat.read_stencil = FALSE; - } else { - dsat.base = 0; - dsat.size = 0; - dsat.read_depth = FALSE; - dsat.read_stencil = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, - sizeof(dsat)); + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | + CELL_NEW_DEPTH_STENCIL | + CELL_NEW_BLEND)) { + /* XXX we don't want to always do codegen here. We should have + * a hash/lookup table to cache previous results... 
+ */ + struct cell_command_fragment_ops *fops + = cell_batch_alloc(cell, sizeof(*fops)); + struct spe_function spe_code; + + /* generate new code */ + gen_fragment_function(cell, &spe_code); + /* put the new code into the batch buffer */ + fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; + memcpy(&fops->code, spe_code.store, + SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + fops->dsa = cell->depth_stencil->base; + fops->blend = cell->blend->base; + /* free codegen buffer */ + spe_release_func(&spe_code); } if (cell->dirty & CELL_NEW_SAMPLER) { @@ -157,7 +133,8 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } - + +#if 0 if (cell->dirty & CELL_NEW_VS) { const struct draw_context *const draw = cell->draw; struct cell_shader_info info; @@ -170,7 +147,7 @@ cell_emit_state(struct cell_context *cell) info.immediates = (uintptr_t) draw->vs.machine.Imms; info.num_immediates = draw->vs.machine.ImmLimit / 4; - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, - & info, sizeof(info)); + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); } +#endif } diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 53ae3aa50e7..78cb446c14a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to perform Z testing. Four Z values are tested at once. 
* \param dsa Current depth-test state * \param f Function to which code should be appended - * \param m Mask of allocated / free SPE registers * \param mask Index of register to contain depth-pass mask * \param stored Index of register containing values from depth buffer * \param calculated Index of register containing per-fragment depth values @@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to apply the stencil operation (after testing). * \note Emits a maximum of 5 instructions. * * \warning @@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f, spe_il(f, result, ref); break; case PIPE_STENCIL_OP_INCR: + /* clamp = [0xff, 0xff, 0xff, 0xff] */ spe_il(f, clamp, 0x0ff); + /* result[i] = in[i] + 1 */ spe_ai(f, result, in, 1); + /* clamp_mask[i] = (result[i] > 0xff) */ spe_clgti(f, clamp_mask, result, 0x0ff); + /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ spe_selb(f, result, result, clamp, clamp_mask); break; case PIPE_STENCIL_OP_DECR: @@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f, /** + * Generate code to do stencil test. Four pixels are tested at once. 
* \param dsa Depth / stencil test state * \param face 0 for front face, 1 for back face * \param f Function to append instructions to - * \param reg_mask Mask of allocated registers * \param mask Register containing mask of fragments passing the * alpha test * \param depth_mask Register containing mask of fragments passing the @@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, switch (dsa->stencil[face].func) { case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); + spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ break; case PIPE_FUNC_NOTEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: + /* stencil_mask[i] = (stored[i] == ref) */ spe_ceqi(f, stencil_mask, stored, ref); break; @@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: + complement = TRUE; + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); break; @@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); + /* tmp[i] = (stored[i] == ref) */ spe_ceqi(f, tmp, stored, ref); + /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ spe_or(f, stencil_mask, stencil_mask, tmp); break; @@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round * up to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Allocate registers for the function's input parameters. 
Cleverly (and @@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) spe_selb(f, depth, depth, zvals, mask); } - spe_bi(f, 0, 0, 0); + spe_bi(f, 0, 0, 0); /* return from function call */ #if 0 @@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to * make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); const int frag[4] = { @@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) } -int PC_OFFSET(const struct spe_function *f, const void *d) +static int +PC_OFFSET(const struct spe_function *f, const void *d) { - const intptr_t pc = (intptr_t) f->csr; + const intptr_t pc = (intptr_t) &f->store[f->num_inst]; const intptr_t ea = ~0x0f & (intptr_t) d; return (ea - pc) >> 2; @@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f, * bytes (equiv. to 8 instructions) are needed for data storage. Round up * to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Pixel colors in framebuffer format in AoS layout. diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 86bcad05e9e..97e44eeb1a4 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -53,7 +53,10 @@ cell_vertex_shader_state(void *shader) } - +/** + * Create fragment shader state. 
+ * Called via pipe->create_fs_state() + */ static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -77,6 +80,9 @@ cell_create_fs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_fs_state() + */ static void cell_bind_fs_state(struct pipe_context *pipe, void *fs) { @@ -88,6 +94,9 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Called via pipe->delete_fs_state() + */ static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { @@ -98,6 +107,10 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Create vertex shader state. + * Called via pipe->create_vs_state() + */ static void * cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -128,6 +141,9 @@ cell_create_vs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_vs_state() + */ static void cell_bind_vs_state(struct pipe_context *pipe, void *vs) { @@ -142,6 +158,9 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->delete_vs_state() + */ static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { @@ -154,6 +173,9 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->set_constant_buffer() + */ static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -166,7 +188,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &cell->constants[shader].buffer, buf->buffer); cell->constants[shader].size = buf->size; diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 114684c2a33..fbe55c84721 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void +static void 
cell_set_vertex_elements(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *elements) @@ -53,7 +53,7 @@ cell_set_vertex_elements(struct pipe_context *pipe, } -void +static void cell_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -69,3 +69,11 @@ cell_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(cell->draw, count, buffers); } + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index d9e3b510dc0..732c64082ef 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -25,108 +25,13 @@ * **************************************************************************/ -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "util/u_memory.h" #include "util/u_rect.h" -#include "util/u_tile.h" - #include "cell_context.h" -#include "cell_surface.h" - - -static void -cell_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - assert( dst->cpp == src->cpp ); - - pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), - &dst->block, - dst->stride, - dstx, dsty, - width, height, - pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ), - do_flip ? -src->stride : src->stride, - srcx, do_flip ? 
height - 1 - srcy : srcy); - - pipe_surface_unmap(src); - pipe_surface_unmap(dst); -} - - -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; -} - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void -cell_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - unsigned i, j; - void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE); - - assert(dst->stride > 0); - - switch (dst->block.size) { - case 1: - case 2: - case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst->stride/2; - } - } - break; - default: - assert(0); - break; - } - - pipe_surface_unmap( dst ); -} void cell_init_surface_functions(struct cell_context *cell) { - cell->pipe.surface_copy = cell_surface_copy; - cell->pipe.surface_fill = cell_surface_fill; + cell->pipe.surface_copy = util_surface_copy; + cell->pipe.surface_fill = util_surface_fill; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c 
b/src/gallium/drivers/cell/ppu/cell_texture.c index 5a0942bbd6e..b6590dfb86e 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -63,19 +63,30 @@ cell_texture_layout(struct cell_texture * spt) spt->buffer_size = 0; for ( level = 0 ; level <= pt->last_level ; level++ ) { + unsigned size; + unsigned w_tile, h_tile; + + /* width, height, rounded up to tile size */ + w_tile = align(width, TILE_SIZE); + h_tile = align(height, TILE_SIZE); + pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); spt->stride[level] = pt->nblocksx[level] * pt->block.size; spt->level_offset[level] = spt->buffer_size; - spt->buffer_size += (pt->nblocksy[level] * - ((pt->target == PIPE_TEXTURE_CUBE) ? 
6 : depth) * - pt->nblocksx[level] * pt->block.size); + size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + if (pt->target == PIPE_TEXTURE_CUBE) + size *= 6; + else + size *= depth; + + spt->buffer_size += size; width = minify(width); height = minify(height); @@ -85,8 +96,8 @@ cell_texture_layout(struct cell_texture * spt) static struct pipe_texture * -cell_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) +cell_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = CALLOC_STRUCT(cell_texture); @@ -113,8 +124,8 @@ cell_texture_create_screen(struct pipe_screen *screen, static void -cell_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) +cell_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -130,7 +141,7 @@ cell_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } @@ -138,6 +149,7 @@ cell_texture_release_screen(struct pipe_screen *screen, } +#if 0 static void cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, uint face, uint levelsMask) @@ -145,13 +157,14 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, /* XXX TO DO: re-tile the texture data ... 
*/ } +#endif static struct pipe_surface * -cell_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); @@ -161,7 +174,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + winsys_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; @@ -174,12 +187,17 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, /* XXX may need to override usage flags (see sp_texture.c) */ + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * ps->nblocksy * ps->stride; - } else { + } + else { assert(face == 0); assert(zslice == 0); } @@ -189,6 +207,11 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, +/** + * Copy tile data from linear layout to tiled layout. + * XXX this should be rolled into the future surface-creation code. + * XXX also need "untile" code... + */ static void tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) { @@ -219,6 +242,7 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) /** * Convert linear texture image data to tiled format for SPU usage. + * XXX recast this in terms of pipe_surfaces (aka texture views). 
*/ static void cell_tile_texture(struct cell_context *cell, @@ -285,6 +309,21 @@ cell_update_texture_mapping(struct cell_context *cell) } +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + static void * cell_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, @@ -297,7 +336,7 @@ cell_surface_map( struct pipe_screen *screen, return NULL; } - map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -323,7 +362,7 @@ static void cell_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } @@ -333,12 +372,15 @@ cell_init_texture_functions(struct cell_context *cell) /*cell->pipe.texture_update = cell_texture_update;*/ } + void cell_init_screen_texture_funcs(struct pipe_screen *screen) { - screen->texture_create = cell_texture_create_screen; - screen->texture_release = cell_texture_release_screen; - screen->get_tex_surface = cell_get_tex_surface_screen; + screen->texture_create = cell_texture_create; + screen->texture_release = cell_texture_release; + + screen->get_tex_surface = cell_get_tex_surface; + screen->tex_surface_release = cell_tex_surface_release; screen->surface_map = cell_surface_map; screen->surface_unmap = cell_surface_unmap; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e4230c7a5ff..aa63435b934 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ 
b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -26,6 +26,11 @@ **************************************************************************/ /** + * Vertex buffer code. The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * * Authors * Brian Paul */ @@ -113,7 +118,7 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } cvbr->vertex_buf = ~0; - cell_flush_int(&cell->pipe, 0x0); + cell_flush_int(cell, 0x0); assert(vertices == cvbr->vertex_buffer); cvbr->vertex_buffer = NULL; @@ -121,12 +126,13 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, -static void +static boolean cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); cvbr->prim = prim; /*printf("cell_set_prim %u\n", prim);*/ + return TRUE; } @@ -244,7 +250,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, #if 0 /* helpful for debug */ - cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); #endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 2ece0250f6f..566df7f59e3 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw) /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. + * actually a slight over-estimate). */ - spe_init_func(p, 136 * unique_attr_formats); + spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); /* Allocate registers for the function's input parameters. 
diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 3658947715f..2b10c116fa3 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -135,7 +135,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) vs->num_elts = n; send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 8e83610790e..1ae0dfb8c10 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -5,7 +5,7 @@ TOP = ../../../../.. -include $(TOP)/configs/linux-cell +include $(TOP)/configs/current PROG = g3d @@ -22,12 +22,15 @@ SOURCES = \ spu_render.c \ spu_texture.c \ spu_tile.c \ - spu_tri.c \ + spu_tri.c + +OLD_SOURCES = \ spu_exec.c \ spu_util.c \ spu_vertex_fetch.c \ spu_vertex_shader.c + SPU_OBJECTS = $(SOURCES:.c=.o) \ SPU_ASM_OUT = $(SOURCES:.c=.s) \ @@ -43,7 +46,7 @@ INCLUDE_DIRS = \ $(SPU_CC) $(SPU_CFLAGS) -c $< .c.s: - $(SPU_CC) $(SPU_CFLAGS) -S $< + $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< # The .a file will be linked into the main/PPU executable diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h index e9fee8a3a61..fd8dc6ded3e 100644 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -79,14 +79,14 @@ spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) static INLINE vector float -spu_unpack_color(uint color) +spu_unpack_B8G8R8A8(uint color) { vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 0, 0, 0, 0, - 5, 5, 5, 5, 10, 10, 10, 10, + 5, 5, 5, 5, + 0, 0, 0, 0, 15, 15, 15, 15}) ); return 
spu_convtf(color_u4, 32); } diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 89c61136a4c..e27df2dfb38 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -382,10 +382,10 @@ fetch_src_file_channel( break; case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - assert( index->i[1] < (int) mach->ImmLimit ); - assert( index->i[2] < (int) mach->ImmLimit ); - assert( index->i[3] < (int) mach->ImmLimit ); + ASSERT( index->i[0] < (int) mach->ImmLimit ); + ASSERT( index->i[1] < (int) mach->ImmLimit ); + ASSERT( index->i[2] < (int) mach->ImmLimit ); + ASSERT( index->i[3] < (int) mach->ImmLimit ); chan->f[0] = mach->Imms[index->i[0]][swizzle]; chan->f[1] = mach->Imms[index->i[1]][swizzle]; @@ -409,7 +409,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } break; @@ -422,7 +422,7 @@ fetch_src_file_channel( break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -471,7 +471,7 @@ fetch_source( index.q = si_shli(index.q, 12); break; default: - assert( 0 ); + ASSERT( 0 ); } index.i[0] += reg->SrcRegisterDim.Index; @@ -558,7 +558,7 @@ store_dest( break; default: - assert( 0 ); + ASSERT( 0 ); return; } @@ -582,11 +582,11 @@ store_dest( break; case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); + ASSERT( 0 ); break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -769,7 +769,7 @@ exec_tex(struct spu_exec_machine *mach, break; default: - assert (0); + ASSERT (0); } FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { @@ -861,7 +861,7 @@ exec_declaration(struct spu_exec_machine *mach, break; default: - assert( 0 ); + ASSERT( 0 ); } if( mask == TGSI_WRITEMASK_XYZW ) { @@ -971,11 +971,11 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_LOG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_MUL: @@ -1151,24 +1151,24 @@ exec_instruction( break; case TGSI_OPCODE_CND: - assert (0); + ASSERT (0); break; case 
TGSI_OPCODE_CND0: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - assert (0); + ASSERT (0); break; case TGSI_OPCODE_INDEX: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_NEGATE: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_FRAC: @@ -1181,7 +1181,7 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_FLOOR: @@ -1276,7 +1276,7 @@ exec_instruction( break; case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ABS: @@ -1290,7 +1290,7 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DPH: @@ -1353,23 +1353,23 @@ exec_instruction( break; case TGSI_OPCODE_PK2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PK4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_RFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SEQ: @@ -1384,7 +1384,7 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_SGT: @@ -1429,7 +1429,7 @@ exec_instruction( break; case TGSI_OPCODE_STR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TEX: @@ -1452,7 +1452,7 @@ exec_instruction( /* src[1] = d[strq]/dx */ /* src[2] = d[strq]/dy */ /* src[3] = sampler unit */ - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXL: @@ -1470,35 +1470,35 @@ exec_instruction( break; case TGSI_OPCODE_UP2H: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP2US: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4B: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_UP4UB: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_X2D: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ARR: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_BRA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CAL: @@ 
-1507,14 +1507,14 @@ exec_instruction( /* do the call */ /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; /* note that PC was already incremented above */ @@ -1538,13 +1538,13 @@ exec_instruction( *pc = mach->CallStack[--mach->CallStackTop]; /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); + ASSERT(mach->FuncStackTop > 0); mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; UPDATE_EXEC_MASK(mach); @@ -1552,7 +1552,7 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CMP: @@ -1592,11 +1592,11 @@ exec_instruction( break; case TGSI_OPCODE_NRM: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_DIV: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_DP2: @@ -1615,7 +1615,7 @@ exec_instruction( case TGSI_OPCODE_IF: /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 
mach->CondStack[mach->CondStackTop++] = mach->CondMask; FETCH( &r[0], 0, CHAN_X ); /* update CondMask */ @@ -1639,7 +1639,7 @@ exec_instruction( /* invert CondMask wrt previous mask */ { uint prevMask; - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); prevMask = mach->CondStack[mach->CondStackTop - 1]; mach->CondMask = ~mach->CondMask & prevMask; UPDATE_EXEC_MASK(mach); @@ -1649,7 +1649,7 @@ exec_instruction( case TGSI_OPCODE_ENDIF: /* pop CondMask */ - assert(mach->CondStackTop > 0); + ASSERT(mach->CondStackTop > 0); mach->CondMask = mach->CondStack[--mach->CondStackTop]; UPDATE_EXEC_MASK(mach); break; @@ -1660,19 +1660,19 @@ exec_instruction( break; case TGSI_OPCODE_REP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_ENDREP: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_PUSHA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_POPA: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_CEIL: @@ -1746,7 +1746,7 @@ exec_instruction( break; case TGSI_OPCODE_MOD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_XOR: @@ -1759,15 +1759,15 @@ exec_instruction( break; case TGSI_OPCODE_SAD: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXF: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_TXQ: - assert (0); + ASSERT (0); break; case TGSI_OPCODE_EMIT: @@ -1784,9 +1784,9 @@ exec_instruction( /* fall-through (for now) */ case TGSI_OPCODE_BGNLOOP2: /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); mach->ContStack[mach->ContStackTop++] = mach->ContMask; break; @@ -1794,7 +1794,7 @@ exec_instruction( /* fall-through (for now at least) */ case TGSI_OPCODE_ENDLOOP2: /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); 
mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; if (mach->LoopMask) { /* repeat loop: jump to instruction just past BGNLOOP */ @@ -1802,10 +1802,10 @@ exec_instruction( } else { /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); + ASSERT(mach->LoopStackTop > 0); mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; /* pop ContMask */ - assert(mach->ContStackTop > 0); + ASSERT(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[--mach->ContStackTop]; } UPDATE_EXEC_MASK(mach); @@ -1834,26 +1834,26 @@ exec_instruction( break; case TGSI_OPCODE_NOISE1: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE2: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE3: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOISE4: - assert( 0 ); + ASSERT( 0 ); break; case TGSI_OPCODE_NOP: break; default: - assert( 0 ); + ASSERT( 0 ); } } @@ -1874,11 +1874,11 @@ spu_exec_machine_run( struct spu_exec_machine *mach ) mach->FuncMask = 0xf; mach->ExecMask = 0xf; - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); + mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ + ASSERT(mach->CondStackTop == 0); + ASSERT(mach->LoopStackTop == 0); + ASSERT(mach->ContStackTop == 0); + ASSERT(mach->CallStackTop == 0); mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index e04ffeb9b16..2a7cb75f592 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -34,6 +34,7 @@ #include "spu_main.h" #include "spu_render.h" +#include "spu_per_fragment_op.h" #include "spu_texture.h" #include "spu_tile.h" //#include "spu_test.h" @@ -46,7 +47,7 @@ /* helpful headers: 
/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h +/opt/cell/sdk/usr/include/libmisc.h */ boolean Debug = FALSE; @@ -55,17 +56,13 @@ struct spu_global spu; struct spu_vs_context draw; -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; -static unsigned char depth_stencil_code_buffer[4 * 64] - ALIGN16_ATTRIB; - -static unsigned char fb_blend_code_buffer[4 * 64] +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; -static unsigned char logicop_code_buffer[4 * 64] - ALIGN16_ATTRIB; /** @@ -136,54 +133,75 @@ really_clear_tiles(uint surfaceIndex) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - if (Debug) printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, clear->surface, clear->value); -#define CLEAR_OPT 1 -#if CLEAR_OPT - /* set all tile's status to CLEAR */ if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } } else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); spu.fb.depth_clear_value = clear->value; } - return; -#endif +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. 
+ */ if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - clear_c_tile(&spu.ctile); + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); } else { - spu.fb.depth_clear_value = clear->value; - clear_z_tile(&spu.ztile); + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); } +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + /* printf("SPU: %s num=%d w=%d h=%d\n", __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); */ - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - /* XXX we don't want this here, but it fixes bad tile results */ + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); } -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ if (Debug) printf("SPU %u: CLEAR SURF done\n", spu.init.id); @@ -201,6 +219,31 @@ cmd_release_verts(const struct cell_command_release_verts *release) } +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. 
+ * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + if (Debug) + printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + + /* Point function pointer at new code */ + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; +} + + static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { @@ -227,87 +270,24 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) switch (spu.fb.depth_format) { case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; break; case PIPE_FORMAT_Z16_UNORM: spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; break; default: spu.fb.zsize = 0; break; } - - if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else - ASSERT(0); -} - - -static void -cmd_state_blend(const struct cell_command_blend *state) -{ - if (Debug) - printf("SPU %u: BLEND: enabled %d\n", - spu.init.id, - (state->size != 0)); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - 
mfc_get(fb_blend_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - spu.blend = (blend_func) fb_blend_code_buffer; - spu.read_fb = state->read_fb; - } else { - spu.read_fb = FALSE; - } -} - - -static void -cmd_state_depth_stencil(const struct cell_command_depth_stencil_alpha_test *state) -{ - if (Debug) - printf("SPU %u: DEPTH_STENCIL: ztest %d\n", - spu.init.id, - state->read_depth); - - ASSERT_ALIGN16(state->base); - - if (state->size != 0) { - mfc_get(depth_stencil_code_buffer, - (unsigned int) state->base, /* src */ - ROUNDUP16(state->size), - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - } else { - /* If there is no code, emit a return instruction. - */ - depth_stencil_code_buffer[0] = 0x35; - depth_stencil_code_buffer[1] = 0x00; - depth_stencil_code_buffer[2] = 0x00; - depth_stencil_code_buffer[3] = 0x00; - } - - spu.frag_test = (frag_test_func) depth_stencil_code_buffer; - spu.read_depth = state->read_depth; - spu.read_stencil = state->read_stencil; } @@ -381,6 +361,21 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void cmd_finish(void) { if (Debug) @@ -395,7 +390,9 @@ cmd_finish(void) /** - * Execute a batch of commands + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * * The opcode param encodes the location of the buffer and its size. 
*/ static void @@ -432,16 +429,14 @@ cmd_batch(uint opcode) printf("SPU %u: release batch buf %u\n", spu.init.id, buf); release_buffer(buf); + /* + * Loop over commands in the batch buffer + */ for (pos = 0; pos < usize; /* no incr */) { switch (buffer[pos]) { - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; + /* + * rendering commands + */ case CELL_CMD_CLEAR_SURFACE: { struct cell_command_clear_surface *clr @@ -459,26 +454,24 @@ cmd_batch(uint opcode) pos += pos_incr; } break; - case CELL_CMD_RELEASE_VERTS: + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; } break; - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_STATE_BLEND: - cmd_state_blend((struct cell_command_blend *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_blend)) / 8); - break; - case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct cell_command_depth_stencil_alpha_test *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_depth_stencil_alpha_test)) / 8); + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } break; case CELL_CMD_STATE_SAMPLER: { @@ -514,42 +507,32 @@ cmd_batch(uint opcode) pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; case CELL_CMD_STATE_BIND_VS: +#if 0 spu_bind_vertex_shader(&draw, (struct cell_shader_info *) &buffer[pos+1]); +#endif pos += (1 + 
ROUNDUP8(sizeof(struct cell_shader_info)) / 8); break; - case CELL_CMD_STATE_ATTRIB_FETCH: { - struct cell_attribute_fetch_code *code = - (struct cell_attribute_fetch_code *) &buffer[pos+1]; - - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); break; - } - case CELL_CMD_STATE_LOGICOP: { - struct cell_command_logicop *code = - (struct cell_command_logicop *) &buffer[pos+1]; - - mfc_get(logicop_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - spu.logicop = (logicop_func) logicop_code_buffer; - pos += (1 + ROUNDUP8(sizeof(struct cell_command_logicop)) / 8); + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } break; - } case CELL_CMD_FLUSH_BUFFER_RANGE: { struct cell_buffer_range *br = (struct cell_buffer_range *) &buffer[pos+1]; @@ -618,7 +601,9 @@ main_loop(void) exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: +#if 0 spu_execute_vertex_shader(&draw, &cmd.vs); +#endif break; case CELL_CMD_BATCH: cmd_batch(opcode); @@ -643,6 +628,11 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); + + /* Install default/fallback fragment processing function. + * This will normally be overridden by a code-gen'd function.
+ */ + spu.fragment_ops = spu_fallback_fragment_ops; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index e962e1426c6..d40539da83b 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -41,6 +41,10 @@ #define MAX_HEIGHT 1024 +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types. + */ typedef union { ushort us[TILE_SIZE][TILE_SIZE]; uint ui[TILE_SIZE][TILE_SIZE]; @@ -56,38 +60,23 @@ typedef union { #define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ -struct spu_frag_test_results { - qword mask; - qword depth; - qword stencil; -}; - -typedef struct spu_frag_test_results (*frag_test_func)(qword frag_mask, - qword pixel_depth, qword pixel_stencil, qword frag_depth, - qword frag_alpha, qword facing); - - -struct spu_blend_results { - qword r; - qword g; - qword b; - qword a; -}; +/** Function for sampling textures */ +typedef vector float (*spu_sample_texture_func)(uint unit, + vector float texcoord); -typedef struct spu_blend_results (*blend_func)( - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword const_r, qword const_g, qword const_b, qword const_a); +/** Function for performing per-fragment ops */ +typedef void (*spu_fragment_ops_func)(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); -typedef struct spu_blend_results (*logicop_func)( - qword pixel_r, qword pixel_g, qword pixel_b, qword pixel_a, - qword frag_r, qword frag_g, qword frag_b, qword frag_a, - qword frag_mask); - - -typedef vector float (*sample_texture_func)(uint unit, vector float texcoord); - -struct spu_framebuffer { +struct spu_framebuffer +{ void 
*color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ enum pipe_format color_format; @@ -99,6 +88,7 @@ struct spu_framebuffer { uint depth_clear_value; uint zsize; /**< 0, 2 or 4 bytes per Z */ + float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ } ALIGN16_ATTRIB; @@ -115,35 +105,31 @@ struct spu_texture /** - * All SPU global/context state will be in singleton object of this type: + * All SPU global/context state will be in a singleton object of this type: */ struct spu_global { + /** One-time init/constant info */ struct cell_init_info init; + /* + * Current state + */ struct spu_framebuffer fb; - boolean read_depth; - boolean read_stencil; - frag_test_func frag_test; - - boolean read_fb; - blend_func blend; - qword const_blend_color[4] ALIGN16_ATTRIB; - - logicop_func logicop; - + struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + struct pipe_blend_state blend; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; struct spu_texture texture[PIPE_MAX_SAMPLERS]; - struct vertex_info vertex_info; - /* XXX more state to come */ - - - /** current color and Z tiles */ + /** Current color and Z tiles */ tile_t ctile ALIGN16_ATTRIB; tile_t ztile ALIGN16_ATTRIB; + /** Read depth/stencil tiles? 
*/ + boolean read_depth; + boolean read_stencil; + /** Current tiles' status */ ubyte cur_ctile_status, cur_ztile_status; @@ -151,11 +137,13 @@ struct spu_global ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + /** Current fragment ops machine code */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; + /** Current fragment ops function */ + spu_fragment_ops_func fragment_ops; - /** for converting RGBA to PIPE_FORMAT_x colors */ - vector unsigned char color_shuffle; - - sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + /** Current texture sampler function */ + spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index b4cffeeb32a..03dd547845b 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -1,211 +1,475 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ /** - * \file spu_per_fragment_op.c - * SPU implementation various per-fragment operations. 
- * - * \author Ian Romanick <[email protected]> + * \author Brian Paul */ + +#include <transpose_matrix4x4.h> #include "pipe/p_format.h" #include "spu_main.h" +#include "spu_colorpack.h" #include "spu_per_fragment_op.h" -#define ZERO 0x80 - -static void -read_ds_quad(tile_t *buffer, unsigned x, unsigned y, - enum pipe_format depth_format, qword *depth, - qword *stencil) -{ - const int ix = x / 2; - const int iy = y / 2; - - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; - - const qword shuf_vec = (qword) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7 - }; +#define LINEAR_QUAD_LAYOUT 1 - /* At even X values we want the first 4 shorts, and at odd X values we - * want the second 4 shorts. - */ - qword bias = (qword) spu_splats((unsigned char) ((ix & 0x01) << 3)); - qword bias_mask = si_fsmbi(0x3333); - qword sv = si_a(shuf_vec, si_and(bias_mask, bias)); - - *depth = si_shufb(*ptr, *ptr, sv); - *stencil = si_il(0); - break; - } - - - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - - *depth = *ptr; - *stencil = si_il(0); - break; - } - - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0xEEEE); - - *depth = si_rotmai(si_and(*ptr, mask), -8); - *stencil = si_andc(*ptr, mask); - break; +/** + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for + * debug/validation. 
+ */ +void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, + vector unsigned int mask) +{ + vector float frag_aos[4]; + unsigned int c0, c1, c2, c3; + + /* do alpha test */ + if (spu.depth_stencil_alpha.alpha.enabled) { + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector unsigned int amask; + + switch (spu.depth_stencil_alpha.alpha.func) { + case PIPE_FUNC_LESS: + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ + break; + case PIPE_FUNC_GREATER: + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ + break; + case PIPE_FUNC_GEQUAL: + amask = spu_cmpgt(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_LEQUAL: + amask = spu_cmpgt(fragA, ref); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_EQUAL: + amask = spu_cmpeq(ref, fragA); + break; + case PIPE_FUNC_NOTEQUAL: + amask = spu_cmpeq(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_ALWAYS: + amask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + amask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, amask); } - - case PIPE_FORMAT_S8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - - *depth = si_and(*ptr, si_fsmbi(0x7777)); - *stencil = si_andi(si_roti(*ptr, 8), 0x0ff); - break; + /* Z and/or stencil testing... 
*/ + if (spu.depth_stencil_alpha.depth.enabled || + spu.depth_stencil_alpha.stencil[0].enabled) { + + /* get four Z/Stencil values from tile */ + vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); + vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; + vector unsigned int ifbZ = spu_and(ifbZS, mask24); + vector unsigned int ifbS = spu_andc(ifbZS, mask24); + + if (spu.depth_stencil_alpha.stencil[0].enabled) { + /* do stencil test */ + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + + } + else if (spu.depth_stencil_alpha.depth.enabled) { + /* do depth test */ + + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || + spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + + vector unsigned int ifragZ; + vector unsigned int zmask; + + /* convert four fragZ from float to uint */ + fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); + ifragZ = spu_convtu(fragZ, 0); + + /* do depth comparison, setting zmask with results */ + switch (spu.depth_stencil_alpha.depth.func) { + case PIPE_FUNC_LESS: + zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ + break; + case PIPE_FUNC_GREATER: + zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifragZ > ifbZ) */ + break; + case PIPE_FUNC_GEQUAL: + zmask = spu_cmpgt(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_LEQUAL: + zmask = spu_cmpgt(ifragZ, ifbZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_EQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + break; + case PIPE_FUNC_NOTEQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_ALWAYS: + zmask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + zmask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, zmask); + + /* merge framebuffer Z and fragment Z according to the mask */ + ifbZ = spu_or(spu_and(ifragZ, mask), + spu_andc(ifbZ, mask)); + } + + if (spu_extract(spu_orx(mask), 0)) { + /* put new fragment Z/Stencil values back into
Z/Stencil tile */ + depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } } - - default: - assert(0); - break; + if (spu.blend.blend_enable) { + /* blending terms, misc regs */ + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + vector float one, tmp; + + vector float fbRGBA[4]; /* current framebuffer colors */ + + /* get colors from framebuffer/tile */ + { + vector float fc[4]; + uint c0, c1, c2, c3; + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + c0 = colorTile->ui[y][x*2+0]; + c1 = colorTile->ui[y][x*2+1]; + c2 = colorTile->ui[y][x*2+2]; + c3 = colorTile->ui[y][x*2+3]; +#else + c0 = colorTile->ui[y+0][x+0]; + c1 = colorTile->ui[y+0][x+1]; + c2 = colorTile->ui[y+1][x+0]; + c3 = colorTile->ui[y+1][x+1]; +#endif + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + fc[0] = spu_unpack_B8G8R8A8(c0); + fc[1] = spu_unpack_B8G8R8A8(c1); + fc[2] = spu_unpack_B8G8R8A8(c2); + fc[3] = spu_unpack_B8G8R8A8(c3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + fc[0] = spu_unpack_A8R8G8B8(c0); + fc[1] = spu_unpack_A8R8G8B8(c1); + fc[2] = spu_unpack_A8R8G8B8(c2); + fc[3] = spu_unpack_A8R8G8B8(c3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, fc); + } + + /* + * Compute Src RGB terms + */ + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (spu.blend.alpha_src_factor) { + case 
PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fragR; + term2g = fragG; + term2b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine 
Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + /* XXX more cases */ + default: + ASSERT(0); + } } -} - - -static void -write_ds_quad(tile_t *buffer, unsigned x, unsigned y, - enum pipe_format depth_format, - qword depth, qword stencil) -{ - const int ix = x / 2; - const int iy = y / 2; - - (void) stencil; - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: { - qword *ptr = (qword *) &buffer->us8[iy][ix / 2]; - qword sv = ((ix & 0x01) == 0) - ? (qword) { 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31 } - : (qword) { 16, 17, 18, 19, 20 , 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15 }; - *ptr = si_shufb(depth, *ptr, sv); - break; + /* + * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. + */ +#if 0 + /* original code */ + { + vector float frag_soa[4]; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; + _transpose_matrix4x4(frag_aos, frag_soa); } - - - case PIPE_FORMAT_Z32_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - *ptr = depth; +#else + /* short-cut relying on function parameter layout: */ + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; +#endif + + /* + * Pack float colors into 32-bit RGBA words. 
+ */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + c0 = spu_pack_A8R8G8B8(frag_aos[0]); + c1 = spu_pack_A8R8G8B8(frag_aos[1]); + c2 = spu_pack_A8R8G8B8(frag_aos[2]); + c3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - } - - case PIPE_FORMAT_Z24S8_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0xEEEE); - - depth = si_shli(depth, 8); - *ptr = si_selb(stencil, depth, mask); + case PIPE_FORMAT_B8G8R8A8_UNORM: + c0 = spu_pack_B8G8R8A8(frag_aos[0]); + c1 = spu_pack_B8G8R8A8(frag_aos[1]); + c2 = spu_pack_B8G8R8A8(frag_aos[2]); + c3 = spu_pack_B8G8R8A8(frag_aos[3]); break; + default: + fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); + ASSERT(0); } - case PIPE_FORMAT_S8Z24_UNORM: { - qword *ptr = (qword *) &buffer->ui4[iy][ix]; - qword mask = si_fsmbi(0x7777); - - stencil = si_shli(stencil, 24); - *ptr = si_selb(stencil, depth, mask); - break; + /* + * Color masking + */ + if (spu.blend.colormask != 0xf) { + /* XXX to do */ + /* apply color mask to 32-bit packed colors */ } - default: - assert(0); - break; + /* + * Logic Ops + */ + if (spu.blend.logicop_enable) { + /* XXX to do */ + /* apply logicop to 32-bit packed colors */ } -} -qword -spu_do_depth_stencil(int x, int y, - qword frag_mask, qword frag_depth, qword frag_alpha, - qword facing) -{ - struct spu_frag_test_results result; - qword pixel_depth; - qword pixel_stencil; - - /* All of this preable code (everthing before the call to frag_test) should - * be generated on the PPU and upload to the SPU. + /* + * If mask is non-zero, mark tile as dirty. 
*/ - if (spu.read_depth || spu.read_stencil) { - read_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - &pixel_depth, &pixel_stencil); + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; } - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z16_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x0000ffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z32_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0xffffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - frag_depth = si_fm(frag_depth, (qword)spu_splats((float)(0x00ffffffu))); - frag_depth = si_cfltu(frag_depth, 0); - break; - default: - ASSERT(0); - break; + else { + return; } - result = (*spu.frag_test)(frag_mask, pixel_depth, pixel_stencil, - frag_depth, frag_alpha, facing); - - /* This code (everthing after the call to frag_test) should - * be generated on the PPU and upload to the SPU. + /* + * Write new quad colors to the framebuffer/tile. + * Only write pixels where the corresponding mask word is set. 
*/ - if (spu.read_depth || spu.read_stencil) { - write_ds_quad(&spu.ztile, x, y, spu.fb.depth_format, - result.depth, result.stencil); - } - - return result.mask; +#if LINEAR_QUAD_LAYOUT + /* + * Quad layout: + * +--+--+--+--+ + * |p0|p1|p2|p3| + * +--+--+--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y][x*2] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y][x*2+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y][x*2+2] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y][x*2+3] = c3; +#else + /* + * Quad layout: + * +--+--+ + * |p0|p1| + * +--+--+ + * |p2|p3| + * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = c0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = c1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = c2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = c3; +#endif } diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index 65712586992..f817abf0463 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -1,32 +1,44 @@ -/* - * (C) Copyright IBM Corporation 2008 +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ #ifndef SPU_PER_FRAGMENT_OP #define SPU_PER_FRAGMENT_OP -extern qword -spu_do_depth_stencil(int x, int y, qword frag_mask, qword frag_depth, - qword frag_alpha, qword facing); + +extern void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask); + #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 5051774f00c..117b8a36f80 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -97,7 +97,7 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) const qword offset_y = si_andi((qword) y, 0x1f); const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); - const qword tile_size = (qword) spu_splats(sizeof(tile_t)); + const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); tile_offset = si_mpy((qword) tile_offset, tile_size); diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c index 12dc2463283..216a33126b7 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -31,6 +31,9 @@ #include "spu_main.h" +/** + * Get tile of color or Z values from main memory, put into SPU memory. + */ void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) { @@ -56,6 +59,9 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) } +/** + * Move tile of color or Z values from SPU memory to main memory. 
+ */ void put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) { diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 2a4e0b423ca..f02cdd1f763 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -38,7 +38,6 @@ #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" -#include "spu_per_fragment_op.h" /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ @@ -209,7 +208,7 @@ clip_emit_quad(struct setup_stage *setup) /** * Evaluate attribute coefficients (plane equations) to compute * attribute values for the four fragments in a quad. - * Eg: four colors will be compute. + * Eg: four colors will be computed (in AoS format). */ static INLINE void eval_coeff(uint slot, float x, float y, vector float result[4]) @@ -255,31 +254,6 @@ eval_z(float x, float y) } -static INLINE mask_t -do_depth_test(int x, int y, mask_t quadmask) -{ - float4 zvals; - mask_t mask; - - if (spu.fb.depth_format == PIPE_FORMAT_NONE) - return quadmask; - - zvals.v = eval_z((float) x, (float) y); - - mask = (mask_t) spu_do_depth_stencil(x - setup.cliprect_minx, - y - setup.cliprect_miny, - (qword) quadmask, - (qword) zvals.v, - (qword) spu_splats((unsigned char) 0x0ffu), - (qword) spu_splats((unsigned int) 0x01u)); - - if (spu_extract(spu_orx(mask), 0)) - spu.cur_ztile_status = TILE_STATUS_DIRTY; - - return mask; -} - - /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -289,18 +263,6 @@ do_depth_test(int x, int y, mask_t quadmask) static INLINE void emit_quad( int x, int y, mask_t mask ) { -#if 0 - struct softpipe_context *sp = setup.softpipe; - setup.quad.x0 = x; - setup.quad.y0 = y; - setup.quad.mask = mask; - sp->quad.first->run(sp->quad.first, &setup.quad); -#else - - if (spu.read_depth) { - mask = do_depth_test(x, y, mask); - } - /* If any bits in mask are set... 
*/ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; @@ -308,6 +270,7 @@ emit_quad( int x, int y, mask_t mask ) vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; + spu.cur_ztile_status = TILE_STATUS_DIRTY; if (spu.texture[0].start) { /* texture mapping */ @@ -355,55 +318,29 @@ emit_quad( int x, int y, mask_t mask ) } - /* Convert fragment data from AoS to SoA format. - */ - qword soa_frag[4]; - _transpose_matrix4x4((vec_float4 *) soa_frag, colors); + { + /* Convert fragment data from AoS to SoA format. + * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) + * This is temporary! + */ + vector float soa_frag[4]; + _transpose_matrix4x4(soa_frag, colors); - /* Read the current framebuffer values. - */ - const qword pix[4] = { - (qword) spu_splats(spu.ctile.ui[iy+0][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+0][ix+1]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+0]), - (qword) spu_splats(spu.ctile.ui[iy+1][ix+1]), - }; + float4 fragZ; - qword soa_pix[4]; + fragZ.v = eval_z((float) x, (float) y); - if (spu.read_fb) { - /* Convert pixel data from AoS to SoA format. + /* Do all per-fragment/quad operations here, including: + * alpha test, z test, stencil test, blend and framebuffer writing. */ - vec_float4 aos_pix[4] = { - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+0][ix+1]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+0]), - spu_unpack_A8R8G8B8(spu.ctile.ui[iy+1][ix+1]), - }; - - _transpose_matrix4x4((vec_float4 *) soa_pix, aos_pix); + spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + fragZ.v, + soa_frag[0], soa_frag[1], + soa_frag[2], soa_frag[3], + mask); } - - struct spu_blend_results result = - (*spu.blend)(soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - soa_pix[0], soa_pix[1], soa_pix[2], soa_pix[3], - spu.const_blend_color[0], spu.const_blend_color[1], - spu.const_blend_color[2], spu.const_blend_color[3]); - - - /* Convert final pixel data from SoA to AoS format. 
- */ - result = (*spu.logicop)(pix[0], pix[1], pix[2], pix[3], - result.r, result.g, result.b, result.a, - (qword) mask); - - spu.ctile.ui[iy+0][ix+0] = spu_extract((vec_uint4) result.r, 0); - spu.ctile.ui[iy+0][ix+1] = spu_extract((vec_uint4) result.g, 0); - spu.ctile.ui[iy+1][ix+0] = spu_extract((vec_uint4) result.b, 0); - spu.ctile.ui[iy+1][ix+1] = spu_extract((vec_uint4) result.a, 0); } -#endif } diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index b25ca4eafc0..b8a0d4a265f 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,5 @@ +#include "cell/common.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" @@ -20,7 +21,7 @@ tgsi_util_get_src_register_swizzle( case 3: return reg->SwizzleW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -40,7 +41,7 @@ tgsi_util_get_src_register_extswizzle( case 3: return reg->ExtSwizzleW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -60,12 +61,12 @@ tgsi_util_get_full_src_register_extswizzle( &reg->SrcRegisterExtSwz, component ); - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); /* * Second, calculate the simple swizzle for the unswizzled channel index. 
@@ -95,7 +96,7 @@ tgsi_util_get_src_register_extnegate( case 3: return reg->NegateW; default: - assert( 0 ); + ASSERT( 0 ); } return 0; } @@ -120,7 +121,7 @@ tgsi_util_set_src_register_extnegate( reg->NegateW = negate; break; default: - assert( 0 ); + ASSERT( 0 ); } } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 26f23637492..03375d84a57 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -92,7 +92,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned nr_attrs = draw->vertex_fetch.nr_attrs; unsigned attr; - assert(count <= 4); + ASSERT(count <= 4); #if DRAW_DBG printf("SPU: %s count = %u, nr_attrs = %u\n", diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index f81d19fea1c..fbe5b34d397 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -112,7 +112,7 @@ run_vertex_program(struct spu_vs_context *draw, const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); + ASSERT(count <= 4); machine->Processor = TGSI_PROCESSOR_VERTEX; diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index c6776716a2f..6dd3eda85dc 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -35,6 +35,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" @@ -72,7 +73,7 @@ i915_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < i915->num_vertex_buffers; i++) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe_buffer_map(pipe->screen, i915->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); 
draw_set_mapped_vertex_buffer(draw, i, buf); @@ -80,7 +81,7 @@ i915_draw_range_elements(struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, min_index, @@ -105,11 +106,11 @@ i915_draw_range_elements(struct pipe_context *pipe, * unmap vertex/index buffers */ for (i = 0; i < i915->num_vertex_buffers; i++) { - pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } if (indexBuffer) { - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e4ece550985..4fda1ab64f5 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -115,7 +115,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; + struct pipe_screen *screen = i915->pipe.screen; size_t size = (size_t)vertex_size * (size_t)nr_vertices; /* FIXME: handle failure */ @@ -124,20 +124,20 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { } else { i915->vbo_flushed = 0; - pipe_buffer_reference(winsys, &i915_render->vbo, NULL); + pipe_buffer_reference(screen, &i915_render->vbo, NULL); } if (!i915_render->vbo) { i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); 
i915_render->vbo_offset = 0; - i915_render->vbo = winsys->buffer_create(winsys, - 64, - I915_BUFFER_USAGE_LIT_VERTEX, - i915_render->vbo_size); - i915_render->vbo_ptr = winsys->buffer_map(winsys, - i915_render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - winsys->buffer_unmap(winsys, i915_render->vbo); + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); } i915->vbo = i915_render->vbo; @@ -488,7 +488,7 @@ static struct vbuf_render * i915_vbuf_render_create( struct i915_context *i915 ) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); - struct pipe_winsys *winsys = i915->pipe.winsys; + struct pipe_screen *screen = i915->pipe.screen; i915_render->i915 = i915; @@ -510,14 +510,14 @@ i915_vbuf_render_create( struct i915_context *i915 ) i915_render->vbo_alloc_size = 128 * 4096; i915_render->vbo_size = i915_render->vbo_alloc_size; i915_render->vbo_offset = 0; - i915_render->vbo = winsys->buffer_create(winsys, - 64, - I915_BUFFER_USAGE_LIT_VERTEX, - i915_render->vbo_size); - i915_render->vbo_ptr = winsys->buffer_map(winsys, - i915_render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - winsys->buffer_unmap(winsys, i915_render->vbo); + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); return &i915_render->base; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index e9e40c3f0b2..1c976082df7 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -28,6 +28,7 @@ #include "util/u_memory.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" 
#include "util/u_string.h" #include "i915_reg.h" @@ -207,7 +208,7 @@ i915_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - char *map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + char *map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -226,7 +227,7 @@ static void i915_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 32344da4d5a..bd87217063c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -80,7 +80,7 @@ static unsigned power_of_two(unsigned x) { unsigned value = 1; - while (value <= x) + while (value < x) value = value << 1; return value; } @@ -207,7 +207,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksy = pt->nblocksy[0]; #if 0 /* used for tiled display targets */ - if (pt->last_level == 0 && pt->cpp == 4) + if (pt->last_level == 0 && pt->block.size == 4) if (i915_displaytarget_layout(tex)) return; #endif @@ -645,7 +645,7 @@ i915_texture_release(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(screen->winsys, &tex->buffer, NULL); + pipe_buffer_reference(screen, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -684,7 +684,7 @@ i915_get_tex_surface(struct pipe_screen *screen, ps->refcount = 1; ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -728,7 +728,7 @@ i915_texture_blanket(struct pipe_screen * 
screen, i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - pipe_buffer_reference(screen->winsys, &tex->buffer, buffer); + pipe_buffer_reference(screen, &tex->buffer, buffer); return &tex->base; } @@ -756,7 +756,7 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen->winsys, &surf->buffer, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index 78d4c0e411b..007dc8f9deb 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -92,10 +92,10 @@ static void brw_init_pool( struct brw_context *brw, pool->size = size; pool->brw = brw; - pool->buffer = brw->pipe.winsys->buffer_create(brw->pipe.winsys, - 4096, - 0 /* DRM_BO_FLAG_MEM_TT */, - size); + pool->buffer = pipe_buffer_create(brw->pipe.screen, + 4096, + 0 /* DRM_BO_FLAG_MEM_TT */, + size); } static void brw_destroy_pool( struct brw_context *brw, @@ -103,7 +103,7 @@ static void brw_destroy_pool( struct brw_context *brw, { struct brw_mem_pool *pool = &brw->pool[pool_id]; - pipe_buffer_reference( pool->brw->pipe.winsys, + pipe_buffer_reference( pool->brw->pipe.screen, &pool->buffer, NULL ); } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 05eda9d1f26..cc0c665e021 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -330,7 +330,7 @@ brw_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(ws, &tex->buffer, NULL); + winsys_buffer_reference(ws, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -369,7 +369,7 @@ 
brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->format); assert(ps->refcount); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + winsys_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index 12367068917..dfa46c9fb70 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -31,6 +31,7 @@ #include "pipe/p_defines.h" +#include "util/u_pack_color.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_surface.h" @@ -39,8 +40,28 @@ /** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** * Clear the given surface to the specified value. * No masking, no scissor (clear entire buffer). + * Note: when clearing a color buffer, the clearValue is always + * encoded as PIPE_FORMAT_A8R8G8B8_UNORM. 
*/ void softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, @@ -66,7 +87,15 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { - sp_tile_cache_clear(softpipe->cbuf_cache[i], clearValue); + unsigned cv; + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + else { + cv = clearValue; + } + sp_tile_cache_clear(softpipe->cbuf_cache[i], cv); softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; } } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index dda90f760a3..cd1e6663d86 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -92,17 +92,19 @@ static void softpipe_destroy( struct pipe_context *pipe ) if (softpipe->draw) draw_destroy( softpipe->draw ); - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); + softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); + softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); + 
softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); + softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); + softpipe->quad[i].stencil_test->destroy( softpipe->quad[i].stencil_test ); + softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); + softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); + softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); + softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); + softpipe->quad[i].output->destroy( softpipe->quad[i].output ); + } for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) sp_destroy_tile_cache(softpipe->cbuf_cache[i]); @@ -113,7 +115,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); } } @@ -205,17 +207,19 @@ softpipe_create( struct pipe_screen *screen, /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad[i].shade = 
sp_quad_shade_stage(softpipe); + softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); + softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad[i].output = sp_quad_output_stage(softpipe); + } /* * Create drawing context and plug our rendering stage into it. @@ -257,3 +261,4 @@ softpipe_create( struct pipe_screen *screen, softpipe_destroy(&softpipe->pipe); return NULL; } + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 078886f93c9..2b9a2a8ee52 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -45,6 +45,10 @@ */ #define USE_DRAW_STAGE_PSTIPPLE 1 +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. 
+ */ +#define SP_NUM_QUAD_THREADS 1 struct softpipe_winsys; struct softpipe_vbuf_render; @@ -133,7 +137,7 @@ struct softpipe_context { struct quad_stage *output; struct quad_stage *first; /**< points to one of the above stages */ - } quad; + } quad[SP_NUM_QUAD_THREADS]; /** The primitive drawing context */ struct draw_context *draw; @@ -151,13 +155,11 @@ struct softpipe_context { }; - - static INLINE struct softpipe_context * softpipe_context( struct pipe_context *pipe ) { return (struct softpipe_context *)pipe; } - #endif /* SP_CONTEXT_H */ + diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 12b44a82118..424bd568460 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "sp_context.h" #include "sp_state.h" @@ -135,7 +136,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe_buffer_map(pipe->screen, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -143,7 +144,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer_range(draw, indexSize, min_index, @@ -164,11 +165,11 @@ softpipe_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { 
draw_set_mapped_element_buffer(draw, 0, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index e03994b63b7..401764bb439 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -73,6 +73,19 @@ softpipe_flush( struct pipe_context *pipe, softpipe_unmap_surfaces(softpipe); } + /* Enable to dump BMPs of the color/depth buffers each frame */ +#if 0 + if(flags & PIPE_FLUSH_FRAME) { + static unsigned frame_no = 1; + static char filename[256]; + util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + ++frame_no; + } +#endif + if (fence) *fence = NULL; } diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index d0456731bea..701ee4c72f2 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -106,7 +106,7 @@ exec_run( const struct sp_fragment_shader *base, /* Compute X, Y, Z, W vals for this quad */ sp_setup_pos_vector(quad->posCoef, - (float)quad->x0, (float)quad->y0, + (float)quad->input.x0, (float)quad->input.y0, &machine->QuadPos); return tgsi_exec_machine_run( machine ); diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 35653a8e48c..496ed43df26 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -88,7 +88,7 @@ fs_sse_run( const struct sp_fragment_shader *base, /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ sp_setup_pos_vector(quad->posCoef, - (float)quad->x0, (float)quad->y0, + (float)quad->input.x0, (float)quad->input.y0, 
machine->Temps); /* init kill mask */ diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index ae2ee210fc9..4a42cb3c192 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -59,20 +59,31 @@ * Encodes everything we need to know about a 2x2 pixel block. Uses * "Channel-Serial" or "SoA" layout. */ -struct quad_header { +struct quad_header_input +{ int x0; int y0; - unsigned mask:4; + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ unsigned facing:1; /**< Front (0) or back (1) facing? */ unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ +}; + +struct quad_header_inout +{ + unsigned mask:4; +}; - struct { - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; - float depth[QUAD_SIZE]; - } outputs; +struct quad_header_output +{ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float depth[QUAD_SIZE]; +}; - float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ +struct quad_header { + struct quad_header_input input; + struct quad_header_inout inout; + struct quad_header_output output; const struct tgsi_interp_coef *coef; const struct tgsi_interp_coef *posCoef; @@ -80,5 +91,5 @@ struct quad_header { unsigned nr_attrs; }; - #endif /* SP_HEADERS_H */ + diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index bc83d78ea16..892ef87ee9f 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -33,29 +33,33 @@ static void sp_push_quad_first( struct softpipe_context *sp, - struct quad_stage *quad ) + struct quad_stage *quad, + uint i ) { - quad->next = sp->quad.first; - sp->quad.first = quad; + quad->next = sp->quad[i].first; + sp->quad[i].first = quad; } static void sp_build_depth_stencil( - struct softpipe_context *sp ) + struct 
softpipe_context *sp, + uint i ) { if (sp->depth_stencil->stencil[0].enabled || sp->depth_stencil->stencil[1].enabled) { - sp_push_quad_first( sp, sp->quad.stencil_test ); + sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); } else if (sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf) { - sp_push_quad_first( sp, sp->quad.depth_test ); + sp_push_quad_first( sp, sp->quad[i].depth_test, i ); } } void sp_build_quad_pipeline(struct softpipe_context *sp) { + uint i; + boolean early_depth_test = sp->depth_stencil->depth.enabled && sp->framebuffer.zsbuf && @@ -64,49 +68,51 @@ sp_build_quad_pipeline(struct softpipe_context *sp) !sp->fs->info.writes_z; /* build up the pipeline in reverse order... */ - - sp->quad.first = sp->quad.output; - - if (sp->blend->colormask != 0xf) { - sp_push_quad_first( sp, sp->quad.colormask ); - } - - if (sp->blend->blend_enable || - sp->blend->logicop_enable) { - sp_push_quad_first( sp, sp->quad.blend ); - } - - if (sp->depth_stencil->depth.occlusion_count) { - sp_push_quad_first( sp, sp->quad.occlusion ); - } - - if (sp->rasterizer->poly_smooth || - sp->rasterizer->line_smooth || - sp->rasterizer->point_smooth) { - sp_push_quad_first( sp, sp->quad.coverage ); - } - - if (!early_depth_test) { - sp_build_depth_stencil( sp ); - } - - if (sp->depth_stencil->alpha.enabled) { - sp_push_quad_first( sp, sp->quad.alpha_test ); - } - - /* XXX always enable shader? 
*/ - if (1) { - sp_push_quad_first( sp, sp->quad.shade ); - } - - if (early_depth_test) { - sp_build_depth_stencil( sp ); - sp_push_quad_first( sp, sp->quad.earlyz ); - } + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first = sp->quad[i].output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad[i].colormask, i ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad[i].blend, i ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad[i].coverage, i ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp, i ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + } + + /* XXX always enable shader? */ + if (1) { + sp_push_quad_first( sp, sp->quad[i].shade, i ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp, i ); + sp_push_quad_first( sp, sp->quad[i].earlyz, i ); + } #if !USE_DRAW_STAGE_PSTIPPLE - if (sp->rasterizer->poly_stipple_enable) { - sp_push_quad_first( sp, sp->quad.polygon_stipple ); - } + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + } #endif + } } + diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 7d3580fb4f2..5bebd141e92 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -17,11 +17,10 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) const float ref = softpipe->depth_stencil->alpha.ref; unsigned passMask = 0x0, j; const uint cbuf = 0; /* only output[0].alpha is tested */ - const float *aaaa = quad->outputs.color[cbuf][3]; + const float *aaaa = quad->output.color[cbuf][3]; switch 
(softpipe->depth_stencil->alpha.func) { case PIPE_FUNC_NEVER: - quad->mask = 0x0; break; case PIPE_FUNC_LESS: /* @@ -76,9 +75,9 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - quad->mask &= passMask; + quad->inout.mask &= passMask; - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index a834accb863..6f64c6e584c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -114,14 +114,14 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile * tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } @@ -244,14 +244,14 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } diff --git 
a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index f72f31db973..f32bdfab784 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -56,14 +56,14 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; uint i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - int x = (quad->x0 & (TILE_SIZE-1)) + (j & 1); - int y = (quad->y0 & (TILE_SIZE-1)) + (j >> 1); + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); for (i = 0; i < 4; i++) { dest[i][j] = tile->data.color[y][x][i]; } diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index ad907ec25fe..ee29aa7dfea 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -47,19 +47,19 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - if ((softpipe->rasterizer->poly_smooth && quad->prim == PRIM_TRI) || - (softpipe->rasterizer->line_smooth && quad->prim == PRIM_LINE) || - (softpipe->rasterizer->point_smooth && quad->prim == PRIM_POINT)) { + if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) || + (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) || + (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) { uint cbuf; /* loop over colorbuffer outputs */ for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { - float (*quadColor)[4] = quad->outputs.color[cbuf]; + float (*quadColor)[4] = quad->output.color[cbuf]; unsigned j; for 
(j = 0; j < QUAD_SIZE; j++) { - assert(quad->coverage[j] >= 0.0); - assert(quad->coverage[j] <= 1.0); - quadColor[3][j] *= quad->coverage[j]; + assert(quad->input.coverage[j] >= 0.0); + assert(quad->input.coverage[j] <= 1.0); + quadColor[3][j] *= quad->input.coverage[j]; } } } diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 227cb2014e1..523bd3e0801 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -60,7 +60,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) unsigned zmask = 0; unsigned j; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); assert(ps); /* shouldn't get here if there's no zbuffer */ @@ -79,12 +79,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = 65535.0; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth16[y][x]; } } @@ -94,12 +94,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) double scale = (double) (uint) ~0UL; for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x]; } } @@ -111,12 +111,12 @@ 
sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x] & 0xffffff; } } @@ -128,12 +128,12 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) float scale = (float) ((1 << 24) - 1); for (j = 0; j < QUAD_SIZE; j++) { - qzzzz[j] = (unsigned) (quad->outputs.depth[j] * scale); + qzzzz[j] = (unsigned) (quad->output.depth[j] * scale); } for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); bzzzz[j] = tile->data.depth32[y][x] >> 8; } } @@ -192,14 +192,14 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - quad->mask &= zmask; + quad->inout.mask &= zmask; if (softpipe->depth_stencil->depth.writemask) { /* This is also efficient with sse / spe instructions: */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { + if (quad->inout.mask & (1 << j)) { bzzzz[j] = qzzzz[j]; } } @@ -208,8 +208,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (format) { case PIPE_FORMAT_Z16_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth16[y][x] = (ushort) bzzzz[j]; } break; @@ -218,15 +218,15 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) /* (yes, this falls through to a different case 
than above) */ case PIPE_FORMAT_Z32_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = bzzzz[j]; } break; case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint s8z24 = tile->data.depth32[y][x]; s8z24 = (s8z24 & 0xff000000) | bzzzz[j]; tile->data.depth32[y][x] = s8z24; @@ -234,8 +234,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); tile->data.depth32[y][x] = z24s8; @@ -243,8 +243,8 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24X8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.depth32[y][x] = bzzzz[j] << 8; } break; @@ -260,7 +260,7 @@ depth_test_quad(struct quad_stage *qs, struct quad_header *quad) { sp_depth_test_quad(qs, quad); - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 5a66a866993..6e2dde304ea 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -45,16 +45,16 @@ earlyz_quad( struct 
quad_stage *qs, struct quad_header *quad ) { - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; const float dzdx = quad->posCoef->dadx[2]; const float dzdy = quad->posCoef->dady[2]; const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; qs->next->run( qs->next, quad ); } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 5499ba5361f..1f0cb3e0355 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -85,7 +85,7 @@ shade_quad( machine->InterpCoefs = quad->coef; /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, + quad->inout.mask &= softpipe->fs->run( softpipe->fs, &qss->machine, quad ); @@ -101,16 +101,16 @@ shade_quad( case TGSI_SEMANTIC_COLOR: { uint cbuf = sem_index[i]; - memcpy(quad->outputs.color[cbuf], + memcpy(quad->output.color[cbuf], &machine->Outputs[i].xyzw[0].f[0], - sizeof(quad->outputs.color[0]) ); + sizeof(quad->output.color[0]) ); } break; case TGSI_SEMANTIC_POSITION: { uint j; for (j = 0; j < 4; j++) { - quad->outputs.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; } z_written = TRUE; } @@ -122,20 +122,20 @@ shade_quad( if (!z_written) { /* compute Z values now, as in the quad earlyz stage */ /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; const float dzdx = quad->posCoef->dadx[2]; 
const float dzdy = quad->posCoef->dady[2]; const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; } /* shader may cull fragments */ - if( quad->mask ) { + if( quad->inout.mask ) { qs->next->run( qs->next, quad ); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index db13e73ae35..169bd82876d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -54,7 +54,7 @@ occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - softpipe->occlusion_count += count_bits(quad->mask); + softpipe->occlusion_count += count_bits(quad->inout.mask); qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index b64646a449f..d05e12d1d95 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -41,8 +41,8 @@ static void output_quad(struct quad_stage *qs, struct quad_header *quad) { /* in-tile pos: */ - const int itx = quad->x0 % TILE_SIZE; - const int ity = quad->y0 % TILE_SIZE; + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; struct softpipe_context *softpipe = qs->softpipe; uint cbuf; @@ -52,13 +52,13 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], - quad->x0, quad->y0); - float (*quadColor)[4] = quad->outputs.color[cbuf]; + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = 
quad->output.color[cbuf]; int i, j; /* get/swizzle dest colors */ for (j = 0; j < QUAD_SIZE; j++) { - if (quad->mask & (1 << j)) { + if (quad->inout.mask & (1 << j)) { int x = itx + (j & 1); int y = ity + (j >> 1); for (i = 0; i < 4; i++) { /* loop over color chans */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index ce9562e07c6..abb54877487 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -206,9 +206,9 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) ubyte ref, wrtMask, valMask; ubyte stencilVals[QUAD_SIZE]; struct softpipe_cached_tile *tile - = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->x0, quad->y0); + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); uint j; - uint face = quad->facing; + uint face = quad->input.facing; if (!softpipe->depth_stencil->stencil[1].enabled) { /* single-sided stencil test, use front (face=0) state */ @@ -231,22 +231,22 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (ps->format) { case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); stencilVals[j] = tile->data.depth32[y][x] >> 24; } break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); stencilVals[j] = tile->data.depth32[y][x] & 0xff; } break; case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE 
+ (j >> 1); stencilVals[j] = tile->data.stencil8[y][x]; } break; @@ -258,35 +258,35 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) { unsigned passMask, failMask; passMask = do_stencil_test(stencilVals, func, ref, valMask); - failMask = quad->mask & ~passMask; - quad->mask &= passMask; + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; if (failOp != PIPE_STENCIL_OP_KEEP) { apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); } } - if (quad->mask) { + if (quad->inout.mask) { /* now the pixels that passed the stencil test are depth tested */ if (softpipe->depth_stencil->depth.enabled) { - const unsigned origMask = quad->mask; + const unsigned origMask = quad->inout.mask; sp_depth_test_quad(qs, quad); /* quad->mask is updated */ /* update stencil buffer values according to z pass/fail result */ if (zFailOp != PIPE_STENCIL_OP_KEEP) { - const unsigned failMask = origMask & ~quad->mask; + const unsigned failMask = origMask & ~quad->inout.mask; apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); } if (zPassOp != PIPE_STENCIL_OP_KEEP) { - const unsigned passMask = origMask & quad->mask; + const unsigned passMask = origMask & quad->inout.mask; apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); } } else { /* no depth test, apply Zpass operator to stencil buffer values */ - apply_stencil_op(stencilVals, quad->mask, zPassOp, ref, wrtMask); + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); } } @@ -295,8 +295,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) switch (ps->format) { case PIPE_FORMAT_S8Z24_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint s8z24 = tile->data.depth32[y][x]; s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); tile->data.depth32[y][x] = s8z24; @@ -304,8 
+304,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_Z24S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; tile->data.depth32[y][x] = z24s8; @@ -313,8 +313,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) break; case PIPE_FORMAT_S8_UNORM: for (j = 0; j < QUAD_SIZE; j++) { - int x = quad->x0 % TILE_SIZE + (j & 1); - int y = quad->y0 % TILE_SIZE + (j >> 1); + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); tile->data.stencil8[y][x] = stencilVals[j]; } break; @@ -322,7 +322,7 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); } - if (quad->mask) + if (quad->inout.mask) qs->next->run(qs->next, quad); } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index a39ecc2e9d4..ccf37f6be59 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -19,17 +19,17 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; - if (quad->prim == PRIM_TRI) { + if (quad->input.prim == PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ int y0, y1; uint stipple0, stipple1; if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.height - 1 - quad->y0; + y0 = softpipe->framebuffer.height - 1 - quad->input.y0; y1 = y0 - 1; } else { - y0 = quad->y0; + y0 = quad->input.y0; y1 = y0 + 1; } stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; @@ -37,18 +37,18 @@ stipple_quad(struct quad_stage *qs, struct quad_header 
*quad) #if 1 { - const int col0 = quad->x0 % 32; + const int col0 = quad->input.x0 % 32; if ((stipple0 & (bit31 >> col0)) == 0) - quad->mask &= ~MASK_TOP_LEFT; + quad->inout.mask &= ~MASK_TOP_LEFT; if ((stipple0 & (bit30 >> col0)) == 0) - quad->mask &= ~MASK_TOP_RIGHT; + quad->inout.mask &= ~MASK_TOP_RIGHT; if ((stipple1 & (bit31 >> col0)) == 0) - quad->mask &= ~MASK_BOTTOM_LEFT; + quad->inout.mask &= ~MASK_BOTTOM_LEFT; if ((stipple1 & (bit30 >> col0)) == 0) - quad->mask &= ~MASK_BOTTOM_RIGHT; + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; } #else /* We'd like to use this code, but we'd need to redefine @@ -56,11 +56,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) * and similarly for the BOTTOM bits. But that may have undesirable * side effects elsewhere. */ - const int col0 = 30 - (quad->x0 % 32); - quad->mask &= (((stipple0 >> col0) & 0x3) | + const int col0 = 30 - (quad->input.x0 % 32); + quad->inout.mask &= (((stipple0 >> col0) & 0x3) | (((stipple1 >> col0) & 0x3) << 2)); #endif - if (!quad->mask) + if (!quad->inout.mask) return; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 87336ab6e31..bc8263c33e3 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -43,6 +43,7 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -61,6 +62,87 @@ struct edge { int lines; /**< number of lines on this edge */ }; +#if SP_NUM_QUAD_THREADS > 1 + +/* Set to 1 if you want other threads to be instantly + * notified of pending jobs. 
+ */ +#define INSTANT_NOTEMPTY_NOTIFY 0 + +struct thread_info +{ + struct setup_context *setup; + uint id; + pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ + struct quad_header_input input; + struct quad_header_inout inout; + quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ + struct quad_job jobs[NUM_QUAD_JOBS]; + uint first; + uint last; + pipe_mutex que_mutex; + pipe_condvar que_notfull_condvar; + pipe_condvar que_notempty_condvar; + uint jobs_added; + uint jobs_done; + pipe_condvar que_done_condvar; +}; + +static void +add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) +{ +#if INSTANT_NOTEMPTY_NOTIFY + boolean empty; +#endif + + /* Wait for empty slot, see if the que is empty. + */ + pipe_mutex_lock( que->que_mutex ); + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { +#if !INSTANT_NOTEMPTY_NOTIFY + pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif + pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); + } +#if INSTANT_NOTEMPTY_NOTIFY + empty = que->last == que->first; +#endif + que->jobs_added++; + pipe_mutex_unlock( que->que_mutex ); + + /* Submit new job. + */ + que->jobs[que->last].input = quad->input; + que->jobs[que->last].inout = quad->inout; + que->jobs[que->last].routine = routine; + que->last = (que->last + 1) % NUM_QUAD_JOBS; + +#if INSTANT_NOTEMPTY_NOTIFY + /* If the que was empty, notify consumers there's a job to be done. + */ + if (empty) { + pipe_mutex_lock( que->que_mutex ); + pipe_condvar_broadcast( que->que_notempty_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } +#endif +} + +#endif /** * Triangle setup info (derived from draw_stage). 
@@ -88,6 +170,11 @@ struct setup_context { struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; +#if SP_NUM_QUAD_THREADS > 1 + struct quad_job_que que; + struct thread_info threads[SP_NUM_QUAD_THREADS]; +#endif + struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -104,7 +191,67 @@ struct setup_context { unsigned winding; /* which winding to cull */ }; +#if SP_NUM_QUAD_THREADS > 1 + +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ + struct thread_info *info = (struct thread_info *) param; + struct quad_job_que *que = &info->setup->que; + + for (;;) { + struct quad_job job; + boolean full; + + /* Wait for an available job. + */ + pipe_mutex_lock( que->que_mutex ); + while (que->last == que->first) + pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); + + /* See if the que is full. + */ + full = (que->last + 1) % NUM_QUAD_JOBS == que->first; + + /* Take a job and remove it from que. + */ + job = que->jobs[que->first]; + que->first = (que->first + 1) % NUM_QUAD_JOBS; + + /* Notify the producer if the que is not full. + */ + if (full) + pipe_condvar_signal( que->que_notfull_condvar ); + pipe_mutex_unlock( que->que_mutex ); + + job.routine( info->setup, info->id, &job ); + + /* Notify the producer if that's the last finished job. + */ + pipe_mutex_lock( que->que_mutex ); + que->jobs_done++; + if (que->jobs_added == que->jobs_done) + pipe_condvar_signal( que->que_done_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } + + return NULL; +} + +#define WAIT_FOR_COMPLETION(setup) \ + do {\ + pipe_mutex_lock( setup->que.que_mutex );\ + if (!INSTANT_NOTEMPTY_NOTIFY)\ + pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ + while (setup->que.jobs_added != setup->que.jobs_done)\ + pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ + pipe_mutex_unlock( setup->que.que_mutex );\ + } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) +#endif /** * Test if x is NaN or +/- infinity. 
@@ -143,7 +290,7 @@ static boolean cull_tri( struct setup_context *setup, * Clip setup->quad against the scissor/surface bounds. */ static INLINE void -quad_clip(struct setup_context *setup) +quad_clip( struct setup_context *setup, struct quad_header *quad ) { const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; const int minx = (int) cliprect->minx; @@ -151,22 +298,22 @@ quad_clip(struct setup_context *setup) const int miny = (int) cliprect->miny; const int maxy = (int) cliprect->maxy; - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { + if (quad->input.x0 >= maxx || + quad->input.y0 >= maxy || + quad->input.x0 + 1 < minx || + quad->input.y0 + 1 < miny) { /* totally clipped */ - setup->quad.mask = 0x0; + quad->inout.mask = 0x0; return; } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + if (quad->input.x0 < minx) + quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->input.y0 < miny) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->input.x0 == maxx - 1) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->input.y0 == maxy - 1) + quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); } @@ -174,35 +321,59 @@ quad_clip(struct setup_context *setup) * Emit a quad (pass to next stage) with clipping. 
*/ static INLINE void -clip_emit_quad(struct setup_context *setup) +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { - quad_clip(setup); - if (setup->quad.mask) { + quad_clip( setup, quad ); + if (quad->inout.mask) { struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); + + sp->quad[thread].first->run( sp->quad[thread].first, quad ); } } +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + clip_emit_quad( setup, &quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif /** * Emit a quad (pass to next stage). No clipping is done. 
*/ static INLINE void -emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) { struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; +#if DEBUG_FRAGS + uint mask = quad->inout.mask; +#endif + #if DEBUG_FRAGS if (mask & 1) setup->numFragsEmitted++; if (mask & 2) setup->numFragsEmitted++; if (mask & 4) setup->numFragsEmitted++; if (mask & 8) setup->numFragsEmitted++; #endif - sp->quad.first->run(sp->quad.first, &setup->quad); + sp->quad[thread].first->run( sp->quad[thread].first, quad ); #if DEBUG_FRAGS - mask = setup->quad.mask; + mask = quad->inout.mask; if (mask & 1) setup->numFragsWritten++; if (mask & 2) setup->numFragsWritten++; if (mask & 4) setup->numFragsWritten++; @@ -210,6 +381,38 @@ emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) #endif } +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + emit_quad( setup, &quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ + } while (0) + +#else + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + emit_quad( setup, &setup->quad, 0 );\ + } while (0) + +#endif /** * Given an X or Y coordinate, return the block/quad coordinate that it @@ -249,7 +452,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_RIGHT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, 
mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -263,7 +466,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_TOP_LEFT; if (x+1 >= xleft0 && x+1 < xright0) mask |= MASK_TOP_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -277,7 +480,7 @@ static void flush_spans( struct setup_context *setup ) mask |= MASK_BOTTOM_LEFT; if (x+1 >= xleft1 && x+1 < xright1) mask |= MASK_BOTTOM_RIGHT; - emit_quad( setup, x, setup->span.y, mask ); + EMIT_QUAD( setup, x, setup->span.y, mask ); } break; @@ -398,7 +601,7 @@ static boolean setup_sort_vertices( struct setup_context *setup, * - the GLSL gl_FrontFacing fragment attribute (bool) * - two-sided stencil test */ - setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); return TRUE; } @@ -595,7 +798,7 @@ static void setup_tri_coefficients( struct setup_context *setup ) if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } @@ -765,7 +968,7 @@ void setup_tri( struct setup_context *setup, setup_tri_coefficients( setup ); setup_tri_edges( setup ); - setup->quad.prim = PRIM_TRI; + setup->quad.input.prim = PRIM_TRI; setup->span.y = 0; setup->span.y_flags = 0; @@ -790,6 +993,8 @@ void setup_tri( struct setup_context *setup, flush_spans( setup ); + WAIT_FOR_COMPLETION(setup); + #if DEBUG_FRAGS printf("Tri: %u frags emitted, %u written\n", setup->numFragsEmitted, @@ -904,7 +1109,7 @@ setup_line_coefficients(struct setup_context *setup, if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX 
fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } @@ -925,20 +1130,20 @@ plot(struct setup_context *setup, int x, int y) const int quadY = y - iy; const int mask = (1 << ix) << (2 * iy); - if (quadX != setup->quad.x0 || - quadY != setup->quad.y0) + if (quadX != setup->quad.input.x0 || + quadY != setup->quad.input.y0) { /* flush prev quad, start new quad */ - if (setup->quad.x0 != -1) - clip_emit_quad(setup); + if (setup->quad.input.x0 != -1) + CLIP_EMIT_QUAD(setup); - setup->quad.x0 = quadX; - setup->quad.y0 = quadY; - setup->quad.mask = 0x0; + setup->quad.input.x0 = quadX; + setup->quad.input.y0 = quadY; + setup->quad.inout.mask = 0x0; } - setup->quad.mask |= mask; + setup->quad.inout.mask |= mask; } @@ -999,16 +1204,16 @@ setup_line(struct setup_context *setup, assert(dx >= 0); assert(dy >= 0); - setup->quad.x0 = setup->quad.y0 = -1; - setup->quad.mask = 0x0; - setup->quad.prim = PRIM_LINE; + setup->quad.input.x0 = setup->quad.input.y0 = -1; + setup->quad.inout.mask = 0x0; + setup->quad.input.prim = PRIM_LINE; /* XXX temporary: set coverage to 1.0 so the line appears * if AA mode happens to be enabled. 
*/ - setup->quad.coverage[0] = - setup->quad.coverage[1] = - setup->quad.coverage[2] = - setup->quad.coverage[3] = 1.0; + setup->quad.input.coverage[0] = + setup->quad.input.coverage[1] = + setup->quad.input.coverage[2] = + setup->quad.input.coverage[3] = 1.0; if (dx > dy) { /*** X-major line ***/ @@ -1052,9 +1257,11 @@ setup_line(struct setup_context *setup, } /* draw final quad */ - if (setup->quad.mask) { - clip_emit_quad(setup); + if (setup->quad.inout.mask) { + CLIP_EMIT_QUAD(setup); } + + WAIT_FOR_COMPLETION(setup); } @@ -1148,22 +1355,22 @@ setup_point( struct setup_context *setup, if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; setup->coef[fragSlot].dadx[1] = 0.0; setup->coef[fragSlot].dady[1] = 0.0; } } - setup->quad.prim = PRIM_POINT; + setup->quad.input.prim = PRIM_POINT; if (halfSize <= 0.5 && !round) { /* special case for 1-pixel points */ const int ix = ((int) x) & 1; const int iy = ((int) y) & 1; - setup->quad.x0 = (int) x - ix; - setup->quad.y0 = (int) y - iy; - setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); + setup->quad.input.x0 = (int) x - ix; + setup->quad.input.y0 = (int) y - iy; + setup->quad.inout.mask = (1 << ix) << (2 * iy); + CLIP_EMIT_QUAD(setup); } else { if (round) { @@ -1183,15 +1390,15 @@ setup_point( struct setup_context *setup, for (ix = ixmin; ix <= ixmax; ix += 2) { float dx, dy, dist2, cover; - setup->quad.mask = 0x0; + setup->quad.inout.mask = 0x0; dx = (ix + 0.5f) - x; dy = (iy + 0.5f) - y; dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_LEFT; + setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_LEFT; } dx = (ix + 1.5f) - x; @@ -1199,8 +1406,8 
@@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_RIGHT; + setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_RIGHT; } dx = (ix + 0.5f) - x; @@ -1208,8 +1415,8 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_LEFT; + setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_LEFT; } dx = (ix + 1.5f) - x; @@ -1217,14 +1424,14 @@ setup_point( struct setup_context *setup, dist2 = dx * dx + dy * dy; if (dist2 <= rmax2) { cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_RIGHT; + setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; } - if (setup->quad.mask) { - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); + if (setup->quad.inout.mask) { + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); } } } @@ -1268,14 +1475,16 @@ setup_point( struct setup_context *setup, mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); } - setup->quad.mask = mask; - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); + setup->quad.inout.mask = mask; + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); } } } } + + WAIT_FOR_COMPLETION(setup); } void setup_prepare( struct setup_context *setup ) @@ -1300,7 +1509,9 @@ void setup_prepare( struct setup_context *setup ) /* Note: nr_attrs is only used for debugging (vertex printing) */ setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); - 
sp->quad.first->begin(sp->quad.first); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first->begin( sp->quad[i].first ); + } if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && @@ -1328,11 +1539,31 @@ void setup_destroy_context( struct setup_context *setup ) struct setup_context *setup_create_context( struct softpipe_context *softpipe ) { struct setup_context *setup = CALLOC_STRUCT(setup_context); +#if SP_NUM_QUAD_THREADS > 1 + uint i; +#endif setup->softpipe = softpipe; setup->quad.coef = setup->coef; setup->quad.posCoef = &setup->posCoef; +#if SP_NUM_QUAD_THREADS > 1 + setup->que.first = 0; + setup->que.last = 0; + pipe_mutex_init( setup->que.que_mutex ); + pipe_condvar_init( setup->que.que_notfull_condvar ); + pipe_condvar_init( setup->que.que_notempty_condvar ); + setup->que.jobs_added = 0; + setup->que.jobs_done = 0; + pipe_condvar_init( setup->que.que_done_condvar ); + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + setup->threads[i].setup = setup; + setup->threads[i].id = i; + setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] ); + } +#endif + return setup; } + diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 1be461b3a46..e5b609cf6c9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -152,7 +152,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->constants[shader].size = buf ? 
buf->size : 0; diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 389aceb27ce..6ade7326982 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -25,132 +25,14 @@ * **************************************************************************/ -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "util/u_tile.h" #include "util/u_rect.h" #include "sp_context.h" -#include "sp_surface.h" -/** - * Copy a rectangular region from one surface to another. - * Surfaces must have same bpp. - * - * Note that it's always the case that Y=0=top of the raster. - * If do_flip is non-zero, the region being copied will be flipped vertically. - * - * Assumes all values are within bounds -- no checking at this level - - * do it higher up if required. - */ -static void -sp_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - const void *src_map = pipe->screen->surface_map( pipe->screen, - src, - PIPE_BUFFER_USAGE_CPU_READ ); - - assert(dst->block.size == src->block.size); - assert(dst->block.width == src->block.width); - assert(dst->block.height == src->block.height); - assert(src_map); - assert(dst_map); - - /* If do_flip, invert src_y position and pass negative src stride */ - pipe_copy_rect(dst_map, - &dst->block, - dst->stride, - dstx, dsty, - width, height, - src_map, - do_flip ? 
-(int) src->stride : src->stride, - srcx, srcy); - - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); -} - - -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; -} - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void -sp_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - unsigned i, j; - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - assert(dst->stride > 0); - - - switch (dst->block.size) { - case 1: - case 2: - case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst->stride/2; - } - } - break; - default: - assert(0); - break; - } - - pipe->screen->surface_unmap(pipe->screen, dst); -} - - void sp_init_surface_functions(struct softpipe_context *sp) { - sp->pipe.surface_copy = sp_surface_copy; - sp->pipe.surface_fill = sp_surface_fill; + sp->pipe.surface_copy = util_surface_copy; + 
sp->pipe.surface_fill = util_surface_fill; } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 3a737d6f722..cb48035771b 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -192,7 +192,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); spt->stride[0] = stride[0]; - pipe_buffer_reference(screen->winsys, &spt->buffer, buffer); + pipe_buffer_reference(screen, &spt->buffer, buffer); return &spt->base; } @@ -208,7 +208,7 @@ softpipe_texture_release(struct pipe_screen *screen, if (--(*pt)->refcount <= 0) { struct softpipe_texture *spt = softpipe_texture(*pt); - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } *pt = NULL; @@ -231,7 +231,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; @@ -307,7 +307,7 @@ softpipe_surface_map( struct pipe_screen *screen, return NULL; } - map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -331,7 +331,7 @@ static void softpipe_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript index 5c49468c4eb..0a6bfb8f4c7 100644 --- a/src/gallium/drivers/trace/SConscript +++ b/src/gallium/drivers/trace/SConscript @@ -9,8 +9,6 @@ trace = env.ConvenienceLibrary( 'tr_dump.c', 'tr_screen.c', 'tr_state.c', 
- 'tr_stream_stdc.c', - 'tr_stream_wd.c', 'tr_texture.c', 'tr_winsys.c', ]) diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 48032c1617f..a0ead0ded33 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -48,12 +48,12 @@ #include "pipe/p_debug.h" #include "util/u_memory.h" #include "util/u_string.h" +#include "util/u_stream.h" -#include "tr_stream.h" #include "tr_dump.h" -static struct trace_stream *stream = NULL; +static struct util_stream *stream = NULL; static unsigned refcount = 0; @@ -61,7 +61,7 @@ static INLINE void trace_dump_write(const char *buf, size_t size) { if(stream) - trace_stream_write(stream, buf, size); + util_stream_write(stream, buf, size); } @@ -212,7 +212,7 @@ trace_dump_trace_close(void) { if(stream) { trace_dump_writes("</trace>\n"); - trace_stream_close(stream); + util_stream_close(stream); stream = NULL; refcount = 0; } @@ -228,7 +228,7 @@ boolean trace_dump_trace_begin() if(!stream) { - stream = trace_stream_create(filename); + stream = util_stream_create(filename, 0); if(!stream) return FALSE; @@ -272,7 +272,7 @@ void trace_dump_call_end(void) trace_dump_indent(1); trace_dump_tag_end("call"); trace_dump_newline(); - trace_stream_flush(stream); + util_stream_flush(stream); } void trace_dump_arg_begin(const char *name) diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c deleted file mode 100644 index 4c19ec0b243..00000000000 --- a/src/gallium/drivers/trace/tr_stream_stdc.c +++ /dev/null @@ -1,104 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation based on the Standard C Library. - */ - -#include "pipe/p_config.h" - -#if defined(PIPE_OS_LINUX) - -#include <stdio.h> - -#include "util/u_memory.h" - -#include "tr_stream.h" - - -struct trace_stream -{ - FILE *file; -}; - - -struct trace_stream * -trace_stream_create(const char *filename) -{ - struct trace_stream *stream; - - stream = CALLOC_STRUCT(trace_stream); - if(!stream) - goto error1; - - stream->file = fopen(filename, "w"); - if(!stream->file) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - return fwrite(data, size, 1, stream->file) == size ? 
TRUE : FALSE; -} - - -void -trace_stream_flush(struct trace_stream *stream) -{ - if(!stream) - return; - - fflush(stream->file); -} - - -void -trace_stream_close(struct trace_stream *stream) -{ - if(!stream) - return; - - fclose(stream->file); - - FREE(stream); -} - - -#endif diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c deleted file mode 100644 index 704eb15bd71..00000000000 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ /dev/null @@ -1,183 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Stream implementation for the Windows Display driver. 
- */ - -#include "pipe/p_config.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -#include <windows.h> -#include <winddi.h> - -#include "util/u_memory.h" -#include "util/u_string.h" - -#include "tr_stream.h" - - -#define MAP_FILE_SIZE (4*1024*1024) - - -struct trace_stream -{ - char filename[MAX_PATH + 1]; - WCHAR wFileName[MAX_PATH + 1]; - ULONG_PTR iFile; - char *pMap; - size_t written; - unsigned suffix; -}; - - -static INLINE boolean -trace_stream_map(struct trace_stream *stream) -{ - ULONG BytesInUnicodeString; - static char filename[MAX_PATH + 1]; - unsigned filename_len; - - filename_len = util_snprintf(filename, - sizeof(filename), - "\\??\\%s.%04x", - stream->filename, - stream->suffix++); - - EngMultiByteToUnicodeN( - stream->wFileName, - sizeof(stream->wFileName), - &BytesInUnicodeString, - filename, - filename_len); - - stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); - if(!stream->pMap) - return FALSE; - - memset(stream->pMap, 0, MAP_FILE_SIZE); - stream->written = 0; - - return TRUE; -} - - -static INLINE void -trace_stream_unmap(struct trace_stream *stream) -{ - EngUnmapFile(stream->iFile); - if(stream->written < MAP_FILE_SIZE) { - /* Truncate file size */ - stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); - if(stream->pMap) - EngUnmapFile(stream->iFile); - } - - stream->pMap = NULL; -} - - -struct trace_stream * -trace_stream_create(const char *filename) -{ - struct trace_stream *stream; - - stream = CALLOC_STRUCT(trace_stream); - if(!stream) - goto error1; - - strncpy(stream->filename, filename, sizeof(stream->filename)); - - if(!trace_stream_map(stream)) - goto error2; - - return stream; - -error2: - FREE(stream); -error1: - return NULL; -} - - -static INLINE void -trace_stream_copy(struct trace_stream *stream, const char *data, size_t size) -{ - assert(stream->written + size <= MAP_FILE_SIZE); - memcpy(stream->pMap + stream->written, data, size); - stream->written += size; -} - - 
-boolean -trace_stream_write(struct trace_stream *stream, const void *data, size_t size) -{ - if(!stream) - return FALSE; - - if(!stream->pMap) - return FALSE; - - while(stream->written + size > MAP_FILE_SIZE) { - size_t step = MAP_FILE_SIZE - stream->written; - trace_stream_copy(stream, data, step); - data = (const char *)data + step; - size -= step; - - trace_stream_unmap(stream); - if(!trace_stream_map(stream)) - return FALSE; - } - - trace_stream_copy(stream, data, size); - - return TRUE; -} - - -void -trace_stream_flush(struct trace_stream *stream) -{ - (void)stream; -} - - -void -trace_stream_close(struct trace_stream *stream) -{ - if(!stream) - return; - - trace_stream_unmap(stream); - - FREE(stream); -} - - -#endif |